Add method to dump repo dbs to JSON files

Add `dump_db_to_json_files()`, which reads a repository database file,
serializes all of its members as JSON and writes them to files named
after the packages' respective pkgbases.
orjson is used for the JSON serialization, as it is the fastest
implementation available and appears to be the most correct one.

Add a fixture to create and destroy a temporary directory (used to write
JSON files to).
Add tests for `dump_db_to_json_files()`.
parent 7e3a448d
from os.path import join
from pathlib import Path
from typing import Dict, Iterator, Tuple
import orjson
from repo_management import convert, defaults, files, models
......@@ -54,3 +57,33 @@ def db_file_as_models(db_path: Path, compression: str = "gz") -> Iterator[Tuple[
for (name, package) in packages.items():
yield (name, package)
def dump_db_to_json_files(input_path: Path, output_path: Path) -> None:
    """Read a repository database file and dump each pkgbase contained in it to a separate JSON file below a defined
    output directory

    Parameters
    ----------
    input_path: Path
        The input file to read and parse
    output_path: Path
        A directory in which to create one "<name>.json" file per entry yielded by db_file_as_models().
        An already existing file of the same name is overwritten.

    Raises
    ------
    RuntimeError
        If input_path does not exist or is not a file, or if output_path does not exist or is not a directory
    """

    # Validate both paths up front, so that nothing is parsed or written when the call cannot succeed.
    if not input_path.exists():
        raise RuntimeError(f"The input file does not exist: {input_path}")
    if not input_path.is_file():
        raise RuntimeError(f"The input file is not a file: {input_path}")
    if not output_path.exists():
        raise RuntimeError(f"The provided output path does not exist: {output_path}")
    if not output_path.is_dir():
        raise RuntimeError(f"The provided output path is not a directory: {output_path}")

    for name, model in db_file_as_models(db_path=input_path):
        # Serialize before opening the target, so that a failing dump does not leave an empty file behind.
        serialized = orjson.dumps(
            model.dict(), option=orjson.OPT_INDENT_2 | orjson.OPT_APPEND_NEWLINE | orjson.OPT_SORT_KEYS
        )
        # orjson.dumps() returns bytes, hence the binary write mode.
        with open(join(output_path, f"{name}.json"), "wb") as output_file:
            output_file.write(serialized)
import os
import shutil
import tempfile
from pathlib import Path
from typing import Iterator
from pytest import fixture
from mock import Mock
from pytest import fixture, raises
from repo_management import models, operations
......@@ -16,7 +19,38 @@ def create_gz_db_file() -> Iterator[Path]:
def create_dir_path() -> Iterator[Path]:
    """Yield the path of a freshly created temporary directory and remove the directory afterwards.

    The tests in this module request this generator by name as a pytest fixture.
    NOTE(review): the fixture decorator is not visible in this excerpt - confirm it is applied to this function.
    """
    temp_dir = tempfile.mkdtemp()
    try:
        yield Path(temp_dir)
    finally:
        # Remove the directory and everything written below it, even if the generator is closed early.
        shutil.rmtree(temp_dir)
def test_db_file_as_models(create_gz_db_file: Path) -> None:
    """Check that each entry read from the database file is a (str, models.OutputPackageBase) pair."""
    entries = operations.db_file_as_models(db_path=create_gz_db_file)
    for pkgbase_name, package_model in entries:
        assert isinstance(pkgbase_name, str)
        assert isinstance(package_model, models.OutputPackageBase)
def test_dump_db_to_json_files(create_gz_db_file: Path, create_dir_path: Path) -> None:
    """Smoke test: dumping the database file into the temporary directory must complete without raising."""
    database_file, target_dir = create_gz_db_file, create_dir_path
    operations.dump_db_to_json_files(input_path=database_file, output_path=target_dir)
def test_dump_db_to_json_files_raises() -> None:
    """Check that each of the four path validations in dump_db_to_json_files() raises a RuntimeError.

    The mocked paths return one value per call (side_effect), so every consecutive call of the function gets one
    validation step further before it fails.
    """
    input_path = Mock(
        # call 1 fails on exists(), calls 2-4 pass it
        exists=Mock(side_effect=[False, True, True, True]),
        # call 2 fails on is_file(), calls 3-4 pass it
        is_file=Mock(side_effect=[False, True, True]),
    )
    output_path = Mock(
        # call 3 fails on exists(), call 4 passes it
        exists=Mock(side_effect=[False, True]),
        # call 4 fails on is_dir(); without this a truthy Mock would be returned and the check would pass
        is_dir=Mock(side_effect=[False]),
    )

    # One expected message per call, in the order in which the validations are performed.
    expected_messages = [
        "The input file does not exist",
        "The input file is not a file",
        "The provided output path does not exist",
        "The provided output path is not a directory",
    ]
    for message in expected_messages:
        with raises(RuntimeError, match=message):
            operations.dump_db_to_json_files(input_path=input_path, output_path=output_path)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment