Skip to content

Commit

Permalink
feat: write json to file
Browse files Browse the repository at this point in the history
  • Loading branch information
tillywoodfield committed Feb 4, 2025
1 parent 4f8e1ce commit d3c52cc
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
.venv
__pycache__

data/
14 changes: 14 additions & 0 deletions oc4ids_datastore_pipeline/pipeline.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import json
import logging
import os
from typing import Any

import requests
Expand Down Expand Up @@ -41,11 +43,23 @@ def validate_json(dataset_name: str, json_data: Any) -> None:
raise Exception("Validation failed", e)


def write_json_to_file(file_name: str, json_data: Any) -> None:
    """Serialise ``json_data`` to ``file_name`` as JSON indented by 4 spaces.

    Missing parent directories are created first. A bare file name with no
    directory component is written relative to the current working
    directory.

    Args:
        file_name: Destination path of the JSON file.
        json_data: Any object that ``json.dump`` can serialise.

    Raises:
        Exception: If the directory cannot be created, or the data cannot be
            serialised or written. The underlying error is passed as the
            second argument and is also chained as ``__cause__``.
    """
    logger.info(f"Writing dataset to file {file_name}")
    try:
        # os.path.dirname returns "" for a bare file name and os.makedirs("")
        # raises FileNotFoundError, so only create the parent when one exists.
        parent_dir = os.path.dirname(file_name)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        # Explicit encoding keeps the output independent of the locale default.
        with open(file_name, "w", encoding="utf-8") as file:
            json.dump(json_data, file, indent=4)
        logger.info(f"Finished writing to {file_name}")
    except Exception as e:
        # Keep the (message, error) args callers may inspect, and chain the
        # cause so the original traceback is preserved.
        raise Exception("Error while writing to JSON file", e) from e


def process_dataset(dataset_name: str, dataset_url: str) -> None:
    """Download, validate and store one dataset, logging any failure.

    The result of ``download_json(dataset_url)`` is validated and then written
    to ``data/<dataset_name>.json``. Any exception raised along the way is
    caught and reported as a warning rather than propagated.
    """
    logger.info(f"Processing dataset {dataset_name}")
    try:
        payload = download_json(dataset_url)
        validate_json(dataset_name, payload)
        output_path = f"data/{dataset_name}.json"
        write_json_to_file(output_path, payload)
        logger.info(f"Processed dataset {dataset_name}")
    except Exception as e:
        # Best effort: a failing dataset is logged, not raised.
        logger.warning(f"Failed to process dataset {dataset_name} with error {e}")
Expand Down
33 changes: 33 additions & 0 deletions tests/test_pipeline.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
import os
import tempfile
from textwrap import dedent

import pytest
from pytest_mock import MockerFixture

from oc4ids_datastore_pipeline.pipeline import (
download_json,
process_dataset,
validate_json,
write_json_to_file,
)


Expand Down Expand Up @@ -47,6 +52,34 @@ def test_validate_json_raises_validation_errors_exception(
assert "Dataset has 2 validation errors" in str(exc_info.value)


def test_write_json_to_file_writes_in_correct_format() -> None:
    """write_json_to_file emits JSON indented by four spaces, no trailing newline."""
    # Mirrors json.dump(..., indent=4): the nested key carries a four-space
    # indent once dedent has stripped the common leading whitespace.
    expected = dedent(
        """\
        {
            "key": "value"
        }"""
    )

    # `tmp_dir` rather than `dir`, which would shadow the builtin.
    with tempfile.TemporaryDirectory() as tmp_dir:
        file_name = os.path.join(tmp_dir, "test_dataset.json")
        write_json_to_file(file_name=file_name, json_data={"key": "value"})

        # Read back before the temporary directory is removed.
        with open(file_name, encoding="utf-8") as file:
            assert file.read() == expected


def test_write_json_to_file_raises_failure_exception(mocker: MockerFixture) -> None:
    """A failure inside json.dump surfaces as the wrapped write error."""
    mocker.patch(
        "oc4ids_datastore_pipeline.pipeline.json.dump",
        side_effect=Exception("Mocked exception"),
    )

    with pytest.raises(Exception) as exc_info, tempfile.TemporaryDirectory() as tmp_dir:
        target = os.path.join(tmp_dir, "test_dataset.json")
        write_json_to_file(file_name=target, json_data={"key": "value"})

    # Both the wrapper message and the underlying error must be visible.
    message = str(exc_info.value)
    assert "Error while writing to JSON file" in message
    assert "Mocked exception" in message


def test_process_dataset_catches_exception(mocker: MockerFixture) -> None:
patch_download_json = mocker.patch(
"oc4ids_datastore_pipeline.pipeline.download_json"
Expand Down

0 comments on commit d3c52cc

Please sign in to comment.