Skip to content

Commit

Permalink
Merge branch 'main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
jenniferjiangkells authored Jun 17, 2024
2 parents 0cfd6b3 + 79a7845 commit a3c2788
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 31 deletions.
77 changes: 47 additions & 30 deletions healthchain/data_generator/data_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,15 @@
from healthchain.fhir_resources.general_purpose_resources import NarrativeModel
from healthchain.base import Workflow
from pydantic import BaseModel
from pathlib import Path


import random
import json
import csv

import logging

logger = logging.getLogger(__name__)


workflow_mappings = {
Expand Down Expand Up @@ -48,55 +54,66 @@ def set_workflow(self, workflow: str):
self.workflow = workflow

def generate(
    self,
    constraints: Optional[list] = None,
    free_text_path: Optional[str] = None,
    column_name: Optional[str] = None,
) -> BaseModel:
    """Generate a FHIR bundle of synthetic resources for the current workflow.

    One resource is generated per entry in ``self.mappings[self.workflow]``.
    If ``free_text_path`` is given, the CSV is parsed and one randomly chosen
    free-text document is appended to the bundle as an extra entry.

    Args:
        constraints: Optional list of constraints forwarded to each resource
            generator.
        free_text_path: Optional path to a CSV file of free-text documents.
        column_name: Name of the CSV column holding the free text; required
            when ``free_text_path`` is provided.

    Returns:
        The assembled ``OutputDataModel`` (also stored on ``self.data``).

    Raises:
        ValueError: If the current workflow has no entry in the mappings.
    """
    # Fail fast on an unknown workflow before doing any generation work.
    if self.workflow not in self.mappings:
        raise ValueError(f"Workflow {self.workflow} not found in mappings")

    results = []
    for resource in self.mappings[self.workflow]:
        generator_name = resource["generator"]
        generator = self.fetch_generator(generator_name)
        result = generator.generate(constraints=constraints)
        results.append(Bundle_EntryModel(resource=result))

    # Only read the CSV when a path was supplied; otherwise skip free text.
    parsed_free_text = (
        self.free_text_parser(free_text_path, column_name)
        if free_text_path
        else None
    )
    if parsed_free_text:
        # Pick a single document at random so each generated bundle varies.
        results.append(Bundle_EntryModel(resource=random.choice(parsed_free_text)))

    output = OutputDataModel(context={}, resources=BundleModel(entry=results))
    self.data = output
    return output

def free_text_parser(self, path_to_csv: str, column_name: str) -> list:
    """Read free-text rows from a CSV column and wrap each in a DocumentReference.

    Args:
        path_to_csv: Path to an existing CSV file with a header row.
        column_name: Name of the column whose values become document text.

    Returns:
        A list of ``DocumentReferenceModel`` objects, one per CSV row.
        Returns an empty list if the file could not be read (the error is
        logged).

    Raises:
        FileNotFoundError: If ``path_to_csv`` does not point to a file.
        ValueError: If ``column_name`` is not provided.
    """
    # Check that path_to_csv is a valid path with pathlib
    path = Path(path_to_csv)
    if not path.is_file():
        raise FileNotFoundError(
            f"The file {path_to_csv} does not exist or is not a file."
        )

    # Bug fix: this validation used to sit inside the try block below, where
    # the broad `except` caught and merely logged the ValueError, silently
    # returning an empty list. Validate before reading so it propagates.
    if column_name is None:
        raise ValueError("Column name must be provided when header is True.")

    column_data = []
    try:
        with path.open(mode="r", newline="") as file:
            reader = csv.DictReader(file)
            for row in reader:
                column_data.append(row[column_name])
    except (OSError, csv.Error, KeyError) as ex:
        # Best-effort read: log and fall through with whatever was collected.
        # Narrowed from `except Exception` so programming errors still surface.
        logger.error(f"An error occurred: {ex}")

    document_list = []
    for x in column_data:
        # First parse x in to documentreferencemodel format
        text = NarrativeModel(
            status="generated",
            div=f'<div xmlns="http://www.w3.org/1999/xhtml">{x}</div>',
        )
        doc = DocumentReferenceModel(text=text)  # TODO: Add more fields
        document_list.append(doc)

    return document_list
4 changes: 3 additions & 1 deletion tests/generators_tests/test_data_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ def test_generator_with_json():

workflow = Workflow.patient_view
generator.set_workflow(workflow=workflow)
generator.generate(free_text_json="use_cases/example_free_text.json")
generator.generate(
free_text_path="use_cases/my_encounter_data.csv", column_name="free_text"
)

assert len(generator.data.model_dump(by_alias=True)["resources"]["entry"]) == 4

0 comments on commit a3c2788

Please sign in to comment.