Adding distribution files #54

Merged
README.md: 59 changes (36 additions, 23 deletions)
@@ -33,11 +33,13 @@ For a more detailed Demo, clone repo into Databricks and refer to the notebook [
 
 ## Usage: Read & Analyze a FHIR Bundle
 
-### 1. FHIR representations
+### 1. FHIR representations & Versions
 
 ``` python
 from dbignite.fhir_mapping_model import FhirSchemaModel
-fhir_schema = FhirSchemaModel()
+fhir_schema = FhirSchemaModel(schema_version="ci-build") #this is the default and contains the latest, unfinalized changes
+fhir_schema = FhirSchemaModel(schema_version="r4") #OR use this for FHIR R4
+fhir_schema = FhirSchemaModel(schema_version="r5") #OR use this for FHIR R5
 
 #list all supported FHIR resources
 sorted(fhir_schema.list_keys()) # ['Account', 'ActivityDefinition', 'ActorDefinition'...
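A small usage sketch of the new `schema_version` switch (an editor's illustration, not part of this diff); it assumes only the constructor shown above and the existing `list_keys` method, and the printed counts are not asserted.

``` python
from dbignite.fhir_mapping_model import FhirSchemaModel

# Compare how many resource schemas each packaged version ships with
for version in ["ci-build", "r4", "r5"]:
    schema = FhirSchemaModel(schema_version=version)
    print(version, len(schema.list_keys()), "FHIR resource schemas")
```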
@@ -67,7 +69,7 @@ sample_data = "./sampledata/*json"
 bundle = read_from_directory(sample_data)
 
 #Read all the bundles and parse
-bundle.entry()
+bundle.entry() #OR specify a schema version here -> bundle.entry(schemas = FhirSchemaModel(schema_version="r4"))
 
 #Show the total number of patient resources in all bundles
 bundle.count_resource_type("Patient").show()
@@ -106,6 +108,8 @@ bundle.count_resource_type("Patient").show()
 ```
 
 ## SQL on FHIR
+> [!TIP]
+> For very large batches of FHIR data, use bundle.entry().cache() before calling bulk_table_write for best performance.
 
 ``` python
 %python
@@ -117,6 +121,7 @@ bundle.bulk_table_write(location="hls_healthcare.hls_dev"
   ,write_mode="overwrite"
   ,columns=["Patient", "Claim"]) #if columns is not specified, all columns of the dataframe are written (157 resources are written with default settings)
 ```
+
 ``` SQL
 %sql
 -- Select claim line detailed information
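The tip added above suggests caching parsed entries before a large bulk write. Below is a minimal sketch of that pattern (an editor's illustration, assuming `entry()` returns a Spark DataFrame with the usual cache/unpersist semantics and reusing the `bundle`, location, and columns from the examples above).

``` python
# Cache the parsed FHIR entries so they can be reused across the per-resource writes,
# as the tip above suggests (sketch; assumes a `bundle` created with read_from_directory)
entries = bundle.entry()
entries.cache()

bundle.bulk_table_write(location="hls_healthcare.hls_dev",
                        write_mode="overwrite",
                        columns=["Patient", "Claim"])

entries.unpersist()  # release the cached data once the tables are written
```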
@@ -303,18 +308,22 @@ result.map(lambda x: json.loads(x)).foreach(lambda x: print(json.dumps(x, indent
   "resourceType": "Bundle",
   "entry": [
     {
-      "resourceType": "Claim",
-      "id": "CLM123"
+      "resource": {
+        "resourceType": "Claim",
+        "id": "CLM123"
+      }
     },
     {
-      "resourceType": "Patient",
-      "id": "PAT01",
-      "identifier": [
-        {
-          "system": "<url of a hardcoded system reference>",
-          "value": "COH123"
-        }
-      ]
+      "resource": {
+        "resourceType": "Patient",
+        "id": "PAT01",
+        "identifier": [
+          {
+            "system": "<url of a hardcoded system reference>",
+            "value": "COH123"
+          }
+        ]
+      }
     }
   ]
 }
@@ -323,18 +332,22 @@
   "resourceType": "Bundle",
   "entry": [
     {
-      "resourceType": "Claim",
-      "id": "CLM345"
+      "resource": {
+        "resourceType": "Claim",
+        "id": "CLM345"
+      }
     },
     {
-      "resourceType": "Patient",
-      "id": "PAT02",
-      "identifier": [
-        {
-          "system": "<url of a hardcoded system reference>",
-          "value": "COH123"
-        }
-      ]
+      "resource": {
+        "resourceType": "Patient",
+        "id": "PAT02",
+        "identifier": [
+          {
+            "system": "<url of a hardcoded system reference>",
+            "value": "COH123"
+          }
+        ]
+      }
     }
   ]
 }
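As a quick check of the corrected bundle shape above, each entry now nests its payload under a `resource` key, matching the FHIR Bundle layout. The snippet below is an editor's illustration built only from the example values, not an API from this repo.

``` python
import json

bundle_text = """
{"resourceType": "Bundle",
 "entry": [
   {"resource": {"resourceType": "Claim", "id": "CLM123"}},
   {"resource": {"resourceType": "Patient", "id": "PAT01"}}
 ]}
"""

bundle = json.loads(bundle_text)
for entry in bundle["entry"]:
    resource = entry["resource"]  # the payload now sits under "resource"
    print(resource["resourceType"], resource["id"])  # Claim CLM123, then Patient PAT01
```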
dbignite/fhir_mapping_model.py: 28 changes (14 additions, 14 deletions)
@@ -9,28 +9,29 @@ class FhirSchemaModel:
     #
     # Class that manages access to FHIR resourceType -> Spark Schema mapping
     #
-    def __init__(
-        self, fhir_resource_map: Optional[dict[str, StructType]] = None
-    ) -> None:
+    def __init__(self,
+                 fhir_resource_map: Optional[dict[str, StructType]] = None,
+                 schema_version = "ci-build") -> None:
+        #schema_version can also be "r4" or "r5"; this changes which directory resources are read from under dbignite/schemas/<directory>
+        self.schema_version = schema_version
         self.__fhir_resource_map = (
             {
-                resource_type: FhirSchemaModel.__read_schema(schema_path)
-                for resource_type, schema_path in FhirSchemaModel.__get_schema_paths()
+                resource_type: FhirSchemaModel._read_schema(schema_path)
+                for resource_type, schema_path in self._get_schema_paths()
             }
             if fhir_resource_map is None
             else fhir_resource_map
         )
 

     @classmethod
-    def __read_schema(cls, path: str) -> StructType:
+    def _read_schema(cls, path: str) -> StructType:
         with open(path, "r") as f:
             return StructType.fromJson(json.load(f))
 

-    @classmethod
-    def __get_schema_paths(cls) -> list[tuple[str, str]]:
-        schema_dir = str(files("dbignite")) + "/schemas"
+    def _get_schema_paths(self) -> list[tuple[str, str]]:
+        schema_dir = str(files("dbignite")) + "/schemas/" + self.schema_version
         return [
             (os.path.splitext(p)[0], os.path.join(schema_dir, p))
             for p in os.listdir(schema_dir)
@@ -96,11 +97,10 @@ def us_core_fhir_resource_mapping(cls):
     #
     # Load supplied subset of FHIR resources into one dictionary
     #
-    @classmethod
-    def custom_fhir_resource_mapping(cls, resource_list: list[str]) -> "FhirSchemaModel":
+    def custom_fhir_resource_mapping(self, resource_list: list[str]) -> "FhirSchemaModel":
         custom_mapping = {
-            resource_type: FhirSchemaModel.__read_schema(schema_path)
-            for resource_type, schema_path in FhirSchemaModel.__get_schema_paths()
+            resource_type: FhirSchemaModel._read_schema(schema_path)
+            for resource_type, schema_path in self._get_schema_paths()
             if resource_type in resource_list
         }
         return FhirSchemaModel(fhir_resource_map=custom_mapping)
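Since custom_fhir_resource_mapping changes from a classmethod to an instance method here, calls now go through an instantiated FhirSchemaModel, which ties the subset to that instance's schema_version directory. A usage sketch (editor's illustration, not taken from this diff):

``` python
from dbignite.fhir_mapping_model import FhirSchemaModel

# Build a subset mapping from the R4 schema directory (instance call replaces the old classmethod)
r4_subset = FhirSchemaModel(schema_version="r4").custom_fhir_resource_mapping(["Patient", "Claim"])
sorted(r4_subset.list_keys())  # expected: ['Claim', 'Patient']
```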
@@ -115,7 +115,7 @@ def list_keys(self):
     # Return all keys of FHIR Resources packaged
     #
     def list_packaged_data(self):
-        return [resource_type for resource_type, _ in FhirSchemaModel.__get_schema_paths()]
+        return [resource_type for resource_type, _ in self._get_schema_paths()]
 
     #
     # Allow searching at the metadata level contained in the spark schema
dbignite/schemas/README.md: 2 changes (1 addition, 1 deletion)
@@ -1 +1 @@
-All FHIR Resources definitions from HL7 are translated in this directory. The version of schemas in this repository is [IG 6.0.0](https://hl7.org/fhir/us/core/history.html). To change schemas to a different version see [this](https://github.com/databricks-industry-solutions/json2spark-schema/blob/main/01_healthcare_FHIR_demo.py) public notebook.
+All FHIR Resource definitions from HL7 are translated in this directory. The ci-build (latest continuous-integration build), r4, and r5 versions are built from [this](https://github.com/databricks-industry-solutions/json2spark-schema/blob/main/01_healthcare_FHIR_demo.py) notebook.
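For readers unfamiliar with these translated files, each resource schema is a JSON-serialized Spark schema that can be loaded back into a StructType, mirroring _read_schema above. The dbignite/schemas/&lt;version&gt;/&lt;Resource&gt;.json path below is inferred from the schema_version handling in this PR rather than stated in the diff, so treat it as an assumption.

``` python
import json
from pyspark.sql.types import StructType

# Load one translated FHIR resource schema back into a Spark StructType
# (hypothetical path following the dbignite/schemas/<version>/<Resource>.json layout)
with open("dbignite/schemas/r4/Patient.json") as f:
    patient_schema = StructType.fromJson(json.load(f))

print(patient_schema.fieldNames()[:5])  # a few of Patient's top-level fields
```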
31 files renamed without changes.