Skip to content

Commit

Permalink
added test_convert_code_to_omop_concept
Browse files Browse the repository at this point in the history
  • Loading branch information
ChaoPang committed Oct 31, 2024
1 parent 3c469fe commit 8cb0a4f
Showing 1 changed file with 58 additions and 1 deletion.
59 changes: 58 additions & 1 deletion tests/unit_tests/test_ehrshot_to_omop.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
from datetime import datetime
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, TimestampType
from cehrbert_data.tools.ehrshot_to_omop import map_unit, map_answer, create_omop_person
from cehrbert_data.tools.ehrshot_to_omop import (
map_unit, map_answer, create_omop_person, convert_code_to_omop_concept
)


# Define the test case
Expand Down Expand Up @@ -39,6 +41,61 @@ def setUp(self):
StructField("start", TimestampType(), True)
])

def test_convert_code_to_omop_concept(self):
    """Verify the output of convert_code_to_omop_concept on a small fixture.

    The fixture holds three events whose ``code`` values look like
    ``"<vocabulary>/<concept_code>"`` and a concept table that covers only
    two of them. The test expects every input row back with
    ``vocabulary_id``, ``concept_code`` and ``concept_id`` columns added,
    where the row without a matching concept carries ``concept_id`` 0.
    """
    # --- Input fixtures -------------------------------------------------
    event_schema = StructType([
        StructField("patient_id", IntegerType(), True),
        StructField("code", StringType(), True),
    ])
    event_rows = [
        (1, "ICD10/1234"),
        (2, "SNOMED/5678"),
        (3, "ICD10/0000"),  # No matching concept
    ]

    concept_schema = StructType([
        StructField("vocabulary_id", StringType(), True),
        StructField("concept_code", StringType(), True),
        StructField("concept_id", IntegerType(), True),
    ])
    concept_rows = [
        ("ICD10", "1234", 1001),
        ("SNOMED", "5678", 1002),
    ]

    events_df = self.spark.createDataFrame(event_rows, schema=event_schema)
    concept_df = self.spark.createDataFrame(concept_rows, schema=concept_schema)

    # --- Exercise the function under test -------------------------------
    actual_df = convert_code_to_omop_concept(events_df, concept_df, "code")

    # --- Expected output ------------------------------------------------
    # The expected columns are the event columns followed by the concept
    # columns, so the schema is composed from the two input schemas.
    expected_schema = StructType(event_schema.fields + concept_schema.fields)
    expected_rows = [
        (1, "ICD10/1234", "ICD10", "1234", 1001),  # Match with concept_id 1001
        (2, "SNOMED/5678", "SNOMED", "5678", 1002),  # Match with concept_id 1002
        (3, "ICD10/0000", "ICD10", "0000", 0),  # No match, default concept_id 0
    ]
    expected_df = self.spark.createDataFrame(expected_rows, schema=expected_schema)

    # Sort both sides by patient_id so the comparison does not depend on
    # the row order Spark happens to return.
    actual_sorted = actual_df.sort("patient_id").collect()
    expected_sorted = expected_df.sort("patient_id").collect()

    self.assertEqual(actual_sorted, expected_sorted)

def test_create_omop_person(self):
# Sample concept data for mapping demographic codes to concept_ids
concept_data = [
Expand Down

0 comments on commit 8cb0a4f

Please sign in to comment.