Skip to content

Commit

Permalink
drop omop_table from the generated visit dataframe after using it
Browse files Browse the repository at this point in the history
  • Loading branch information
ChaoPang committed Oct 31, 2024
1 parent 5e59f51 commit 418bb56
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions src/cehrbert_data/tools/ehrshot_to_omop.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -475,12 +475,14 @@ def generate_visit_id(data: DataFrame, time_interval: int = 12) -> DataFrame:
 )

 # We only allow the generated visit_ids associated with the visit_occurrence table
-visit = data.select("patient_id", "visit_order", "omop_table").distinct().withColumn(
+visit = data.where(
+f.col("omop_table") == "visit_occurrence"
+).select("patient_id", "visit_order").distinct().withColumn(
 "new_visit_id",
 f.abs(
 f.hash(f.concat(f.col("patient_id").cast("string"), f.col("visit_order").cast("string")))
 ).cast("bigint")
-).where(f.col("omop_table") == "visit_occurrence")
+)

 # Validate the uniqueness of visit_id
 visit.groupby("new_visit_id").count().select(f.assert_true(f.col("count") == 1))
Expand Down

0 comments on commit 418bb56

Please sign in to comment.