From 6215db42d49f1d119e7dc882d8ef1ae1230ed93f Mon Sep 17 00:00:00 2001 From: Chao Pang Date: Sat, 11 Jan 2025 12:55:26 -0500 Subject: [PATCH] fixed the bug in creating the new visit_id --- src/cehrbert_data/tools/ehrshot_to_omop.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/cehrbert_data/tools/ehrshot_to_omop.py b/src/cehrbert_data/tools/ehrshot_to_omop.py index b196bf7..ac456e8 100644 --- a/src/cehrbert_data/tools/ehrshot_to_omop.py +++ b/src/cehrbert_data/tools/ehrshot_to_omop.py @@ -667,13 +667,13 @@ def disconnect_visit_id(data: DataFrame, spark: SparkSession, cache_folder: str) .rowsBetween(Window.unboundedPreceding, Window.currentRow) ) ).withColumn( - "row_number", - f.row_number().over(Window.orderBy(f.col("visit_id"), f.col("visit_partition"))) + "visit_partition_rank", + f.dense_rank().over(Window.orderBy(f.col("visit_id"), f.col("visit_partition"))) ).crossJoin( visit_records.select(f.max("visit_id").alias("max_visit_id")) ).withColumn( "new_visit_id", - f.col("max_visit_id") + f.col("row_number") + f.col("max_visit_id") + f.col("visit_partition_rank") ).drop( "max_visit_id", "row_number" )