Skip to content

Commit

Permalink
fixed the calculation of the time to event column when the prediction…
Browse files Browse the repository at this point in the history
… window is unbounded
  • Loading branch information
ChaoPang committed Sep 25, 2024
1 parent 071771a commit b70450e
Showing 1 changed file with 15 additions and 5 deletions.
20 changes: 15 additions & 5 deletions src/cehrbert_data/cohorts/spark_app_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,11 +494,21 @@ def build(self):
.where(F.col("num_of_concepts") >= self._num_of_concepts)
)

# Add time_to_event
cohort = cohort.withColumn(
"study_end_date",
F.coalesce(F.col("outcome_date"), F.date_add(cohort.index_date, self._prediction_window))
)
if self._is_prediction_window_unbounded:
observation_period = self._dependency_dict[OBSERVATION_PERIOD]
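# Unbounded prediction window: derive study_end_date as outcome_date when present,
# otherwise observation_period_end_date (time_to_event itself is computed further below).
# NOTE(review): this join keeps only persons that have an observation_period row and
# yields one row per matching period -- confirm observation_period is unique per person_id.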
cohort = cohort.join(
observation_period.select("person_id", "observation_period_end_date"), "person_id"
).withColumn(
"study_end_date",
F.coalesce(F.col("outcome_date"), F.col("observation_period_end_date"))
).drop("observation_period_end_date")
else:
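# Bounded prediction window: derive study_end_date as outcome_date when present,
# otherwise index_date + prediction_window days (time_to_event itself is computed below).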
cohort = cohort.withColumn(
"study_end_date",
F.coalesce(F.col("outcome_date"), F.date_add(cohort.index_date, self._prediction_window))
)
cohort = cohort.withColumn("time_to_event", F.datediff("study_end_date", "index_date"))

# if patient_splits is provided, we will
Expand Down

0 comments on commit b70450e

Please sign in to comment.