Skip to content

Commit

Permalink
Replaced all literal "N/A" with the constant NA
Browse files (browse the repository at this point in the history)
  • Loading branch information
ChaoPang committed Oct 2, 2024
1 parent b9c8cfc commit 8ff9e41
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 13 deletions.
1 change: 1 addition & 0 deletions src/cehrbert_data/const/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
]
REQUIRED_MEASUREMENT = "required_measurement"
UNKNOWN_CONCEPT = "[UNKNOWN]"
NA = "N/A"
CONCEPT = "concept"
CONCEPT_ANCESTOR = "concept_ancestor"
MEASUREMENT_QUESTION_PREFIX = "Question:"
Expand Down
15 changes: 8 additions & 7 deletions src/cehrbert_data/decorators/artificial_time_token_decorator.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from pyspark.sql import DataFrame, functions as F, types as T, Window as W

from ..const.common import NA
from ..const.artificial_tokens import VS_TOKEN, VE_TOKEN
from .patient_event_decorator_base import (
PatientEventDecorator, AttType,
Expand Down Expand Up @@ -103,7 +104,7 @@ def _decorate(self, patient_events: DataFrame):
.withColumn("concept_order", F.col("min_concept_order") - 1)
.withColumn("priority", F.lit(VS_TOKEN_PRIORITY))
.withColumn("unit", F.lit(None).cast("string"))
.withColumn("event_group_id", F.lit("N/A"))
.withColumn("event_group_id", F.lit(NA))
.drop("min_visit_concept_order", "max_visit_concept_order")
.drop("min_concept_order", "max_concept_order")
)
Expand All @@ -117,7 +118,7 @@ def _decorate(self, patient_events: DataFrame):
.withColumn("concept_order", F.col("max_concept_order") + 1)
.withColumn("priority", F.lit(VE_TOKEN_PRIORITY))
.withColumn("unit", F.lit(None).cast("string"))
.withColumn("event_group_id", F.lit("N/A"))
.withColumn("event_group_id", F.lit(NA))
.drop("min_visit_concept_order", "max_visit_concept_order")
.drop("min_concept_order", "max_concept_order")
)
Expand Down Expand Up @@ -161,7 +162,7 @@ def _decorate(self, patient_events: DataFrame):
.withColumn("visit_concept_order", F.col("min_visit_concept_order"))
.withColumn("concept_order", F.lit(0))
.withColumn("unit", F.lit(None).cast("string"))
.withColumn("event_group_id", F.lit("N/A"))
.withColumn("event_group_id", F.lit(NA))
.drop("prev_visit_end_date", "time_delta")
.drop("min_visit_concept_order", "max_visit_concept_order")
.drop("min_concept_order", "max_concept_order")
Expand All @@ -181,7 +182,7 @@ def _decorate(self, patient_events: DataFrame):
.withColumn("concept_order", F.lit(0))
.withColumn("priority", F.lit(VISIT_TYPE_TOKEN_PRIORITY))
.withColumn("unit", F.lit(None).cast("string"))
.withColumn("event_group_id", F.lit("N/A"))
.withColumn("event_group_id", F.lit(NA))
.drop("min_visit_concept_order", "max_visit_concept_order")
.drop("min_concept_order", "max_concept_order")
)
Expand Down Expand Up @@ -231,7 +232,7 @@ def _decorate(self, patient_events: DataFrame):
.withColumn("datetime", F.expr("datetime - INTERVAL 1 MINUTE"))
.withColumn("priority", F.lit(DISCHARGE_TOKEN_PRIORITY))
.withColumn("unit", F.lit(None).cast("string"))
.withColumn("event_group_id", F.lit("N/A"))
.withColumn("event_group_id", F.lit(NA))
.drop("discharged_to_concept_id", "visit_end_date")
.drop("min_visit_concept_order", "max_visit_concept_order")
.drop("min_concept_order", "max_concept_order")
Expand Down Expand Up @@ -281,7 +282,7 @@ def _decorate(self, patient_events: DataFrame):
.withColumn("concept_value_mask", F.lit(0))
.withColumn("concept_value", F.lit(0.0))
.withColumn("unit", F.lit(None).cast("string"))
.withColumn("event_group_id", F.lit("N/A"))
.withColumn("event_group_id", F.lit(NA))
.drop("prev_date", "time_delta", "is_span_boundary")
.drop("prev_datetime", "hour_delta")
)
Expand All @@ -308,7 +309,7 @@ def _decorate(self, patient_events: DataFrame):
.withColumn("concept_value_mask", F.lit(0))
.withColumn("concept_value", F.lit(0.0))
.withColumn("unit", F.lit(None).cast("string"))
.withColumn("event_group_id", F.lit("N/A"))
.withColumn("event_group_id", F.lit(NA))
.drop("prev_date", "time_delta", "is_span_boundary")
)

Expand Down
16 changes: 11 additions & 5 deletions src/cehrbert_data/decorators/death_event_decorator.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from pyspark.sql import DataFrame, functions as F, Window as W, types as T

from ..const.common import NA
from ..const.artificial_tokens import VS_TOKEN, VE_TOKEN, DEATH_TOKEN
from .patient_event_decorator_base import (
PatientEventDecorator,
Expand All @@ -10,7 +11,12 @@
time_mix_token,
time_token_func
)
from .token_priority import VS_TOKEN_PRIORITY, VE_TOKEN_PRIORITY, ATT_TOKEN_PRIORITY, DEATH_TOKEN_PRIORITY
from .token_priority import (
VS_TOKEN_PRIORITY,
VE_TOKEN_PRIORITY,
ATT_TOKEN_PRIORITY,
DEATH_TOKEN_PRIORITY
)


class DeathEventDecorator(PatientEventDecorator):
Expand Down Expand Up @@ -54,7 +60,7 @@ def _decorate(self, patient_events: DataFrame):
.withColumn("domain", F.lit("death"))
.withColumn("visit_rank_order", F.lit(1) + F.col("visit_rank_order"))
.withColumn("priority", DEATH_TOKEN_PRIORITY)
.withColumn("event_group_id", F.lit("N/A"))
.withColumn("event_group_id", F.lit(NA))
.drop("max_visit_occurrence_id")
)

Expand All @@ -63,15 +69,15 @@ def _decorate(self, patient_events: DataFrame):
.withColumn("standard_concept_id", F.lit(VS_TOKEN))
.withColumn("priority", VS_TOKEN_PRIORITY)
.withColumn("unit", F.lit(None).cast("string"))
.withColumn("event_group_id", F.lit("N/A"))
.withColumn("event_group_id", F.lit(NA))
)

ve_records = (
death_records
.withColumn("standard_concept_id", F.lit(VE_TOKEN))
.withColumn("priority", VE_TOKEN_PRIORITY)
.withColumn("unit", F.lit(None).cast("string"))
.withColumn("event_group_id", F.lit("N/A"))
.withColumn("event_group_id", F.lit(NA))
)

# Udf for calculating the time token
Expand All @@ -97,7 +103,7 @@ def _decorate(self, patient_events: DataFrame):
.withColumn("standard_concept_id", time_token_udf("time_delta"))
.withColumn("priority", F.lit(ATT_TOKEN_PRIORITY))
.withColumn("unit", F.lit(None).cast("string"))
.withColumn("event_group_id", F.lit("N/A"))
.withColumn("event_group_id", F.lit(NA))
.drop("time_delta")
)

Expand Down
3 changes: 2 additions & 1 deletion src/cehrbert_data/decorators/demographic_event_decorator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from .patient_event_decorator_base import PatientEventDecorator

from ..const.common import NA
from .token_priority import (
YEAR_TOKEN_PRIORITY,
AGE_TOKEN_PRIORITY,
Expand Down Expand Up @@ -40,7 +41,7 @@ def _decorate(self, patient_events: DataFrame):
.withColumn("concept_value_mask", F.lit(0))
.withColumn("concept_value", F.lit(0.0))
.withColumn("unit", F.lit(None).cast("string"))
.withColumn("event_group_id", F.lit("N/A"))
.withColumn("event_group_id", F.lit(NA))
.where("token_order = 1")
.drop("token_order")
)
Expand Down

0 comments on commit 8ff9e41

Please sign in to comment.