Skip to content

Commit

Permalink
Revert "Fix omop meds order (#68)"
Browse files Browse the repository at this point in the history
This reverts commit 25e2406.
  • Loading branch information
ChaoPang authored Oct 27, 2024
1 parent 25e2406 commit f43491e
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 80 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
"Visit/61",
"NUCC/315D00000X",
]
ED_VISIT_TYPE_CODES = ["VISIT/ER"]
DISCHARGE_FACILITY_TYPES = [
"8536",
"8863",
Expand Down Expand Up @@ -174,7 +173,7 @@ def _update_cehrbert_record(
mlm_skip_value: int = 0,
unit: str = NA,
) -> None:
cehrbert_record["concept_ids"].append(replace_escape_chars(code))
cehrbert_record["concept_ids"].append(code)
cehrbert_record["visit_concept_orders"].append(visit_concept_order)
cehrbert_record["ages"].append(age)
cehrbert_record["dates"].append(date)
Expand Down Expand Up @@ -239,11 +238,7 @@ def transform(self, record: Dict[str, Any]) -> Dict[str, Any]:

# We assume the first measurement to be the visit type of the current visit
visit_type = visit["visit_type"]
is_er_or_inpatient = (
visit_type in INPATIENT_VISIT_TYPES
or visit_type in INPATIENT_VISIT_TYPE_CODES
or visit_type in ED_VISIT_TYPE_CODES
)
is_inpatient = visit_type in INPATIENT_VISIT_TYPES or visit_type in INPATIENT_VISIT_TYPE_CODES

# Add artificial time tokens to the patient timeline if timedelta exists
if time_delta:
Expand Down Expand Up @@ -280,12 +275,39 @@ def transform(self, record: Dict[str, Any]) -> Dict[str, Any]:
visit_segment=visit_segment,
visit_concept_id=visit_type,
)
# Keep track of the existing outpatient events, we don't want to add them again
existing_outpatient_events = list()

for e in events:
# If the event doesn't have a time stamp, we skip it
if not e["time"]:
continue
# Add a medical token to the patient timeline
# If this is an inpatient visit, we use the event time stamps to calculate age and date
# because the patient can stay in the hospital for a period of time.
if is_inpatient:
# Calculate age using the event time stamp
age = relativedelta(e["time"], birth_datetime).years
# Calculate the week number since the epoch time
date = (e["time"] - datetime.datetime(year=1970, month=1, day=1)).days // 7
else:
# For outpatient visits, we use the visit time stamp to calculate age and time because we assume
# the outpatient visits start and end on the same day
pass

# Calculate the time diff in days w.r.t the previous measurement
meas_time_diff = (e["time"] - date_cursor).days
# Update the date_cursor if the time diff between two neighboring measurements is greater than or
# equal to 1 day
if meas_time_diff > 0:
date_cursor = e["time"]
if self._inpatient_time_token_function:
# This generates an artificial time token depending on the choice of the time token functions
self._update_cehrbert_record(
cehrbert_record,
code=f"i-{self._inpatient_time_token_function(meas_time_diff)}",
visit_concept_order=i + 1,
visit_segment=visit_segment,
visit_concept_id=visit_type,
)

# If numeric_value exists, this is a concept/value tuple, we indicate this using a concept_value_mask
numeric_value = e.get("numeric_value", None)
Expand All @@ -294,7 +316,6 @@ def transform(self, record: Dict[str, Any]) -> Dict[str, Any]:
concept_value_mask = int(numeric_value is not None)
concept_value = numeric_value if concept_value_mask == 1 else -1.0
code = replace_escape_chars(e["code"])

# If the value mask is 1, this indicates a numeric value associated with the concept
if concept_value_mask != 1:
# Otherwise we will try to concatenate the answer with the code if the categorical value is provided
Expand All @@ -303,37 +324,6 @@ def transform(self, record: Dict[str, Any]) -> Dict[str, Any]:
text_value_replaced = replace_escape_chars(text_value)
code = f"{code}//option:{text_value_replaced}"

# Add a medical token to the patient timeline
# If this is an inpatient visit, we use the event time stamps to calculate age and date
# because the patient can stay in the hospital for a period of time.
if is_er_or_inpatient:
# Calculate age using the event time stamp
age = relativedelta(e["time"], birth_datetime).years
# Calculate the week number since the epoch time
date = (e["time"] - datetime.datetime(year=1970, month=1, day=1)).days // 7
# Calculate the time diff in days w.r.t the previous measurement
meas_time_diff = (e["time"] - date_cursor).days
# Update the date_cursor if the time diff between two neighboring measurements is greater than or
# equal to 1 day
if meas_time_diff > 0:
date_cursor = e["time"]
if self._inpatient_time_token_function:
# This generates an artificial time token depending on the choice of the time token functions
self._update_cehrbert_record(
cehrbert_record,
code=f"i-{self._inpatient_time_token_function(meas_time_diff)}",
visit_concept_order=i + 1,
visit_segment=visit_segment,
visit_concept_id=visit_type,
)
else:
# For outpatient visits, we use the visit time stamp to calculate age and time because we assume
# the outpatient visits start and end on the same day.
# We check whether the date/code/value combination already exists in the existing events
# If they exist, we do not add them to the patient timeline for outpatient visits.
if (date, code, concept_value) in existing_outpatient_events:
continue

self._update_cehrbert_record(
cehrbert_record,
code=code,
Expand All @@ -347,10 +337,8 @@ def transform(self, record: Dict[str, Any]) -> Dict[str, Any]:
unit=unit,
mlm_skip_value=concept_value_mask,
)
existing_outpatient_events.append((date, code, concept_value))

# For inpatient or ER visits, we want to add the discharge_facility to the end of the visit
if is_er_or_inpatient:
if is_inpatient:
# If visit_end_datetime is populated for the inpatient visit, we update the date_cursor
visit_end_datetime = visit.get("visit_end_datetime", None)
if visit_end_datetime:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,6 @@ def __init__(self, **kwargs):
self._discharge_matching_rules = self._create_discharge_matching_rules()
self._text_event_numeric_event_map = {r.code: r for r in self._create_text_event_to_numeric_event_rules()}

@abstractmethod
def _create_visit_matching_rules(self) -> List[str]:
    """
    Hook for the matching rules that identify visits other than ED/admission.

    The base implementation defines no rules and always raises.

    Raises:
        NotImplementedError: Always; concrete conversions are expected to override this method.
    """
    message = "Must implement the matching rules for identifying the visits other than ED/admission"
    raise NotImplementedError(message)

@abstractmethod
def _create_ed_admission_matching_rules(self) -> List[str]:
"""
Expand Down Expand Up @@ -128,9 +122,6 @@ def _create_text_event_to_numeric_event_rules(self) -> List[EventConversionRule]
"""
raise NotImplementedError("Must implement the event mapping rules for converting text events to numeric events")

def get_other_visit_matching_rules(self) -> List[str]:
    """
    Return the visit matching rules built by ``_create_visit_matching_rules``.

    The rules are produced by a fresh call to ``_create_visit_matching_rules`` on every
    invocation; nothing is cached on the instance here.
    """
    visit_rules = self._create_visit_matching_rules()
    return visit_rules

def get_ed_admission_matching_rules(self) -> List[str]:
    """Return the ED admission matching rules held in ``self._ed_admission_matching_rules``."""
    ed_admission_rules = self._ed_admission_matching_rules
    return ed_admission_rules

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,6 @@ def __init__(self, default_visit_id, **kwargs):
super().__init__(**kwargs)
self.default_visit_id = default_visit_id

def _create_visit_matching_rules(self) -> List[str]:
    """Return an empty list: this conversion defines no additional visit matching rules."""
    no_rules: List[str] = []
    return no_rules

def _create_ed_admission_matching_rules(self) -> List[str]:
    """Return the two ED admission rules: the ED registration pattern and the transfer-to-ED pattern."""
    ed_registration_rule = "ED_REGISTRATION//"
    transfer_to_ed_rule = "TRANSFER_TO//ED"
    return [ed_registration_rule, transfer_to_ed_rule]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@

class MedsToCehrbertOMOP(MedsToCehrBertConversion):

def _create_visit_matching_rules(self) -> List[str]:
    """Return the single visit matching rule, the ``Visit/`` code pattern."""
    visit_prefix_rule = "Visit/"
    return [visit_prefix_rule]

def _create_ed_admission_matching_rules(self) -> List[str]:
    """Return the single ED admission matching rule, the ``Visit/ER`` code."""
    er_visit_rule = "Visit/ER"
    return [er_visit_rule]

Expand Down
8 changes: 2 additions & 6 deletions src/cehrbert/data_generators/hf_data_generator/meds_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,7 @@
from datasets import Dataset, DatasetDict, Split
from transformers.utils import logging

from cehrbert.data_generators.hf_data_generator import (
DEFAULT_ED_CONCEPT_ID,
DEFAULT_INPATIENT_CONCEPT_ID,
UNKNOWN_VALUE,
)
from cehrbert.data_generators.hf_data_generator import DEFAULT_INPATIENT_CONCEPT_ID, UNKNOWN_VALUE
from cehrbert.data_generators.hf_data_generator.hf_dataset import apply_cehrbert_dataset_mapping
from cehrbert.data_generators.hf_data_generator.hf_dataset_mapping import MedToCehrBertDatasetMapping
from cehrbert.data_generators.hf_data_generator.meds_to_cehrbert_conversion_rules import MedsToCehrBertConversion
Expand Down Expand Up @@ -136,7 +132,7 @@ def convert_one_patient(
visit_end_datetime = max([b.max_time for b in blocks])
discharge_facility = (
next(filter(None, [b.get_discharge_facility() for b in blocks]), None)
if visit_type in [DEFAULT_INPATIENT_CONCEPT_ID, DEFAULT_ED_CONCEPT_ID]
if visit_type == DEFAULT_INPATIENT_CONCEPT_ID
else None
)
visit_events = list()
Expand Down
35 changes: 20 additions & 15 deletions src/cehrbert/data_generators/hf_data_generator/patient_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,7 @@ def __init__(
# Cache these variables so we don't need to compute
self.has_ed_admission = self._has_ed_admission()
self.has_admission = self._has_admission()
self.discharged_to = self.get_discharge_facility()
self.has_discharge = self.discharged_to is not None
self.has_discharge = self._has_discharge()

# Infer the visit_type from the events
# Admission takes precedence over ED
Expand All @@ -83,14 +82,7 @@ def __init__(
elif self.has_ed_admission:
self.visit_type = DEFAULT_ED_CONCEPT_ID
else:
self.visit_type = self._infer_visit_type()

def _infer_visit_type(self) -> str:
    """
    Infer the visit type from the events of this block.

    Events are scanned in order; the code of the first event that matches any of the
    conversion's "other visit" matching rules (via ``re.match``, i.e. anchored at the start
    of the code) is returned.

    Returns:
        str: The matching event code, or ``DEFAULT_OUTPATIENT_CONCEPT_ID`` when no event matches.
    """
    # The rules do not depend on the event, so fetch them once instead of once per event.
    # Materialized into a list so the same rules can be re-scanned for every event.
    visit_rules = list(self.conversion.get_other_visit_matching_rules())
    for event in self.events:
        if any(re.match(rule, event.code) for rule in visit_rules):
            return event.code
    return DEFAULT_OUTPATIENT_CONCEPT_ID
self.visit_type = DEFAULT_OUTPATIENT_CONCEPT_ID

def _has_ed_admission(self) -> bool:
"""
Expand Down Expand Up @@ -118,7 +110,7 @@ def _has_admission(self) -> bool:
return True
return False

def get_discharge_facility(self) -> Optional[str]:
def _has_discharge(self) -> bool:
"""
Determines if the visit includes a discharge event.
Expand All @@ -128,7 +120,23 @@ def get_discharge_facility(self) -> Optional[str]:
for event in self.events:
for matching_rule in self.conversion.get_discharge_matching_rules():
if re.match(matching_rule, event.code):
return event.code
return True
return False

def get_discharge_facility(self) -> Optional[str]:
    """
    Extract the discharge facility code from the discharge event, if present.

    The search only runs when ``_has_discharge()`` reports a discharge event. Events are
    scanned in order; for the first event whose code contains one of the discharge matching
    rules, the rule text is removed from the code and every character that is not an ASCII
    letter is replaced with ``_``.

    Returns:
        Optional[str]: The sanitized discharge facility code, or None if no discharge event is found.
    """
    if not self._has_discharge():
        return None

    # The rules do not depend on the event, so fetch them once instead of once per event.
    # Materialized into a list so the same rules can be re-scanned for every event.
    discharge_rules = list(self.conversion.get_discharge_matching_rules())
    for event in self.events:
        for matching_rule in discharge_rules:
            # NOTE(review): _has_discharge tests the rules with re.match (regex, anchored at the
            # start of the code) while this uses plain substring containment; the two agree only
            # when the rules are literal prefixes -- confirm against the configured rules.
            if matching_rule in event.code:
                facility = event.code.replace(matching_rule, "")
                return re.sub(r"[^a-zA-Z]", "_", facility)
    return None

def _convert_event(self, event) -> List[Event]:
Expand Down Expand Up @@ -188,9 +196,6 @@ def get_meds_events(self) -> Iterable[Event]:
"""
events = []
for e in self.events:
# We only convert the events that are not visit type and discharge facility events
if (e.code == self.visit_type) or (self.discharged_to is not None and e.code == self.discharged_to):
continue
events.extend(self._convert_event(e))
return events

Expand Down

0 comments on commit f43491e

Please sign in to comment.