Skip to content

Commit

Permalink
moved the logic for generating PatientBlocks to the corresponding con…
Browse files Browse the repository at this point in the history
…version rules
  • Loading branch information
ChaoPang committed Oct 9, 2024
1 parent bec317a commit a45532d
Show file tree
Hide file tree
Showing 4 changed files with 227 additions and 52 deletions.
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import re
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import List, Optional
from datetime import datetime
from typing import List, Optional, Tuple

import meds_reader

from cehrbert.data_generators.hf_data_generator.meds_utils import PatientBlock, PatientDemographics


@dataclass
Expand Down Expand Up @@ -69,6 +74,24 @@ def __init__(self):
self._discharge_matching_rules = self._create_discharge_matching_rules()
self._text_event_numeric_event_map = {r.code: r for r in self._create_text_event_to_numeric_event_rules()}

@abstractmethod
def generate_demographics_and_patient_blocks(
    self, patient: meds_reader.Subject, prediction_time: datetime = None
) -> Tuple[PatientDemographics, List[PatientBlock]]:
    """
    Build the demographics record and the patient blocks for one subject.

    This is the extension point each concrete conversion rule has to override;
    the base implementation only raises.

    Args:
        patient: The meds_reader Subject whose events are to be converted.
        prediction_time: Optional cutoff time handed to the implementation
            (defaults to None, i.e. no cutoff).
    Returns:
        Tuple[PatientDemographics, List[PatientBlock]]
    Raises:
        NotImplementedError: Always, when the base implementation is called.
    """
    message = "Must implement the method for generating the patient blocks from a meds_reader Subject"
    raise NotImplementedError(message)

@abstractmethod
def _create_ed_admission_matching_rules(self) -> List[str]:
"""
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,70 @@
import re
from typing import List
from datetime import datetime
from typing import List, Tuple

import meds_reader

from cehrbert.data_generators.hf_data_generator.hf_dataset_mapping import birth_codes
from cehrbert.data_generators.hf_data_generator.meds_to_cehrbert_conversion_rules.meds_to_cehrbert_base import (
EventConversionRule,
MedsToCehrBertConversion,
)
from cehrbert.data_generators.hf_data_generator.meds_utils import PatientBlock, PatientDemographics


class MedsToBertMimic4(MedsToCehrBertConversion):

def __init__(self, default_visit_id):
    """
    Initialize the conversion rules and store the starting visit id.

    Args:
        default_visit_id: Visit id given to the first patient block built by
            generate_demographics_and_patient_blocks; that method adds one
            for every subsequent block.
    """
    # The base initializer runs before the attribute is stored; keep this order.
    super().__init__()
    self.default_visit_id = default_visit_id

def generate_demographics_and_patient_blocks(
    self, patient: meds_reader.Subject, prediction_time: datetime = None
) -> Tuple[PatientDemographics, List[PatientBlock]]:
    """
    Collect demographics and group a subject's timed events into per-day blocks.

    Events are consumed in the order given by ``patient.events``. Iteration
    stops at the first event whose time is later than ``prediction_time``
    (when one is supplied). Birth, race, gender and ethnicity events fill the
    demographics record; every other event that has a time is appended to the
    block of its calendar day. Events without a time that are not demographic
    events are ignored.

    Args:
        patient: The meds_reader Subject to convert.
        prediction_time: Optional cutoff; None disables the cutoff.
    Returns:
        The demographics record and the list of patient blocks. Visit ids start
        at ``self.default_visit_id`` and grow by one for each new calendar day.
    """
    # Code prefix (compared case-insensitively) -> demographics field it fills.
    prefix_to_field = (("RACE", "race"), ("GENDER", "gender"), ("ETHNICITY", "ethnicity"))
    demographic_values = {"birth_datetime": None, "race": None, "gender": None, "ethnicity": None}

    next_visit_id = self.default_visit_id
    block_start = None
    pending_events = []
    blocks = []

    for event in patient.events:
        # Nothing past the prediction time may be used.
        if prediction_time is not None and event.time is not None and event.time > prediction_time:
            break

        if event.code in birth_codes:
            demographic_values["birth_datetime"] = event.time
            continue

        upper_code = event.code.upper()
        matched_field = next(
            (field for prefix, field in prefix_to_field if upper_code.startswith(prefix)), None
        )
        if matched_field is not None:
            demographic_values[matched_field] = event.code
            continue

        # Untimed, non-demographic events cannot be placed on a day.
        if event.time is None:
            continue

        if not block_start:
            block_start = event.time

        # A change of calendar day closes the open block and starts the next one.
        if block_start.date() != event.time.date():
            blocks.append(PatientBlock(pending_events, next_visit_id, self))
            pending_events = []
            block_start = event.time
            next_visit_id += 1
        pending_events.append(event)

    # Flush whatever is left in the last open block.
    if pending_events:
        blocks.append(PatientBlock(pending_events, next_visit_id, self))

    return PatientDemographics(**demographic_values), blocks

def _create_ed_admission_matching_rules(self) -> List[str]:
    """
    Return the ED admission matching rules for this conversion.

    Returns:
        The two patterns "ED_REGISTRATION//" and "TRANSFER_TO//ED".
        NOTE(review): how these strings are matched against events is decided
        by the base class, which is not visible here - confirm there.
    """
    return ["ED_REGISTRATION//", "TRANSFER_TO//ED"]

Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,67 @@
from typing import List
from datetime import datetime
from typing import List, Tuple

import meds_reader

from cehrbert.data_generators.hf_data_generator.hf_dataset_mapping import birth_codes
from cehrbert.data_generators.hf_data_generator.meds_to_cehrbert_conversion_rules.meds_to_cehrbert_base import (
EventConversionRule,
MedsToCehrBertConversion,
)
from cehrbert.data_generators.hf_data_generator.meds_utils import PatientBlock, PatientDemographics


class MedsToCehrbertOMOP(MedsToCehrBertConversion):

def generate_demographics_and_patient_blocks(
    self, patient: meds_reader.Subject, prediction_time: datetime = None
) -> Tuple[PatientDemographics, List[PatientBlock]]:
    """
    Collect demographics and group a subject's timed events into per-day blocks.

    Events are consumed in the order given by ``patient.events``. Iteration
    stops at the first event whose time is later than ``prediction_time``
    (when one is supplied). Birth, race, gender and ethnicity events fill the
    demographics record; every other event that has a time is appended to the
    block of its calendar day. Events without a time that are not demographic
    events are ignored.

    Each block is labelled with the first non-empty ``visit_id`` found on one
    of its own events. A block whose events carry no ``visit_id`` falls back to
    the most recently known one. Previously the id was captured once from the
    first event that had one and then reused for every block of the patient.

    Args:
        patient: The meds_reader Subject to convert.
        prediction_time: Optional cutoff; None disables the cutoff.
    Returns:
        The demographics record and the list of patient blocks.
    """
    birth_datetime = None
    race = None
    gender = None
    ethnicity = None

    current_visit_id = None
    # True once current_visit_id was taken from an event of the open block.
    visit_id_from_current_block = False
    current_date = None
    events_for_current_date = []
    patient_blocks = []
    for e in patient.events:

        # Skip out of the loop if the events' time stamps are beyond the prediction time
        if prediction_time is not None and e.time is not None and e.time > prediction_time:
            break

        # Events are not guaranteed to expose a visit_id attribute.
        event_visit_id = getattr(e, "visit_id", None)

        # Seed the id from any event while nothing is known yet, so a block
        # without an id of its own is only left unlabelled when no id exists.
        if not current_visit_id:
            current_visit_id = event_visit_id

        # This indicates demographics features
        if e.code in birth_codes:
            birth_datetime = e.time
        elif e.code.upper().startswith("RACE"):
            race = e.code
        elif e.code.upper().startswith("GENDER"):
            gender = e.code
        elif e.code.upper().startswith("ETHNICITY"):
            ethnicity = e.code
        elif e.time is not None:
            if current_date is not None and current_date.date() != e.time.date():
                # The calendar day changed: close the open block and start a new one.
                patient_blocks.append(PatientBlock(events_for_current_date, current_visit_id, self))
                events_for_current_date = []
                visit_id_from_current_block = False
            current_date = e.time
            events_for_current_date.append(e)

            # The first event of the block that carries a visit_id labels the block.
            if event_visit_id and not visit_id_from_current_block:
                current_visit_id = event_visit_id
                visit_id_from_current_block = True

    if events_for_current_date:
        patient_blocks.append(PatientBlock(events_for_current_date, current_visit_id, self))

    demographics = PatientDemographics(birth_datetime=birth_datetime, race=race, gender=gender, ethnicity=ethnicity)
    return demographics, patient_blocks

def _create_ed_admission_matching_rules(self) -> List[str]:
    """
    Return the ED admission matching rules for this conversion.

    Returns:
        A single pattern, "Visit/ER".
        NOTE(review): how this string is matched against events is decided by
        the base class, which is not visible here - confirm there.
    """
    return ["Visit/ER"]

Expand Down
140 changes: 91 additions & 49 deletions src/cehrbert/data_generators/hf_data_generator/meds_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import functools
import os
import re
from dataclasses import dataclass
from datetime import datetime
from typing import Dict, Iterable, List, Optional, Tuple, Union

Expand All @@ -15,6 +16,9 @@
from cehrbert.data_generators.hf_data_generator.meds_to_cehrbert_conversion_rules.meds_to_cehrbert_base import (
MedsToCehrBertConversion,
)
from cehrbert.data_generators.hf_data_generator.meds_to_cehrbert_conversion_rules.meds_to_cehrbert_omop import (
MedsToCehrbertOMOP,
)
from cehrbert.med_extension.schema_extension import CehrBertPatient, Event, Visit
from cehrbert.runners.hf_runner_argument_dataclass import DataTrainingArguments, MedsToCehrBertConversionType

Expand Down Expand Up @@ -49,6 +53,14 @@ def get_subject_split(meds_reader_db_path: str) -> Dict[str, List[int]]:
return result


@dataclass
class PatientDemographics:
    """
    Demographic attributes collected for one patient.

    Every field defaults to None, which means no value was supplied. The
    annotations spell out ``Optional`` so they agree with those defaults.
    """

    # Birth timestamp of the patient.
    birth_datetime: Optional[datetime] = None
    # Race value; the conversion code in this module's package stores the raw event code here.
    race: Optional[str] = None
    # Gender value, stored the same way.
    gender: Optional[str] = None
    # Ethnicity value, stored the same way.
    ethnicity: Optional[str] = None


class PatientBlock:
"""
Represents a block of medical events for a single patient visit, including.
Expand Down Expand Up @@ -225,46 +237,74 @@ def convert_one_patient(
prediction_time: datetime = None,
label: Union[int, float] = None,
) -> CehrBertPatient:
birth_datetime = None
race = None
gender = None
ethnicity = None

visit_id = default_visit_id
current_date = None
events_for_current_date = []
patient_blocks = []
for e in patient.events:

# Skip out of the loop if the events's time stamps are beyond the prediction time
if prediction_time is not None and e.time is not None:
if e.time > prediction_time:
break

# This indicates demographics features
if e.code in birth_codes:
birth_datetime = e.time
elif e.code.upper().startswith("RACE"):
race = e.code
elif e.code.upper().startswith("GENDER"):
gender = e.code
elif e.code.upper().startswith("ETHNICITY"):
ethnicity = e.code
elif e.time is not None:
if not current_date:
current_date = e.time

if current_date.date() == e.time.date():
events_for_current_date.append(e)
else:
patient_blocks.append(PatientBlock(events_for_current_date, visit_id, conversion))
events_for_current_date = list()
events_for_current_date.append(e)
current_date = e.time
visit_id += 1

if events_for_current_date:
patient_blocks.append(PatientBlock(events_for_current_date, visit_id, conversion))
"""
Converts a patient's event data into a CehrBertPatient object, processing
their medical history, visit details, and demographic information.
Parameters:
----------
patient : meds_reader.Subject
The patient's event data, including time-stamped medical events such as
demographic data (race, gender, ethnicity) and clinical visits (ED admissions,
hospital admissions, discharges).
conversion : MedsToCehrBertConversion
The conversion object to map and process medical event data into the format
required by CehrBert.
default_visit_id : int, optional (default=1)
The starting ID for patient visits. This is incremented as new visits are
identified in the event data.
prediction_time : datetime, optional (default=None)
The cutoff time for processing events. Events occurring after this time are
ignored.
label : Union[int, float], optional (default=None)
The prediction label associated with this patient, which could represent a
clinical outcome (e.g., survival or treatment response).
Returns:
-------
CehrBertPatient
An object containing the patient's transformed event data, visits, demographics,
and associated label in a structure compatible with CehrBert's input requirements.
Description:
-----------
This function processes a patient's medical history, including demographic
information (birth date, race, gender, and ethnicity) and visit details. It iterates
through the patient's events and groups them into visits (ED, admission, discharge).
Visits are formed based on timestamps, and certain logic is applied to merge ED visits
into hospital admissions if they occur within 24 hours of each other.
For each event, demographic attributes like birth date, race, gender, and ethnicity
are extracted. If the event has a timestamp, it is compared with `prediction_time` to
filter out events that occurred after the specified time.
The function handles ongoing (incomplete) visits and cases where multiple visits
should be merged (e.g., ED followed by hospital admission within 24 hours). After
processing the events, visits are built with details such as visit type, start/end
datetime, and events during the visit.
The function returns a `CehrBertPatient` object that includes the patient's medical
events, structured into visits, along with demographic information, and optionally
a prediction label.
Example Usage:
-------------
patient_data = convert_one_patient(
patient=some_patient_object,
conversion=some_conversion_object,
default_visit_id=1,
prediction_time=datetime.now(),
label=1
)
"""
demographics, patient_blocks = conversion.generate_demographics_and_patient_blocks(
patient=patient, prediction_time=prediction_time
)

admit_discharge_pairs = []
active_ed_index = None
Expand Down Expand Up @@ -359,24 +399,26 @@ def convert_one_patient(
)
)
age_at_index = -1
if prediction_time is not None and birth_datetime is not None:
age_at_index = prediction_time.year - birth_datetime.year
if prediction_time is not None and demographics.birth_datetime is not None:
age_at_index = prediction_time.year - demographics.birth_datetime.year
if (prediction_time.month, prediction_time.day) < (
birth_datetime.month,
birth_datetime.day,
demographics.birth_datetime.month,
demographics.birth_datetime.day,
):
age_at_index -= 1

# birth_datetime can not be None
assert birth_datetime is not None, f"patient_id: {patient.subject_id} does not have a valid birth_datetime"
assert (
demographics.birth_datetime is not None
), f"patient_id: {patient.subject_id} does not have a valid birth_datetime"

return CehrBertPatient(
patient_id=patient.subject_id,
birth_datetime=birth_datetime,
birth_datetime=demographics.birth_datetime,
visits=visits,
race=race if race else UNKNOWN_VALUE,
gender=gender if gender else UNKNOWN_VALUE,
ethnicity=ethnicity if ethnicity else UNKNOWN_VALUE,
race=demographics.race if demographics.race else UNKNOWN_VALUE,
gender=demographics.gender if demographics.gender else UNKNOWN_VALUE,
ethnicity=demographics.ethnicity if demographics.ethnicity else UNKNOWN_VALUE,
index_date=prediction_time,
age_at_index=age_at_index,
label=label,
Expand Down

0 comments on commit a45532d

Please sign in to comment.