From 8d453cbb68132cf299f17c604a1dc292be6f9a19 Mon Sep 17 00:00:00 2001
From: Chao Pang
Date: Fri, 25 Oct 2024 13:17:31 -0400
Subject: [PATCH] check whether patient_id exists in the meds_reader database because subject_splits can contain more patients than the dataset

---
 src/cehrbert/data_generators/hf_data_generator/meds_utils.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/cehrbert/data_generators/hf_data_generator/meds_utils.py b/src/cehrbert/data_generators/hf_data_generator/meds_utils.py
index 1eb16e0..6f0e281 100644
--- a/src/cehrbert/data_generators/hf_data_generator/meds_utils.py
+++ b/src/cehrbert/data_generators/hf_data_generator/meds_utils.py
@@ -221,8 +221,9 @@ def _meds_to_cehrbert_generator(
     with meds_reader.SubjectDatabase(path_to_db) as patient_database:
         for shard in shards:
             for patient_id, prediction_time, label in shard:
-                patient = patient_database[patient_id]
-                yield convert_one_patient(patient, conversion, prediction_time, label)
+                if patient_id in patient_database:
+                    patient = patient_database[patient_id]
+                    yield convert_one_patient(patient, conversion, prediction_time, label)
 
 
 def _create_cehrbert_data_from_meds(