From 0bfdc395c0041b2c56daedba2e684c16ee10b45d Mon Sep 17 00:00:00 2001 From: Chao Pang Date: Thu, 12 Sep 2024 10:08:07 -0400 Subject: [PATCH] added patient_id to the pretraining data for debugging --- .../data_generators/hf_data_generator/hf_dataset_mapping.py | 2 +- src/cehrbert/utils/stat_utils.py | 0 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 src/cehrbert/utils/stat_utils.py diff --git a/src/cehrbert/data_generators/hf_data_generator/hf_dataset_mapping.py b/src/cehrbert/data_generators/hf_data_generator/hf_dataset_mapping.py index f5cf17d0..f3377e9c 100644 --- a/src/cehrbert/data_generators/hf_data_generator/hf_dataset_mapping.py +++ b/src/cehrbert/data_generators/hf_data_generator/hf_dataset_mapping.py @@ -115,7 +115,7 @@ def __init__(self, data_args: DataTrainingArguments, is_pretraining: bool = True def remove_columns(self): if self._is_pretraining: - return ["visits", "patient_id", "birth_datetime", "index_date"] + return ["visits", "birth_datetime", "index_date"] else: return [ "visits", diff --git a/src/cehrbert/utils/stat_utils.py b/src/cehrbert/utils/stat_utils.py new file mode 100644 index 00000000..e69de29b