diff --git a/src/cehrbert/data_generators/hf_data_generator/hf_dataset_mapping.py b/src/cehrbert/data_generators/hf_data_generator/hf_dataset_mapping.py index f5cf17d..f3377e9 100644 --- a/src/cehrbert/data_generators/hf_data_generator/hf_dataset_mapping.py +++ b/src/cehrbert/data_generators/hf_data_generator/hf_dataset_mapping.py @@ -115,7 +115,7 @@ def __init__(self, data_args: DataTrainingArguments, is_pretraining: bool = True def remove_columns(self): if self._is_pretraining: - return ["visits", "patient_id", "birth_datetime", "index_date"] + return ["visits", "birth_datetime", "index_date"] else: return [ "visits", diff --git a/src/cehrbert/utils/stat_utils.py b/src/cehrbert/utils/stat_utils.py new file mode 100644 index 0000000..e69de29