Skip to content

Commit

Permalink
put back logging_utils to enable custom logging for spark applications (
Browse files Browse the repository at this point in the history
  • Loading branch information
ChaoPang authored Oct 14, 2024
1 parent 2664508 commit c7c9e76
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 0 deletions.
4 changes: 4 additions & 0 deletions src/cehrbert_data/apps/generate_training_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
get_measurement_table,
validate_table_names,
)
from cehrbert_data.utils.logging_utils import add_console_logging


def main(
Expand Down Expand Up @@ -333,6 +334,9 @@ def main(

ARGS = parser.parse_args()

# Enable logging
add_console_logging()

main(
ARGS.input_folder,
ARGS.output_folder,
Expand Down
5 changes: 5 additions & 0 deletions src/cehrbert_data/cohorts/spark_app_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
get_descendant_concept_ids,
preprocess_domain_table,
)
from cehrbert_data.utils.logging_utils import add_console_logging

from ..cohorts.query_builder import ENTRY_COHORT, NEGATIVE_COHORT, QueryBuilder

Expand Down Expand Up @@ -677,6 +678,10 @@ def create_prediction_cohort(
:param ehr_table_list:
:return:
"""
# Add logging to spark application output when enable_logging is set to True
if spark_args.enable_logging:
add_console_logging()

# Toggle the prior/post observation_period depending on the is_window_post_index flag
prior_observation_period = (
0 if spark_args.is_window_post_index else spark_args.observation_window + spark_args.hold_off_window
Expand Down
12 changes: 12 additions & 0 deletions src/cehrbert_data/utils/logging_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import logging
import sys


def add_console_logging():
root = logging.getLogger()
root.setLevel(logging.INFO)

handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
6 changes: 6 additions & 0 deletions src/cehrbert_data/utils/spark_parse_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,12 @@ def create_spark_args():
--single_contribution: Whether patients should contribute only once to the training data.
"""
parser = argparse.ArgumentParser(description="Arguments for spark applications for generating cohort definitions")
parser.add_argument(
"--enable_logging",
dest="enable_logging",
action="store_true",
help="Indicate whether we should log the info to the spark-submit output",
)
parser.add_argument(
"-c",
"--cohort_name",
Expand Down

0 comments on commit c7c9e76

Please sign in to comment.