diff --git a/oonipipeline/src/oonipipeline/temporal/activities/analysis.py b/oonipipeline/src/oonipipeline/temporal/activities/analysis.py index 7c39c89c..83af92ad 100644 --- a/oonipipeline/src/oonipipeline/temporal/activities/analysis.py +++ b/oonipipeline/src/oonipipeline/temporal/activities/analysis.py @@ -117,6 +117,9 @@ def make_analysis_in_a_day(params: MakeAnalysisParams) -> dict: column_names_wa = [f.name for f in dataclasses.fields(WebAnalysis)] column_names_er = [f.name for f in dataclasses.fields(MeasurementExperimentResult)] + # TODO(art): the previous-range search and deletion below means this + # activity is not fully idempotent. + # We should look into fixing it. prev_range_list = [ get_prev_range( db=db_lookup, diff --git a/oonipipeline/src/oonipipeline/temporal/activities/observations.py b/oonipipeline/src/oonipipeline/temporal/activities/observations.py index 279b3776..8ea28288 100644 --- a/oonipipeline/src/oonipipeline/temporal/activities/observations.py +++ b/oonipipeline/src/oonipipeline/temporal/activities/observations.py @@ -141,6 +141,9 @@ def make_observations_for_file_entry_batch( def make_observation_in_day(params: MakeObservationsParams) -> dict: day = datetime.strptime(params.bucket_date, "%Y-%m-%d").date() + # TODO(art): the previous-range search and deletion below means this + # activity is not fully idempotent. + # We should look into fixing it. with ClickhouseConnection(params.clickhouse, row_buffer_size=10_000) as db: prev_ranges = [] for table_name in ["obs_web"]: