Skip to content

Commit

Permalink
Implement temporary workaround to obtain the timestamp of the run
Browse files (browse the repository at this point in the history)
  • Loading branch information
hellais committed Apr 25, 2024
1 parent c3e3d41 commit 386b588
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 4 deletions.
16 changes: 14 additions & 2 deletions oonipipeline/src/oonipipeline/temporal/workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,20 @@ class ObservationsWorkflow:
async def run(self, params: ObservationsWorkflowParams) -> dict:
# optimize_all_tables(params.clickhouse)

read_time = workflow.now() - timedelta(days=1)
bucket_date = f"{read_time.year}-{read_time.month:02}-{read_time.day:02}"
workflow_id = workflow.info().workflow_id

# TODO(art): fragile temporary workaround: the bucket date is parsed out of the workflow ID. Waiting on an answer to this Temporal Slack question:
# https://temporalio.slack.com/archives/CTT84RS0P/p1714040382186429
bucket_date = "-".join(workflow_id.split("-")[-3:]).split("T")[0]

# read_time = workflow_info.start_time - timedelta(days=1)
# log.info(f"workflow.info().start_time={workflow.info().start_time} ")
# log.info(f"workflow.info().cron_schedule={workflow.info().cron_schedule} ")
# log.info(f"workflow_info.workflow_id={workflow_info.workflow_id} ")
# log.info(f"workflow_info.run_id={workflow_info.run_id} ")
# log.info(f"workflow.now()={workflow.now()}")
# print(workflow)
# bucket_date = f"{read_time.year}-{read_time.month:02}-{read_time.day:02}"

t = PerfTimer()
log.info(
Expand Down
9 changes: 7 additions & 2 deletions oonipipeline/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,15 @@ def test_full_workflow(
)
assert result.exit_code == 0
# assert len(list(tmp_path.glob("*.warc.gz"))) == 1
import ipdb

ipdb.set_trace()
res = db.execute(
"SELECT COUNT(DISTINCT(measurement_uid)) FROM obs_web WHERE bucket_date = '2022-10-20' AND probe_cc = 'BA'"
"SELECT bucket_date, COUNT(DISTINCT(measurement_uid)) FROM obs_web WHERE probe_cc = 'BA' GROUP BY bucket_date"
)
assert res[0][0] == 200 # type: ignore
bucket_dict = dict(res[0])
assert bucket_dict["2022-10-20"] == 200

res = db.execute(
"SELECT COUNT() FROM obs_web WHERE bucket_date = '2022-10-20' AND probe_cc = 'BA'"
)
Expand Down

0 comments on commit 386b588

Please sign in to comment.