Skip to content

Commit

Permalink
Improvements related to timestamps and timezones
Browse files Browse the repository at this point in the history
* Make utcnow() calls timezone aware
* Implement workaround for clickhouse bug mymarilyn/clickhouse-driver#388
* Implement more tests for range deletions
* Refactoring of get_prev_range functions
* Fix problem in experiment result generation
  • Loading branch information
hellais committed Nov 21, 2023
1 parent e230b25 commit 6f628df
Show file tree
Hide file tree
Showing 16 changed files with 203 additions and 119 deletions.
4 changes: 2 additions & 2 deletions oonidata/analysis/web_analysis.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from collections import defaultdict
from dataclasses import dataclass
import dataclasses
from datetime import datetime
from datetime import datetime, timezone
import ipaddress
from typing import (
Generator,
Expand Down Expand Up @@ -674,7 +674,7 @@ def make_web_analysis(
fingerprintdb=fingerprintdb,
)

created_at = datetime.utcnow()
created_at = datetime.now(timezone.utc).replace(tzinfo=None)
website_analysis = WebAnalysis(
measurement_uid=web_o.measurement_uid,
observation_id=web_o.observation_id,
Expand Down
5 changes: 2 additions & 3 deletions oonidata/analysis/website_experiment_results.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from dataclasses import dataclass
from datetime import datetime
from datetime import datetime, timezone
import logging
from typing import Dict, Generator, List, NamedTuple, Optional, Tuple

Expand Down Expand Up @@ -796,7 +796,6 @@ def make_website_experiment_results(
Takes as input a list of web_analysis and outputs a list of
ExperimentResults for the website.
"""
experiment_result_id = "XXXX"
observation_id_list = []
first_analysis = web_analysis[0]

Expand Down Expand Up @@ -974,7 +973,7 @@ def get_agg_outcome(loni_list, category, agg_func) -> Optional[OutcomeStatus]:
measurement_uid=measurement_uid,
observation_id_list=observation_id_list,
timeofday=timeofday,
created_at=datetime.utcnow(),
created_at=datetime.now(timezone.utc).replace(tzinfo=None),
location_network_type=first_analysis.network_type,
location_network_asn=first_analysis.probe_asn,
location_network_cc=first_analysis.probe_cc,
Expand Down
13 changes: 11 additions & 2 deletions oonidata/cli/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import List, Optional

import click
from click_loglevel import LogLevel

from oonidata import __version__
from oonidata.dataclient import (
Expand Down Expand Up @@ -70,10 +71,18 @@ def _parse_csv(ctx, param, s: Optional[str]) -> List[str]:

@click.group()
@click.option("--error-log-file", type=Path)
@click.option(
"-l",
"--log-level",
type=LogLevel(),
default="INFO",
help="Set logging level",
show_default=True,
)
@click.version_option(__version__)
def cli(error_log_file: Path):
def cli(error_log_file: Path, log_level: int):
log.addHandler(logging.StreamHandler(sys.stderr))
log.setLevel(logging.INFO)
log.setLevel(log_level)
if error_log_file:
logging.basicConfig(
filename=error_log_file, encoding="utf-8", level=logging.ERROR
Expand Down
4 changes: 2 additions & 2 deletions oonidata/db/connections.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import time

from collections import defaultdict, namedtuple
from datetime import datetime
from datetime import datetime, timezone
from pprint import pformat
import logging

Expand Down Expand Up @@ -114,7 +114,7 @@ def __init__(self, output_dir):

def write_rows(self, table_name, rows, column_names):
if table_name not in self.open_writers:
ts = datetime.utcnow().strftime("%Y%m%dT%H%M%S")
ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S")

Check warning on line 117 in oonidata/db/connections.py

View check run for this annotation

Codecov / codecov/patch

oonidata/db/connections.py#L117

Added line #L117 was not covered by tests
fh = (self.output_dir / f"{table_name}-{ts}.csv").open("w")
csv_writer = csv.writer(fh)
csv_writer.writerow(column_names)
Expand Down
4 changes: 2 additions & 2 deletions oonidata/models/experiment_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import logging
from typing import Any, Dict, Generator, List, Optional, NamedTuple, Mapping, Tuple
from enum import Enum
from datetime import datetime
from datetime import datetime, timezone

from tabulate import tabulate
from oonidata.datautils import maybe_elipse
Expand Down Expand Up @@ -277,7 +277,7 @@ def iter_experiment_results(
target_name: str,
outcomes: List[Outcome],
) -> Generator[ExperimentResult, None, None]:
created_at = datetime.utcnow()
created_at = datetime.now(timezone.utc).replace(tzinfo=None)
for idx, outcome in enumerate(outcomes):
yield ExperimentResult(
measurement_uid=obs.measurement_uid,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import dataclasses
from datetime import datetime
from datetime import datetime, timezone
import orjson
from typing import List, Tuple
from oonidata.models.nettests import HTTPHeaderFieldManipulation
Expand All @@ -14,7 +14,7 @@ def make_observations(
mb_obs = HTTPMiddleboxObservation(
hfm_success=True,
observation_id=f"{msmt.measurement_uid}_0",
created_at=datetime.utcnow().replace(microsecond=0),
created_at=datetime.now(timezone.utc).replace(microsecond=0, tzinfo=None),
**dataclasses.asdict(self.measurement_meta),
)

Expand Down
4 changes: 2 additions & 2 deletions oonidata/transforms/nettests/http_invalid_request_line.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import dataclasses
from datetime import datetime
from datetime import datetime, timezone
from typing import List, Tuple
from oonidata.models.dataformats import maybe_binary_data_to_bytes
from oonidata.models.nettests import HTTPInvalidRequestLine
Expand Down Expand Up @@ -54,7 +54,7 @@ def make_observations(
mb_obs = HTTPMiddleboxObservation(
hirl_success=True,
observation_id=f"{msmt.measurement_uid}_0",
created_at=datetime.utcnow().replace(microsecond=0),
created_at=datetime.now(timezone.utc).replace(microsecond=0, tzinfo=None),
**dataclasses.asdict(self.measurement_meta),
)
if not msmt.test_keys.sent:
Expand Down
11 changes: 7 additions & 4 deletions oonidata/transforms/nettests/measurement_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import dataclasses
import logging
from urllib.parse import urlparse, urlsplit
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
from typing import (
Callable,
Optional,
Expand Down Expand Up @@ -134,11 +134,14 @@
)


def normalize_failure(failure: Failure):
def normalize_failure(failure: Union[Failure, bool]) -> Failure:
if not failure:
# This will set it to None even when it's false
return None

if failure is True:
return "true"

Check warning on line 143 in oonidata/transforms/nettests/measurement_transformer.py

View check run for this annotation

Codecov / codecov/patch

oonidata/transforms/nettests/measurement_transformer.py#L143

Added line #L143 was not covered by tests

if failure.startswith("unknown_failure"):
for substring, new_failure in unknown_failure_map:
if substring in failure:
Expand Down Expand Up @@ -445,7 +448,7 @@ def maybe_set_web_fields(
],
web_obs: WebObservation,
prefix: str,
field_names: Tuple[str],
field_names: Tuple[str, ...],
):
# TODO: the fact we have to do this is an artifact of the original
# one-observation per type model. Once we decide we don't want to go for
Expand Down Expand Up @@ -899,7 +902,7 @@ def consume_web_observations(

for idx, obs in enumerate(web_obs_list):
obs.observation_id = f"{obs.measurement_uid}_{idx}"
obs.created_at = datetime.utcnow().replace(microsecond=0)
obs.created_at = datetime.now(timezone.utc).replace(microsecond=0, tzinfo=None)

return web_obs_list

Expand Down
4 changes: 2 additions & 2 deletions oonidata/transforms/nettests/web_connectivity.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from copy import deepcopy
from datetime import datetime
from datetime import datetime, timezone
from typing import Dict, List, Tuple
from urllib.parse import urlparse
from oonidata.datautils import is_ip_bogon
Expand Down Expand Up @@ -41,7 +41,7 @@ def make_web_control_observations(
test_name=msmt.test_name,
test_version=msmt.test_version,
hostname=hostname,
created_at=datetime.utcnow(),
created_at=datetime.now(timezone.utc).replace(tzinfo=None),
)
# Reference for new-style web_connectivity:
# https://explorer.ooni.org/measurement/20220924T215758Z_webconnectivity_IR_206065_n1_2CRoWBNJkWc7VyAs?input=https%3A%2F%2Fdoh.dns.apple.com%2Fdns-query%3Fdns%3Dq80BAAABAAAAAAAAA3d3dwdleGFtcGxlA2NvbQAAAQAB
Expand Down
36 changes: 25 additions & 11 deletions oonidata/workers/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,14 +74,25 @@ def make_analysis_in_a_day(
column_names_wa = [f.name for f in dataclasses.fields(WebAnalysis)]
column_names_er = [f.name for f in dataclasses.fields(MeasurementExperimentResult)]

prev_range = get_prev_range(
db=db_lookup,
table_name=WebAnalysis.__table_name__,
timestamp=datetime.combine(day, datetime.min.time()),
test_name=[],
probe_cc=probe_cc,
timestamp_column="measurement_start_time",
)
prev_range_list = [
get_prev_range(
db=db_lookup,
table_name=WebAnalysis.__table_name__,
timestamp=datetime.combine(day, datetime.min.time()),
test_name=[],
probe_cc=probe_cc,
timestamp_column="measurement_start_time",
),
get_prev_range(
db=db_lookup,
table_name=MeasurementExperimentResult.__table_name__,
timestamp=datetime.combine(day, datetime.min.time()),
test_name=[],
probe_cc=probe_cc,
timestamp_column="timeofday",
probe_cc_column="location_network_cc",
),
]

log.info(f"loading ground truth DB for {day}")
t = PerfTimer()
Expand Down Expand Up @@ -118,6 +129,7 @@ def make_analysis_in_a_day(
fingerprintdb=fingerprintdb,
)
)
log.info(f"generated {len(website_analysis)} website_analysis")
if len(website_analysis) == 0:
log.info(f"no website analysis for {probe_cc}, {test_name}")
continue

Check warning on line 135 in oonidata/workers/analysis.py

View check run for this annotation

Codecov / codecov/patch

oonidata/workers/analysis.py#L134-L135

Added lines #L134 - L135 were not covered by tests
Expand All @@ -138,6 +150,7 @@ def make_analysis_in_a_day(

with statsd_client.timer("oonidata.web_analysis.experiment_results.timing"):
website_er = list(make_website_experiment_results(website_analysis))
log.info(f"generated {len(website_er)} website_er")
table_name, rows = make_db_rows(
dc_list=website_er,
column_names=column_names_er,
Expand All @@ -154,9 +167,9 @@ def make_analysis_in_a_day(
web_obs_ids = ",".join(map(lambda wo: wo.observation_id, web_obs))
log.error(f"failed to generate analysis for {web_obs_ids}", exc_info=True)

Check warning on line 168 in oonidata/workers/analysis.py

View check run for this annotation

Codecov / codecov/patch

oonidata/workers/analysis.py#L166-L168

Added lines #L166 - L168 were not covered by tests

maybe_delete_prev_range(
db=db_lookup, prev_range=prev_range, table_name=WebAnalysis.__table_name__
)
for prev_range in prev_range_list:
maybe_delete_prev_range(db=db_lookup, prev_range=prev_range)
db_writer.close()
return idx


Expand Down Expand Up @@ -285,3 +298,4 @@ def start_analysis(
obs_per_sec = round(total_obs_count / t_total.s)
log.info(f"finished processing {start_day} - {end_day} speed: {obs_per_sec}obs/s)")
log.info(f"{total_obs_count} msmts in {t_total.pretty}")
dask_client.shutdown()
Loading

0 comments on commit 6f628df

Please sign in to comment.