Skip to content

Commit

Permalink
Move parsing logic of SampleWindowType to SampleWindow
Browse files Browse the repository at this point in the history
  • Loading branch information
QMalcolm committed Jan 29, 2025
1 parent 96e3b7a commit 825de61
Show file tree
Hide file tree
Showing 3 changed files with 208 additions and 39 deletions.
48 changes: 9 additions & 39 deletions core/dbt/cli/option_types.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
from datetime import datetime
from typing import Optional

import pytz
from click import Choice, ParamType
from click import Choice, Context, Parameter, ParamType

from dbt.artifacts.resources.types import BatchSize
from dbt.config.utils import normalize_warn_error_options, parse_cli_yaml_string
from dbt.event_time.event_time import offset_timestamp
from dbt.event_time.sample_window import SampleWindow
from dbt.events import ALL_EVENT_NAMES
from dbt.exceptions import OptionNotYamlDictError, ValidationError
Expand Down Expand Up @@ -99,43 +96,16 @@ def convert(self, value, param, ctx):
class SampleWindowType(ParamType):
    """Click parameter type that converts a CLI string into a ``SampleWindow``."""

    name = "SAMPLE_WINDOW"

    def convert(
        self, value, param: Optional[Parameter], ctx: Optional[Context]
    ) -> Optional[SampleWindow]:
        """Convert the raw option value into a ``SampleWindow``.

        Args:
            value: Raw option value; ``None`` passes through, a ``str`` is parsed.
            param: The click parameter being converted, forwarded to ``self.fail``.
            ctx: The active click context, forwarded to ``self.fail``.

        Returns:
            ``None`` when ``value`` is ``None``; otherwise the window produced by
            ``SampleWindow.from_relative_string``.

        Any parsing error, or a value that is neither ``None`` nor ``str``, is
        reported through ``self.fail``.
        """
        if value is None:
            return None

        if isinstance(value, str):
            try:
                # All parsing rules live on SampleWindow; this type only adapts
                # the result (or the failure) to click.
                return SampleWindow.from_relative_string(value)
            except Exception as e:
                # Re-report the parser's message through click so it is tied to
                # this parameter.
                self.fail(str(e), param, ctx)
        else:
            self.fail(f"Cannot load SAMPLE_WINDOW from type {type(value)}", param, ctx)
36 changes: 36 additions & 0 deletions core/dbt/event_time/sample_window.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
from __future__ import annotations

from datetime import datetime

import pytz
from attr import dataclass

from dbt.artifacts.resources.types import BatchSize
from dbt.event_time.event_time import offset_timestamp
from dbt_common.dataclass_schema import dbtClassMixin
from dbt_common.exceptions import DbtRuntimeError


@dataclass
Expand All @@ -22,3 +28,33 @@ def __post_serialize__(self, data, context):
new_data["start"] = self.start
new_data["end"] = self.end
return new_data

@classmethod
def from_relative_string(cls, relative_string: str) -> SampleWindow:
    """Build a window that ends now (UTC) and starts a relative span earlier.

    ``relative_string`` must consist of exactly two whitespace-separated
    tokens: an integer count followed by a grain name, for example
    ``"4 days"`` or ``"1 HOUR"``. The grain is lower-cased and stripped of
    trailing ``s`` characters before being looked up by name in ``BatchSize``,
    so plural and upper-case spellings are accepted.

    Args:
        relative_string: The relative window description to parse.

    Returns:
        A new instance whose ``end`` is the current UTC time and whose
        ``start`` is ``end`` shifted by ``-count`` grains via
        ``offset_timestamp``.

    Raises:
        DbtRuntimeError: If the string does not contain exactly two tokens,
            the first token is not an integer, or the second token is not a
            ``BatchSize`` member name.
    """
    end = datetime.now(tz=pytz.UTC)

    # split() with no separator ignores leading/trailing whitespace and treats
    # runs of whitespace as one delimiter, so "4  days" and " 4 days " parse.
    relative_window = relative_string.split()
    if len(relative_window) != 2:
        raise DbtRuntimeError(
            f"Cannot load SAMPLE_WINDOW from '{relative_string}'. Must be of form 'DAYS_INT GRAIN_SIZE'."
        )

    raw_lookback, raw_grain = relative_window

    try:
        # NOTE(review): a negative count flips the sign of the offset below,
        # which presumably places start after end; confirm whether negative
        # counts should be rejected here.
        lookback = int(raw_lookback)
    except ValueError as err:
        raise DbtRuntimeError(f"Unable to convert '{raw_lookback}' to an integer.") from err

    # rstrip removes every trailing "s", which is how plurals are normalised.
    batch_size_string = raw_grain.lower().rstrip("s")
    try:
        batch_size = BatchSize[batch_size_string]
    except KeyError as err:
        grains = [size.value for size in BatchSize]
        grain_plurals = [BatchSize.plural(size) for size in BatchSize]
        valid_grains = grains + grain_plurals
        raise DbtRuntimeError(
            f"Invalid grain size '{raw_grain}'. Must be one of {valid_grains}."
        ) from err

    start = offset_timestamp(timestamp=end, batch_size=batch_size, offset=-1 * lookback)

    return cls(start=start, end=end)
163 changes: 163 additions & 0 deletions tests/unit/event_time/test_sample_mode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
from datetime import datetime
from typing import Union

import freezegun
import pytest
import pytz

from dbt.event_time.sample_window import SampleWindow
from dbt_common.exceptions import DbtRuntimeError


# Must stay in sync with the instant passed to freezegun.freeze_time below.
_FROZEN_NOW = datetime(2025, 1, 28, 18, 4, 0, 0, pytz.UTC)


def _window_from(year: int, month: int, day: int, hour: int) -> SampleWindow:
    """Return the expected window from the given start (minute 04, UTC) to the frozen now."""
    return SampleWindow(
        start=datetime(year, month, day, hour, 4, 0, 0, pytz.UTC),
        end=_FROZEN_NOW,
    )


@pytest.mark.parametrize(
    "relative_string,expected_result",
    [
        # Years
        ("4 years", _window_from(2021, 1, 28, 18)),
        ("1 year", _window_from(2024, 1, 28, 18)),
        ("4 YEARS", _window_from(2021, 1, 28, 18)),
        ("1 YEAR", _window_from(2024, 1, 28, 18)),
        # Months
        ("4 months", _window_from(2024, 9, 28, 18)),
        ("1 month", _window_from(2024, 12, 28, 18)),
        ("4 MONTHS", _window_from(2024, 9, 28, 18)),
        ("1 MONTH", _window_from(2024, 12, 28, 18)),
        # Days
        ("4 days", _window_from(2025, 1, 24, 18)),
        ("1 day", _window_from(2025, 1, 27, 18)),
        ("4 DAYS", _window_from(2025, 1, 24, 18)),
        ("1 DAY", _window_from(2025, 1, 27, 18)),
        # Hours
        ("4 hours", _window_from(2025, 1, 28, 14)),
        ("1 hour", _window_from(2025, 1, 28, 17)),
        ("4 HOURS", _window_from(2025, 1, 28, 14)),
        ("1 HOUR", _window_from(2025, 1, 28, 17)),
        # Invalid inputs: the expected value is the exception that must be raised.
        (
            "1 week",
            DbtRuntimeError(
                "Invalid grain size 'week'. Must be one of ['hour', 'day', 'month', 'year', 'hours', 'days', 'months', 'years']."
            ),
        ),
        ("an hour", DbtRuntimeError("Unable to convert 'an' to an integer.")),
        (
            "3",
            DbtRuntimeError(
                "Cannot load SAMPLE_WINDOW from '3'. Must be of form 'DAYS_INT GRAIN_SIZE'."
            ),
        ),
        (
            "True",
            DbtRuntimeError(
                "Cannot load SAMPLE_WINDOW from 'True'. Must be of form 'DAYS_INT GRAIN_SIZE'."
            ),
        ),
        ("days 3", DbtRuntimeError("Unable to convert 'days' to an integer.")),
        (
            "{}",
            DbtRuntimeError(
                "Cannot load SAMPLE_WINDOW from '{}'. Must be of form 'DAYS_INT GRAIN_SIZE'."
            ),
        ),
    ],
)
@freezegun.freeze_time("2025-01-28T18:04:0Z")
def test_from_relative_string(
    relative_string: str, expected_result: Union[SampleWindow, Exception]
):
    """Check that a relative string parses to the expected window or raises the expected error.

    When ``expected_result`` is an exception instance, the call must raise an
    exception of that type whose string form matches. Otherwise the returned
    window must equal ``expected_result``.
    """
    if isinstance(expected_result, Exception):
        # Keep the failure path separate so an assertion failure on the success
        # path is never mistaken for the expected exception.
        with pytest.raises(type(expected_result)) as exc_info:
            SampleWindow.from_relative_string(relative_string)
        assert str(exc_info.value) == str(expected_result)
    else:
        assert SampleWindow.from_relative_string(relative_string) == expected_result

0 comments on commit 825de61

Please sign in to comment.