-
Notifications
You must be signed in to change notification settings - Fork 1.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Create an
offset_timestamp
separate from MicrobatchBuilder
The `MicrobatchBuilder.offset_timestamp` _truncates_ the timestamp before offsetting it. We don't want to do that; we want to offset the "raw" timestamp. We could have renamed the microbatch builder function to `truncate_and_offset_timestamp` and split the offset logic out into a separate abstract function. However, the offset logic in the MicrobatchBuilder context depends on the truncation. We might later be able to refactor the MicrobatchBuilder-provided function to truncate _after_ offsetting rather than before, but that is out of scope for this initial work and should be revisited later.
- Loading branch information
Showing
4 changed files
with
136 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
from datetime import datetime | ||
|
||
from dateutil.relativedelta import relativedelta | ||
|
||
from dbt.artifacts.resources.types import BatchSize | ||
from dbt_common.exceptions import DbtRuntimeError | ||
|
||
|
||
def offset_timestamp(timestamp: datetime, batch_size: BatchSize, offset: int) -> datetime:
    """Offset ``timestamp`` by ``offset`` units of ``batch_size`` without truncating it.

    Note: THIS IS DIFFERENT FROM MicrobatchBuilder.offset_timestamp. That function first
    `truncates` the timestamp, and then does delta addition subtraction from there. This
    function _doesn't_ truncate the timestamp and uses `relativedelta` for specific edge
    case handling (months, years), which may produce different results than the delta math
    done in `MicrobatchBuilder.offset_timestamp`.

    Args:
        timestamp: The starting point; it is shifted as-is (no truncation).
        batch_size: The unit of the shift (hour, day, month or year).
        offset: How many ``batch_size`` units to shift by. Negative values move
            backwards in time.

    Returns:
        The shifted timestamp.

    Raises:
        DbtRuntimeError: If ``batch_size`` is not one of the handled sizes.

    Examples
    2024-09-17 16:06:00 + BatchSize.hour -1 -> 2024-09-17 15:06:00
    2024-09-17 16:06:00 + BatchSize.hour +1 -> 2024-09-17 17:06:00
    2024-09-17 16:06:00 + BatchSize.day -1 -> 2024-09-16 16:06:00
    2024-09-17 16:06:00 + BatchSize.day +1 -> 2024-09-18 16:06:00
    2024-09-17 16:06:00 + BatchSize.month -1 -> 2024-08-17 16:06:00
    2024-09-17 16:06:00 + BatchSize.month +1 -> 2024-10-17 16:06:00
    2024-09-17 16:06:00 + BatchSize.year -1 -> 2023-09-17 16:06:00
    2024-09-17 16:06:00 + BatchSize.year +1 -> 2025-09-17 16:06:00
    2024-01-31 16:06:00 + BatchSize.month +1 -> 2024-02-29 16:06:00
    2024-02-29 16:06:00 + BatchSize.year +1 -> 2025-02-28 16:06:00
    """
    # The parameters were previously written as `name=Type`, which made the type
    # objects default values instead of annotations; they are proper annotations now.
    if batch_size == BatchSize.hour:
        return timestamp + relativedelta(hours=offset)
    elif batch_size == BatchSize.day:
        return timestamp + relativedelta(days=offset)
    elif batch_size == BatchSize.month:
        # A day that does not exist in the target month is clamped to the
        # month's last day (see the 01-31 -> 02-29 example above).
        return timestamp + relativedelta(months=offset)
    elif batch_size == BatchSize.year:
        # Feb 29 lands on Feb 28 when the target year is not a leap year.
        return timestamp + relativedelta(years=offset)
    else:
        raise DbtRuntimeError(f"Unhandled batch_size '{batch_size}'")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
from datetime import datetime | ||
|
||
import pytest | ||
import pytz | ||
|
||
from dbt.artifacts.resources.types import BatchSize | ||
from dbt.event_time.event_time import offset_timestamp | ||
|
||
|
||
class TestEventTime:
    """Unit tests for the non-truncating ``offset_timestamp`` helper."""

    # Common starting point for the straightforward +/-1 cases below.
    _BASE = datetime(2024, 9, 5, 3, 56, 1, 1, pytz.UTC)

    @pytest.mark.parametrize(
        "timestamp,batch_size,offset,expected_timestamp",
        [
            # One unit forwards and backwards for every batch size.
            (_BASE, BatchSize.year, 1, _BASE.replace(year=2025)),
            (_BASE, BatchSize.year, -1, _BASE.replace(year=2023)),
            (_BASE, BatchSize.month, 1, _BASE.replace(month=10)),
            (_BASE, BatchSize.month, -1, _BASE.replace(month=8)),
            (_BASE, BatchSize.day, 1, _BASE.replace(day=6)),
            (_BASE, BatchSize.day, -1, _BASE.replace(day=4)),
            (_BASE, BatchSize.hour, 1, _BASE.replace(hour=4)),
            (_BASE, BatchSize.hour, -1, _BASE.replace(hour=2)),
            # Jan 31 + 1 month: expected to land on the last day of February.
            (
                datetime(2024, 1, 31, 16, 6, 0, 0, pytz.UTC),
                BatchSize.month,
                1,
                datetime(2024, 2, 29, 16, 6, 0, 0, pytz.UTC),
            ),
            # Feb 29 + 1 year: expected to land on Feb 28 of the non-leap year.
            (
                datetime(2024, 2, 29, 16, 6, 0, 0, pytz.UTC),
                BatchSize.year,
                1,
                datetime(2025, 2, 28, 16, 6, 0, 0, pytz.UTC),
            ),
        ],
    )
    def test_offset_timestamp(self, timestamp, batch_size, offset, expected_timestamp):
        """Check that offsetting ``timestamp`` yields ``expected_timestamp``."""
        shifted = offset_timestamp(timestamp, batch_size, offset)
        assert shifted == expected_timestamp