Skip to content

Commit

Permalink
Allow querying SCDs without metric_time (#1621)
Browse files Browse the repository at this point in the history
Remove query validation that requires including `metric_time` in the
group by if your query includes an SCD. After much product discussion,
we've decided this aligns the behavior users expect for SCDs.
  • Loading branch information
courtneyholcomb authored Jan 25, 2025
1 parent 8569035 commit fb421a5
Show file tree
Hide file tree
Showing 53 changed files with 11,315 additions and 122 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20250124-133535.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Allow querying SCDs without metric_time.
time: 2025-01-24T13:35:35.800322-08:00
custom:
Author: courtneyholcomb
Issue: "1621"
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
from metricflow_semantics.model.linkable_element_property import LinkableElementProperty
from metricflow_semantics.model.semantic_manifest_lookup import SemanticManifestLookup
from metricflow_semantics.model.semantics.element_filter import LinkableElementFilter
from metricflow_semantics.model.semantics.linkable_element_set import LinkableElementSet
from metricflow_semantics.query.group_by_item.resolution_path import MetricFlowQueryResolutionPath
from metricflow_semantics.query.issues.issues_base import (
MetricFlowQueryResolutionIssue,
Expand All @@ -27,9 +26,6 @@
from metricflow_semantics.query.issues.parsing.offset_metric_requires_metric_time import (
OffsetMetricRequiresMetricTimeIssue,
)
from metricflow_semantics.query.issues.parsing.scd_requires_metric_time import (
ScdRequiresMetricTimeIssue,
)
from metricflow_semantics.query.resolver_inputs.query_resolver_inputs import ResolverInputForQuery
from metricflow_semantics.query.validation_rules.base_validation_rule import PostResolutionQueryValidationRule

Expand All @@ -44,7 +40,6 @@ class MetricTimeQueryValidationRule(PostResolutionQueryValidationRule):
* Cumulative metrics.
* Derived metrics with an offset time.
* Slowly changing dimensions
"""

def __init__( # noqa: D107
Expand Down Expand Up @@ -134,14 +129,6 @@ def _validate_derived_metric(
),
)

def _scd_linkable_element_set_for_measure(self, measure_reference: MeasureReference) -> LinkableElementSet:
"""Returns subset of the query's `LinkableElements` that are SCDs and associated with the measure."""
measure_semantic_model = self._manifest_lookup.semantic_model_lookup.measure_lookup.get_properties(
measure_reference
).model_reference

return self._scd_linkable_element_set.filter_by_left_semantic_model(measure_semantic_model)

@override
def validate_metric_in_resolution_dag(
self,
Expand Down Expand Up @@ -192,19 +179,4 @@ def validate_measure_in_resolution_dag(
measure_reference: MeasureReference,
resolution_path: MetricFlowQueryResolutionPath,
) -> MetricFlowQueryResolutionIssueSet:
scd_linkable_elemenent_set_for_measure = self._scd_linkable_element_set_for_measure(measure_reference)

if scd_linkable_elemenent_set_for_measure.spec_count == 0:
return MetricFlowQueryResolutionIssueSet.empty_instance()

if self._query_includes_metric_time:
return MetricFlowQueryResolutionIssueSet.empty_instance()

# Queries that join to an SCD don't support direct references to agg_time_dimension, so we
# only check for metric_time. If we decide to support agg_time_dimension, we should add a check

return MetricFlowQueryResolutionIssueSet.from_issue(
ScdRequiresMetricTimeIssue.from_parameters(
scds_in_query=scd_linkable_elemenent_set_for_measure.specs, query_resolution_path=resolution_path
)
)
return MetricFlowQueryResolutionIssueSet.empty_instance()
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE,
)
from metricflow_semantics.test_helpers.metric_time_dimension import MTD
from metricflow_semantics.test_helpers.snapshot_helpers import assert_object_snapshot_equal, assert_str_snapshot_equal
from metricflow_semantics.test_helpers.snapshot_helpers import assert_object_snapshot_equal

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -465,52 +465,6 @@ def test_cumulative_metric_agg_time_dimension_name_validation(
assert_object_snapshot_equal(request=request, mf_test_configuration=mf_test_configuration, obj=result)


def test_join_to_scd_no_time_dimension_validation(
request: FixtureRequest,
mf_test_configuration: MetricFlowTestConfiguration,
scd_query_parser: MetricFlowQueryParser,
) -> None:
"""Test that queries that join to SCD semantic models fail if no time dimensions are selected."""
with pytest.raises(InvalidQueryException) as exc_info:
scd_query_parser.parse_and_validate_query(
metric_names=["bookings"],
group_by_names=["listing__country"],
)

assert len(exc_info.value.args) == 1
assert isinstance(exc_info.value.args[0], str)
assert_str_snapshot_equal(
request=request,
mf_test_configuration=mf_test_configuration,
snapshot_id="error",
snapshot_str=exc_info.value.args[0],
)


def test_join_through_scd_no_time_dimension_validation(
request: FixtureRequest,
mf_test_configuration: MetricFlowTestConfiguration,
scd_query_parser: MetricFlowQueryParser,
) -> None:
"""Test that queries that join through SCDs semantic models fail if no time dimensions are selected."""
with pytest.raises(InvalidQueryException) as exc_info:
# "user__home_state_latest" is not an SCD itself, but since we go through
# "listing" and that is an SCD, we should raise an exception here as well
scd_query_parser.parse_and_validate_query(
metric_names=["bookings"],
group_by_names=["listing__user__home_state_latest"],
)

assert len(exc_info.value.args) == 1
assert isinstance(exc_info.value.args[0], str)
assert_str_snapshot_equal(
request=request,
mf_test_configuration=mf_test_configuration,
snapshot_id="error",
snapshot_str=exc_info.value.args[0],
)


def test_derived_metric_query_parsing(
request: FixtureRequest,
mf_test_configuration: MetricFlowTestConfiguration,
Expand Down

This file was deleted.

This file was deleted.

21 changes: 21 additions & 0 deletions tests_metricflow/integration/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,24 @@ def it_helpers( # noqa: D103
source_schema=mf_test_configuration.mf_source_schema,
sql_client=sql_client,
)


@pytest.fixture
def scd_it_helpers( # noqa: D103
sql_client: SqlClient,
create_source_tables: bool,
scd_semantic_manifest_lookup: SemanticManifestLookup,
time_spine_sources: Mapping[TimeGranularity, TimeSpineSource],
mf_test_configuration: MetricFlowTestConfiguration,
) -> IntegrationTestHelpers:
return IntegrationTestHelpers(
mf_engine=MetricFlowEngine(
semantic_manifest_lookup=scd_semantic_manifest_lookup,
sql_client=sql_client,
column_association_resolver=DunderColumnAssociationResolver(),
time_source=ConfigurableTimeSource(as_datetime("2020-01-01")),
),
mf_system_schema=mf_test_configuration.mf_system_schema,
source_schema=mf_test_configuration.mf_source_schema,
sql_client=sql_client,
)
77 changes: 77 additions & 0 deletions tests_metricflow/integration/query_output/test_query_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,80 @@ def test_derived_metric_alias( # noqa: D103
snapshot_str=query_result.result_df.text_format(),
sql_engine=sql_client.sql_engine_type,
)


@pytest.mark.sql_engine_snapshot
@pytest.mark.duckdb_only
def test_scd_with_coarser_grain( # noqa: D103
request: FixtureRequest,
mf_test_configuration: MetricFlowTestConfiguration,
sql_client: SqlClient,
scd_it_helpers: IntegrationTestHelpers,
) -> None:
query_result = scd_it_helpers.mf_engine.query(
MetricFlowQueryRequest.create_with_random_request_id(
metrics=(MetricParameter(name="family_bookings"),),
group_by_names=["listing__capacity", "metric_time__month"],
order_by_names=["listing__capacity", "metric_time__month"],
)
)
assert query_result.result_df is not None, "Unexpected empty result."

assert_str_snapshot_equal(
request=request,
mf_test_configuration=mf_test_configuration,
snapshot_id="query_output",
snapshot_str=query_result.result_df.text_format(),
sql_engine=sql_client.sql_engine_type,
)


@pytest.mark.sql_engine_snapshot
@pytest.mark.duckdb_only
def test_scd_group_by_without_metric_time( # noqa: D103
request: FixtureRequest,
mf_test_configuration: MetricFlowTestConfiguration,
sql_client: SqlClient,
scd_it_helpers: IntegrationTestHelpers,
) -> None:
query_result = scd_it_helpers.mf_engine.query(
MetricFlowQueryRequest.create_with_random_request_id(
metrics=(MetricParameter(name="family_bookings"),),
group_by_names=["listing__capacity"],
order_by_names=["listing__capacity"],
)
)
assert query_result.result_df is not None, "Unexpected empty result."

assert_str_snapshot_equal(
request=request,
mf_test_configuration=mf_test_configuration,
snapshot_id="query_output",
snapshot_str=query_result.result_df.text_format(),
sql_engine=sql_client.sql_engine_type,
)


@pytest.mark.sql_engine_snapshot
@pytest.mark.duckdb_only
def test_scd_filter_without_metric_time( # noqa: D103
request: FixtureRequest,
mf_test_configuration: MetricFlowTestConfiguration,
sql_client: SqlClient,
scd_it_helpers: IntegrationTestHelpers,
) -> None:
query_result = scd_it_helpers.mf_engine.query(
MetricFlowQueryRequest.create_with_random_request_id(
metrics=(MetricParameter(name="family_bookings"),),
where_constraints=("{{ Dimension('listing__capacity') }} > 2",),
)
)
assert query_result.result_df is not None, "Unexpected empty result."

assert_str_snapshot_equal(
request=request,
mf_test_configuration=mf_test_configuration,
snapshot_id="query_output",
snapshot_str=query_result.result_df.text_format(),
sql_engine=sql_client.sql_engine_type,
)
78 changes: 78 additions & 0 deletions tests_metricflow/query_rendering/test_query_rendering.py
Original file line number Diff line number Diff line change
Expand Up @@ -668,3 +668,81 @@ def test_derived_metric_alias(
dataflow_plan_builder=dataflow_plan_builder,
query_spec=query_spec,
)


@pytest.mark.sql_engine_snapshot
def test_scd_dimension_filter_without_metric_time( # noqa: D103
request: FixtureRequest,
mf_test_configuration: MetricFlowTestConfiguration,
scd_column_association_resolver: ColumnAssociationResolver,
scd_query_parser: MetricFlowQueryParser,
scd_dataflow_plan_builder: DataflowPlanBuilder,
scd_dataflow_to_sql_converter: DataflowToSqlPlanConverter,
sql_client: SqlClient,
) -> None:
query_spec = scd_query_parser.parse_and_validate_query(
metric_names=("family_bookings",),
where_constraints=[
PydanticWhereFilter(
where_sql_template="{{ Dimension('listing__capacity') }} > 2",
)
],
).query_spec

render_and_check(
request=request,
mf_test_configuration=mf_test_configuration,
dataflow_to_sql_converter=scd_dataflow_to_sql_converter,
sql_client=sql_client,
dataflow_plan_builder=scd_dataflow_plan_builder,
query_spec=query_spec,
)


@pytest.mark.sql_engine_snapshot
def test_scd_dimension_group_by_without_metric_time( # noqa: D103
request: FixtureRequest,
mf_test_configuration: MetricFlowTestConfiguration,
scd_column_association_resolver: ColumnAssociationResolver,
scd_query_parser: MetricFlowQueryParser,
scd_dataflow_plan_builder: DataflowPlanBuilder,
scd_dataflow_to_sql_converter: DataflowToSqlPlanConverter,
sql_client: SqlClient,
) -> None:
query_spec = scd_query_parser.parse_and_validate_query(
metric_names=("family_bookings",),
group_by_names=("listing__capacity",),
).query_spec

render_and_check(
request=request,
mf_test_configuration=mf_test_configuration,
dataflow_to_sql_converter=scd_dataflow_to_sql_converter,
sql_client=sql_client,
dataflow_plan_builder=scd_dataflow_plan_builder,
query_spec=query_spec,
)


@pytest.mark.sql_engine_snapshot
def test_scd_group_by_and_coarser_grain( # noqa: D103
request: FixtureRequest,
mf_test_configuration: MetricFlowTestConfiguration,
scd_query_parser: MetricFlowQueryParser,
scd_dataflow_plan_builder: DataflowPlanBuilder,
scd_dataflow_to_sql_converter: DataflowToSqlPlanConverter,
sql_client: SqlClient,
) -> None:
query_spec = scd_query_parser.parse_and_validate_query(
metric_names=("family_bookings",),
group_by_names=("listing__capacity", "metric_time__month"),
).query_spec

render_and_check(
request=request,
mf_test_configuration=mf_test_configuration,
dataflow_to_sql_converter=scd_dataflow_to_sql_converter,
sql_client=sql_client,
dataflow_plan_builder=scd_dataflow_plan_builder,
query_spec=query_spec,
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
test_name: test_scd_filter_without_metric_time
test_filename: test_query_output.py
---
family_bookings
-----------------
9
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
test_name: test_scd_group_by_without_metric_time
test_filename: test_query_output.py
---
listing__capacity family_bookings
------------------- -----------------
3 2
4 5
5 2
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
test_name: test_scd_with_coarser_grain
test_filename: test_query_output.py
---
metric_time__month listing__capacity family_bookings
-------------------- ------------------- -----------------
2020-01-01T00:00:00 3 2
2020-01-01T00:00:00 4 5
2020-01-01T00:00:00 5 2
Loading

0 comments on commit fb421a5

Please sign in to comment.