Skip to content

Commit

Permalink
fix(schema-compiler): Fix SQL generation for rolling_window queries with multiple time dimensions (#9124)
Browse files Browse the repository at this point in the history

* just typos in comments and moving code around

* fill missing dateRanges for TDs

* fix multiple TDs request SQL generation

* add tests

* fill daterange for all time dimension items in query

* remove this.fillMissingDateRangesForTimeDimensions from gateway

* Add guard for time series without daterange

* cargo fmt

* fix test for cubesql fix

* fix notes after review
  • Loading branch information
KSDaemon authored Feb 24, 2025
1 parent fa318a1 commit 52a664e
Show file tree
Hide file tree
Showing 5 changed files with 265 additions and 32 deletions.
93 changes: 68 additions & 25 deletions packages/cubejs-schema-compiler/src/adapter/BaseQuery.js
Original file line number Diff line number Diff line change
Expand Up @@ -945,8 +945,8 @@ export class BaseQuery {
).concat(multiStageMembers.map(m => `SELECT * FROM ${m.alias}`));
}

// Move regular measures to multiplied ones if there're same
// cubes to calculate. Most of the times it'll be much faster to
// Move regular measures to multiplied ones if there are same
// cubes to calculate. Most of the time it'll be much faster to
// calculate as there will be only single scan per cube.
if (
regularMeasures.length &&
Expand Down Expand Up @@ -1464,18 +1464,62 @@ export class BaseQuery {

overTimeSeriesQuery(baseQueryFn, cumulativeMeasure, fromRollup) {
const dateJoinCondition = cumulativeMeasure.dateJoinCondition();
const uniqDateJoinCondition = R.uniqBy(djc => djc[0].dimension, dateJoinCondition);
const cumulativeMeasures = [cumulativeMeasure];
if (!this.timeDimensions.find(d => d.granularity)) {
const filters = this.segments.concat(this.filters).concat(this.dateFromStartToEndConditionSql(dateJoinCondition, fromRollup, false));
const filters = this.segments
.concat(this.filters)
.concat(this.dateFromStartToEndConditionSql(
// If the same time dimension is passed more than once, no need to build the same
// filter condition again and again. Different granularities don't play role here,
// as rollingWindow.granularity is used for filtering.
uniqDateJoinCondition,
fromRollup,
false
));
return baseQueryFn(cumulativeMeasures, filters, false);
}
const dateSeriesSql = this.timeDimensions.map(d => this.dateSeriesSql(d)).join(', ');
const filters = this.segments.concat(this.filters).concat(this.dateFromStartToEndConditionSql(dateJoinCondition, fromRollup, true));

if (this.timeDimensions.filter(d => !d.dateRange && d.granularity).length > 0) {
throw new UserError('Time series queries without dateRange aren\'t supported');
}

// We can't do meaningful query if few time dimensions with different ranges passed,
// it won't be possible to join them together without losing some rows.
const rangedTimeDimensions = this.timeDimensions.filter(d => d.dateRange && d.granularity);
const uniqTimeDimensionWithRanges = R.uniqBy(d => d.dateRange, rangedTimeDimensions);
if (uniqTimeDimensionWithRanges.length > 1) {
throw new Error('Can\'t build query for time dimensions with different date ranges');
}

// We need to generate time series table for the lowest granularity among all time dimensions
const [dateSeriesDimension, dateSeriesGranularity] = this.timeDimensions.filter(d => d.granularity)
.reduce(([prevDim, prevGran], d) => {
const mg = this.minGranularity(prevGran, d.resolvedGranularity());
if (mg === d.resolvedGranularity()) {
return [d, mg];
}
return [prevDim, mg];
}, [null, null]);

const dateSeriesSql = this.dateSeriesSql(dateSeriesDimension);

// If the same time dimension is passed more than once, no need to build the same
// filter condition again and again. Different granularities don't play role here,
// as rollingWindow.granularity is used for filtering.
const filters = this.segments
.concat(this.filters)
.concat(this.dateFromStartToEndConditionSql(
uniqDateJoinCondition,
fromRollup,
true
));
const baseQuery = this.groupedUngroupedSelect(
() => baseQueryFn(cumulativeMeasures, filters),
cumulativeMeasure.shouldUngroupForCumulative(),
!cumulativeMeasure.shouldUngroupForCumulative() && this.minGranularity(
cumulativeMeasure.windowGranularity(), this.timeDimensions.find(d => d.granularity).resolvedGranularity()
cumulativeMeasure.windowGranularity(),
dateSeriesGranularity
) || undefined
);
const baseQueryAlias = this.cubeAlias('base');
Expand All @@ -1495,10 +1539,27 @@ export class BaseQuery {
dateSeriesSql,
baseQuery,
dateJoinConditionSql,
baseQueryAlias
baseQueryAlias,
dateSeriesDimension.granularity,
);
}

overTimeSeriesSelect(cumulativeMeasures, dateSeriesSql, baseQuery, dateJoinConditionSql, baseQueryAlias, dateSeriesGranularity) {
const forSelect = this.overTimeSeriesForSelect(cumulativeMeasures, dateSeriesGranularity);
return `SELECT ${forSelect} FROM ${dateSeriesSql}` +
` LEFT JOIN (${baseQuery}) ${this.asSyntaxJoin} ${baseQueryAlias} ON ${dateJoinConditionSql}` +
this.groupByClause();
}

overTimeSeriesForSelect(cumulativeMeasures, dateSeriesGranularity) {
return this.dimensions
.map(s => s.cumulativeSelectColumns())
.concat(this.timeDimensions.map(d => d.dateSeriesSelectColumn(null, dateSeriesGranularity)))
.concat(cumulativeMeasures.map(s => s.cumulativeSelectColumns()))
.filter(c => !!c)
.join(', ');
}

dateFromStartToEndConditionSql(dateJoinCondition, fromRollup, isFromStartToEnd) {
return dateJoinCondition.map(
// TODO these weird conversions to be strict typed for big query.
Expand All @@ -1523,24 +1584,6 @@ export class BaseQuery {
);
}

// Pre-#9124 implementation (removed side of this diff): same date-series LEFT JOIN
// shape as the replacement above, but without the dateSeriesGranularity parameter,
// so every time dimension's series column came straight from the series table.
overTimeSeriesSelect(cumulativeMeasures, dateSeriesSql, baseQuery, dateJoinConditionSql, baseQueryAlias) {
const forSelect = this.overTimeSeriesForSelect(cumulativeMeasures);
return `SELECT ${forSelect} FROM ${dateSeriesSql}` +
` LEFT JOIN (${baseQuery}) ${this.asSyntaxJoin} ${baseQueryAlias} ON ${dateJoinConditionSql}` +
this.groupByClause();
}

// Pre-#9124 implementation (removed side of this diff): builds the select list from
// dimension columns, the dateSeriesSelect() helper, and cumulative measure columns,
// dropping empty entries.
overTimeSeriesForSelect(cumulativeMeasures) {
return this.dimensions.map(s => s.cumulativeSelectColumns()).concat(this.dateSeriesSelect()).concat(
cumulativeMeasures.map(s => s.cumulativeSelectColumns()),
).filter(c => !!c)
.join(', ');
}

// Removed in this commit: per-time-dimension series columns are now produced inline in
// overTimeSeriesForSelect via dateSeriesSelectColumn(null, dateSeriesGranularity).
dateSeriesSelect() {
return this.timeDimensions.map(d => d.dateSeriesSelectColumn());
}

/**
* @param {import('./BaseTimeDimension').BaseTimeDimension} timeDimension
* @return {string}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,18 @@ export class BaseTimeDimension extends BaseFilter {
return this.query.escapeColumnName(`${this.dimension}_series`);
}

public dateSeriesSelectColumn(dateSeriesAliasName) {
/**
 * Returns the select-list expression exposing this time dimension's value taken from
 * the generated date-series table, aliased to the dimension's alias. Returns null for
 * time dimensions without a granularity (filter-only dimensions).
 *
 * @param dateSeriesAliasName - explicit series-table alias; falls back to the default
 * @param dateSeriesGranularity - granularity the shared series table was generated with
 */
public dateSeriesSelectColumn(dateSeriesAliasName: string | null, dateSeriesGranularity?: string) {
  if (!this.granularityObj) {
    return null;
  }

  const seriesAlias = dateSeriesAliasName || this.dateSeriesAliasName();
  const dateFromColumn = `${seriesAlias}.${this.query.escapeColumnName('date_from')}`;

  // With several granularities in one query, the series table is generated using the
  // minimal granularity among them. If ours matches, 'date_from' can be used as-is
  // (saves the truncation work); otherwise it must be re-truncated to our granularity.
  if (dateSeriesGranularity && this.granularityObj?.granularity !== dateSeriesGranularity) {
    return `${this.query.dimensionTimeGroupedColumn(dateFromColumn, this.granularityObj)} ${this.aliasName()}`;
  }
  return `${dateFromColumn} ${this.aliasName()}`;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,19 @@ describe('SQL Generation', () => {
offset: 'start'
}
},
// Count over a rolling window with an unbounded trailing edge: a running total of all
// rows up to each series date.
countRollingUnbounded: {
type: 'count',
rollingWindow: {
trailing: 'unbounded'
}
},
// Count over a to_date rolling window with 'week' granularity (week-to-date count).
countRollingWeekToDate: {
type: 'count',
rollingWindow: {
type: 'to_date',
granularity: 'week'
}
},
revenue_qtd: {
type: 'sum',
sql: 'amount',
Expand Down Expand Up @@ -220,7 +233,14 @@ describe('SQL Generation', () => {
},
created_at: {
type: 'time',
sql: 'created_at'
sql: 'created_at',
granularities: {
three_days: {
interval: '3 days',
title: '3 days',
origin: '2017-01-01'
}
}
},
updated_at: {
type: 'time',
Expand Down Expand Up @@ -778,6 +798,163 @@ describe('SQL Generation', () => {
}
]));

// Regression test for #9124: one rolling_window measure queried with the SAME time
// dimension at two granularities (custom 'three_days' and 'day'). Expects one row per
// day, with the three_days bucket value repeated for each day it covers. Null measure
// values appear where the week-to-date window contains no data for that series date.
it('rolling window with two time dimension granularities', async () => runQueryTest({
measures: [
'visitors.countRollingWeekToDate'
],
timeDimensions: [
{
dimension: 'visitors.created_at',
granularity: 'three_days',
dateRange: ['2017-01-01', '2017-01-10']
},
{
dimension: 'visitors.created_at',
granularity: 'day',
dateRange: ['2017-01-01', '2017-01-10']
}
],
order: [{
id: 'visitors.created_at'
}],
timezone: 'America/Los_Angeles'
}, [
{
visitors__count_rolling_week_to_date: null,
visitors__created_at_day: '2017-01-01T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-01T00:00:00.000Z',
},
{
visitors__count_rolling_week_to_date: '1',
visitors__created_at_day: '2017-01-02T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-01T00:00:00.000Z',
},
{
visitors__count_rolling_week_to_date: '1',
visitors__created_at_day: '2017-01-03T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-01T00:00:00.000Z',
},
{
visitors__count_rolling_week_to_date: '2',
visitors__created_at_day: '2017-01-04T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-04T00:00:00.000Z',
},
{
visitors__count_rolling_week_to_date: '3',
visitors__created_at_day: '2017-01-05T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-04T00:00:00.000Z',
},
{
visitors__count_rolling_week_to_date: '5',
visitors__created_at_day: '2017-01-06T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-04T00:00:00.000Z',
},
{
visitors__count_rolling_week_to_date: '5',
visitors__created_at_day: '2017-01-07T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-07T00:00:00.000Z',
},
{
visitors__count_rolling_week_to_date: '5',
visitors__created_at_day: '2017-01-08T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-07T00:00:00.000Z',
},
{
visitors__count_rolling_week_to_date: null,
visitors__created_at_day: '2017-01-09T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-07T00:00:00.000Z',
},
{
visitors__count_rolling_week_to_date: null,
visitors__created_at_day: '2017-01-10T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-10T00:00:00.000Z',
}
]));

// Regression test for #9124: TWO rolling_window measures (unbounded trailing and
// week-to-date) combined with the same time dimension at two granularities.
// NOTE(review): the expected rows are not strictly date-ordered (01-03 before 01-02,
// 01-08 before 01-07) — presumably this matches the database's tie ordering under the
// requested sort; confirm against runQueryTest's comparison semantics.
it('two rolling windows with two time dimension granularities', async () => runQueryTest({
measures: [
'visitors.countRollingUnbounded',
'visitors.countRollingWeekToDate'
],
timeDimensions: [
{
dimension: 'visitors.created_at',
granularity: 'three_days',
dateRange: ['2017-01-01', '2017-01-10']
},
{
dimension: 'visitors.created_at',
granularity: 'day',
dateRange: ['2017-01-01', '2017-01-10']
}
],
order: [{
id: 'visitors.created_at'
}],
timezone: 'America/Los_Angeles'
}, [
{
visitors__count_rolling_unbounded: '1',
visitors__count_rolling_week_to_date: null,
visitors__created_at_day: '2017-01-01T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-01T00:00:00.000Z',
},
{
visitors__count_rolling_unbounded: '2',
visitors__count_rolling_week_to_date: '1',
visitors__created_at_day: '2017-01-03T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-01T00:00:00.000Z',
},
{
visitors__count_rolling_unbounded: '2',
visitors__count_rolling_week_to_date: '1',
visitors__created_at_day: '2017-01-02T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-01T00:00:00.000Z',
},
{
visitors__count_rolling_unbounded: '3',
visitors__count_rolling_week_to_date: '2',
visitors__created_at_day: '2017-01-04T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-04T00:00:00.000Z',
},
{
visitors__count_rolling_unbounded: '4',
visitors__count_rolling_week_to_date: '3',
visitors__created_at_day: '2017-01-05T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-04T00:00:00.000Z',
},
{
visitors__count_rolling_unbounded: '6',
visitors__count_rolling_week_to_date: '5',
visitors__created_at_day: '2017-01-06T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-04T00:00:00.000Z',
},
{
visitors__count_rolling_unbounded: '6',
visitors__count_rolling_week_to_date: '5',
visitors__created_at_day: '2017-01-08T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-07T00:00:00.000Z',
},
{
visitors__count_rolling_unbounded: '6',
visitors__count_rolling_week_to_date: '5',
visitors__created_at_day: '2017-01-07T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-07T00:00:00.000Z',
},
{
visitors__count_rolling_unbounded: '6',
visitors__count_rolling_week_to_date: null,
visitors__created_at_day: '2017-01-09T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-07T00:00:00.000Z',
},
{
visitors__count_rolling_unbounded: '6',
visitors__count_rolling_week_to_date: null,
visitors__created_at_day: '2017-01-10T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-10T00:00:00.000Z',
}
]));

it('rolling month', async () => runQueryTest({
measures: [
'visitors.revenueRollingThreeDay'
Expand Down
5 changes: 4 additions & 1 deletion rust/cubesql/cubesql/src/compile/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2219,7 +2219,10 @@ limit
V1LoadRequestQueryTimeDimension {
dimension: "KibanaSampleDataEcommerce.order_date".to_string(),
granularity: Some("month".to_string()),
date_range: None,
date_range: Some(json!(vec![
"2023-07-08T00:00:00.000Z".to_string(),
"2023-10-07T23:59:59.999Z".to_string()
])),
}
]),
order: Some(vec![]),
Expand Down
10 changes: 6 additions & 4 deletions rust/cubesql/cubesql/src/compile/rewrite/converter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1823,16 +1823,18 @@ impl LanguageToLogicalPlanConverter {
let values =
match_data_node!(node_by_id, params[2], FilterMemberValues);
if !is_in_or && op == "inDateRange" {
let existing_time_dimension =
query_time_dimensions.iter_mut().find_map(|td| {
let existing_time_dimensions: Vec<_> = query_time_dimensions
.iter_mut()
.filter_map(|td| {
if td.dimension == member && td.date_range.is_none() {
td.date_range = Some(json!(values));
Some(td)
} else {
None
}
});
if existing_time_dimension.is_none() {
})
.collect();
if existing_time_dimensions.len() == 0 {
let dimension = V1LoadRequestQueryTimeDimension {
dimension: member.to_string(),
granularity: None,
Expand Down

0 comments on commit 52a664e

Please sign in to comment.