Skip to content

Commit

Permalink
fix(cubesql): Generate proper projection wrapper for duplicated membe…
Browse files Browse the repository at this point in the history
…rs in CubeScanNode (#9233)

`CubeScanWrapperNode::generate_sql_for_node` does not handle duplicated members in `member_fields`. It results in missing realiasing projection, as it generate only one column for each member. This introduces realiasing to make those columns reappear.

`CubeScanExecutionPlan` implements this by iterating over `member_fields` while building columns, so each member in `member_fields` will get it’s own column in DF anyway.
  • Loading branch information
mcheshkov authored Feb 24, 2025
1 parent 31a7a04 commit aba6430
Show file tree
Hide file tree
Showing 4 changed files with 125 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -378,3 +378,13 @@ Array [
},
]
`;

exports[`SQL API Postgres (Data) wrapper with duplicated members: wrapper-duplicated-members 1`] = `
Array [
Object {
"bar": "shipped",
"bar_expr": "0",
"foo": "shipped",
},
]
`;
35 changes: 35 additions & 0 deletions packages/cubejs-testing/test/smoke-cubesql.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -660,5 +660,40 @@ filter_subq AS (
const res = await connection.query(query);
expect(res.rows).toMatchSnapshot('query-view-deep-joins');
});

test('wrapper with duplicated members', async () => {
const query = `
SELECT
"foo",
"bar",
CASE
WHEN "bar" = 'new'
THEN 1
ELSE 0
END
AS "bar_expr"
FROM (
SELECT
"rows"."foo" AS "foo",
"rows"."bar" AS "bar"
FROM (
SELECT
"status" AS "foo",
"status" AS "bar"
FROM Orders
) "rows"
GROUP BY
"foo",
"bar"
) "_"
ORDER BY
"bar_expr"
LIMIT 1
;
`;

const res = await connection.query(query);
expect(res.rows).toMatchSnapshot('wrapper-duplicated-members');
});
});
});
20 changes: 15 additions & 5 deletions rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ use serde::{Deserialize, Serialize};
use std::{
any::Any,
cmp::min,
collections::{HashMap, HashSet},
collections::{hash_map::Entry, HashMap, HashSet},
convert::TryInto,
fmt,
future::Future,
Expand Down Expand Up @@ -733,6 +733,7 @@ impl CubeScanWrapperNode {
// And tuning literals to measures would require ugly wrapping with noop aggregation function
// TODO investigate Cube.js joins, try to implement dimension member expression
let mut has_literal_members = false;
let mut has_duplicated_members = false;
let mut wrapper_exprs = vec![];

for (member, field) in
Expand All @@ -741,9 +742,18 @@ impl CubeScanWrapperNode {
let alias = remapper.add_column(&field.qualified_column())?;
let expr = match member {
MemberField::Member(f) => {
member_to_alias.insert(f.to_string(), alias.clone());
// `alias` is column name that would be generated by Cube.js, just reference that
Expr::Column(Column::from_name(alias.clone()))
match member_to_alias.entry(f.to_string()) {
Entry::Vacant(entry) => {
entry.insert(alias.clone());
// `alias` is column name that would be generated by Cube.js, just reference that
Expr::Column(Column::from_name(alias.clone()))
}
Entry::Occupied(entry) => {
// This member already has an alias, generate wrapper that would use it
has_duplicated_members = true;
Expr::Column(Column::from_name(entry.get().clone()))
}
}
}
MemberField::Literal(value) => {
has_literal_members = true;
Expand All @@ -770,7 +780,7 @@ impl CubeScanWrapperNode {
.await?;

// TODO is this check necessary?
let sql = if has_literal_members {
let sql = if has_literal_members || has_duplicated_members {
// Need to generate wrapper SELECT with literal columns
// Generated columns need to have same aliases as targets in `remapper`
// Because that's what plans higher up would use in generated SQL
Expand Down
65 changes: 65 additions & 0 deletions rust/cubesql/cubesql/src/compile/test/test_wrapper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1456,3 +1456,68 @@ WHERE
assert_eq!(request.measures.unwrap().len(), 1);
assert_eq!(request.dimensions.unwrap().len(), 0);
}

#[tokio::test]
async fn wrapper_duplicated_members() {
if !Rewriter::sql_push_down_enabled() {
return;
}
init_testing_logger();

let query_plan = convert_select_to_query_plan(
// language=PostgreSQL
format!(
r#"
SELECT
"foo",
"bar",
CASE
WHEN "bar" IS NOT NULL
THEN 1
ELSE 0
END
AS "bar_expr"
FROM (
SELECT
"rows"."foo" AS "foo",
"rows"."bar" AS "bar"
FROM (
SELECT
"dim_str0" AS "foo",
"dim_str0" AS "bar"
FROM MultiTypeCube
) "rows"
GROUP BY
"foo",
"bar"
) "_"
ORDER BY
"bar_expr"
LIMIT 1
;
"#
)
.to_string(),
DatabaseProtocol::PostgreSQL,
)
.await;

let physical_plan = query_plan.as_physical_plan().await.unwrap();
println!(
"Physical plan: {}",
displayable(physical_plan.as_ref()).indent()
);

let logical_plan = query_plan.as_logical_plan();
// Generated SQL should contain realiasing of one member to two columns
assert!(logical_plan
.find_cube_scan_wrapped_sql()
.wrapped_sql
.sql
.contains(r#""foo" "foo""#));
assert!(logical_plan
.find_cube_scan_wrapped_sql()
.wrapped_sql
.sql
.contains(r#""foo" "bar""#));
}

0 comments on commit aba6430

Please sign in to comment.