From aba643082acc440cf5b3fe9828c2c38ac1a833c9 Mon Sep 17 00:00:00 2001 From: Mikhail Cheshkov Date: Tue, 25 Feb 2025 00:37:30 +0200 Subject: [PATCH] fix(cubesql): Generate proper projection wrapper for duplicated members in CubeScanNode (#9233) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `CubeScanWrapperNode::generate_sql_for_node` does not handle duplicated members in `member_fields`. It results in missing realiasing projection, as it generate only one column for each member. This introduces realiasing to make those columns reappear. `CubeScanExecutionPlan` implements this by iterating over `member_fields` while building columns, so each member in `member_fields` will get it’s own column in DF anyway. --- .../__snapshots__/smoke-cubesql.test.ts.snap | 10 +++ .../cubejs-testing/test/smoke-cubesql.test.ts | 35 ++++++++++ .../cubesql/src/compile/engine/df/wrapper.rs | 20 ++++-- .../cubesql/src/compile/test/test_wrapper.rs | 65 +++++++++++++++++++ 4 files changed, 125 insertions(+), 5 deletions(-) diff --git a/packages/cubejs-testing/test/__snapshots__/smoke-cubesql.test.ts.snap b/packages/cubejs-testing/test/__snapshots__/smoke-cubesql.test.ts.snap index 969d2d139e00c..4401d87097314 100644 --- a/packages/cubejs-testing/test/__snapshots__/smoke-cubesql.test.ts.snap +++ b/packages/cubejs-testing/test/__snapshots__/smoke-cubesql.test.ts.snap @@ -378,3 +378,13 @@ Array [ }, ] `; + +exports[`SQL API Postgres (Data) wrapper with duplicated members: wrapper-duplicated-members 1`] = ` +Array [ + Object { + "bar": "shipped", + "bar_expr": "0", + "foo": "shipped", + }, +] +`; diff --git a/packages/cubejs-testing/test/smoke-cubesql.test.ts b/packages/cubejs-testing/test/smoke-cubesql.test.ts index 514686806c444..7a780766b963a 100644 --- a/packages/cubejs-testing/test/smoke-cubesql.test.ts +++ b/packages/cubejs-testing/test/smoke-cubesql.test.ts @@ -660,5 +660,40 @@ filter_subq AS ( const res = await connection.query(query); expect(res.rows).toMatchSnapshot('query-view-deep-joins'); }); + + test('wrapper with duplicated members', async () => { + const query = ` + SELECT + "foo", + "bar", + CASE + WHEN "bar" = 'new' + THEN 1 + ELSE 0 + END + AS "bar_expr" + FROM ( + SELECT + "rows"."foo" AS "foo", + "rows"."bar" AS "bar" + FROM ( + SELECT + "status" AS "foo", + "status" AS "bar" + FROM Orders + ) "rows" + GROUP BY + "foo", + "bar" + ) "_" + ORDER BY + "bar_expr" + LIMIT 1 + ; + `; + + const res = await connection.query(query); + expect(res.rows).toMatchSnapshot('wrapper-duplicated-members'); + }); }); }); diff --git a/rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs b/rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs index c52591ffed03d..e0cd241c238c7 100644 --- a/rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs +++ b/rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs @@ -35,7 +35,7 @@ use serde::{Deserialize, Serialize}; use std::{ any::Any, cmp::min, - collections::{HashMap, HashSet}, + collections::{hash_map::Entry, HashMap, HashSet}, convert::TryInto, fmt, future::Future, @@ -733,6 +733,7 @@ impl CubeScanWrapperNode { // And tuning literals to measures would require ugly wrapping with noop aggregation function // TODO investigate Cube.js joins, try to implement dimension member expression let mut has_literal_members = false; + let mut has_duplicated_members = false; let mut wrapper_exprs = vec![]; for (member, field) in @@ -741,9 +742,18 @@ impl CubeScanWrapperNode { let alias = remapper.add_column(&field.qualified_column())?; let expr = match member { MemberField::Member(f) => { - member_to_alias.insert(f.to_string(), alias.clone()); - // `alias` is column name that would be generated by Cube.js, just reference that - Expr::Column(Column::from_name(alias.clone())) + match member_to_alias.entry(f.to_string()) { + Entry::Vacant(entry) => { + entry.insert(alias.clone()); + // `alias` is column name that would be generated by Cube.js, just reference that + Expr::Column(Column::from_name(alias.clone())) + } + Entry::Occupied(entry) => { + // This member already has an alias, generate wrapper that would use it + has_duplicated_members = true; + Expr::Column(Column::from_name(entry.get().clone())) + } + } } MemberField::Literal(value) => { has_literal_members = true; @@ -770,7 +780,7 @@ impl CubeScanWrapperNode { .await?; // TODO is this check necessary? - let sql = if has_literal_members { + let sql = if has_literal_members || has_duplicated_members { // Need to generate wrapper SELECT with literal columns // Generated columns need to have same aliases as targets in `remapper` // Because that's what plans higher up would use in generated SQL diff --git a/rust/cubesql/cubesql/src/compile/test/test_wrapper.rs b/rust/cubesql/cubesql/src/compile/test/test_wrapper.rs index 74ad0fa7b65f5..b29214f81ffb8 100644 --- a/rust/cubesql/cubesql/src/compile/test/test_wrapper.rs +++ b/rust/cubesql/cubesql/src/compile/test/test_wrapper.rs @@ -1456,3 +1456,68 @@ WHERE assert_eq!(request.measures.unwrap().len(), 1); assert_eq!(request.dimensions.unwrap().len(), 0); } + +#[tokio::test] +async fn wrapper_duplicated_members() { + if !Rewriter::sql_push_down_enabled() { + return; + } + init_testing_logger(); + + let query_plan = convert_select_to_query_plan( + // language=PostgreSQL + format!( + r#" +SELECT + "foo", + "bar", + CASE + WHEN "bar" IS NOT NULL + THEN 1 + ELSE 0 + END + AS "bar_expr" +FROM ( + SELECT + "rows"."foo" AS "foo", + "rows"."bar" AS "bar" + FROM ( + SELECT + "dim_str0" AS "foo", + "dim_str0" AS "bar" + FROM MultiTypeCube + ) "rows" + GROUP BY + "foo", + "bar" +) "_" +ORDER BY + "bar_expr" +LIMIT 1 +; + "# + ) + .to_string(), + DatabaseProtocol::PostgreSQL, + ) + .await; + + let physical_plan = query_plan.as_physical_plan().await.unwrap(); + println!( + "Physical plan: {}", + displayable(physical_plan.as_ref()).indent() + ); + + let logical_plan = query_plan.as_logical_plan(); + // Generated SQL should contain realiasing of one member to two columns + assert!(logical_plan + .find_cube_scan_wrapped_sql() + .wrapped_sql + .sql + .contains(r#""foo" "foo""#)); + assert!(logical_plan + .find_cube_scan_wrapped_sql() + .wrapped_sql + .sql + .contains(r#""foo" "bar""#)); +}