Skip to content

Commit

Permalink
Run optimizer rules on subqueries by default (apache#13066)
Browse files Browse the repository at this point in the history
This patch makes it so that rules that configure an `apply_order` will
also include subqueries in their traversal.

This is a step towards being able to run TPC-DS q41 (apache#4763) which has
an expression that needs simplification before we can decorrelate the
subquery.

This closes apache#3770 and maybe apache#2480
  • Loading branch information
eejbyfeldt authored Oct 23, 2024
1 parent d2a5e27 commit 3aa9714
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 26 deletions.
14 changes: 4 additions & 10 deletions datafusion/optimizer/src/optimizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use log::{debug, warn};
use datafusion_common::alias::AliasGenerator;
use datafusion_common::config::ConfigOptions;
use datafusion_common::instant::Instant;
use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter};
use datafusion_common::tree_node::{Transformed, TreeNodeRewriter};
use datafusion_common::{internal_err, DFSchema, DataFusionError, Result};
use datafusion_expr::logical_plan::LogicalPlan;

Expand Down Expand Up @@ -250,10 +250,6 @@ impl Optimizer {
Arc::new(DecorrelatePredicateSubquery::new()),
Arc::new(ScalarSubqueryToJoin::new()),
Arc::new(ExtractEquijoinPredicate::new()),
// simplify expressions does not simplify expressions in subqueries, so we
// run it again after running the optimizations that potentially converted
// subqueries to joins
Arc::new(SimplifyExpressions::new()),
Arc::new(EliminateDuplicatedExpr::new()),
Arc::new(EliminateFilter::new()),
Arc::new(EliminateCrossJoin::new()),
Expand Down Expand Up @@ -384,11 +380,9 @@ impl Optimizer {

let result = match rule.apply_order() {
// optimizer handles recursion
Some(apply_order) => new_plan.rewrite(&mut Rewriter::new(
apply_order,
rule.as_ref(),
config,
)),
Some(apply_order) => new_plan.rewrite_with_subqueries(
&mut Rewriter::new(apply_order, rule.as_ref(), config),
),
// rule handles recursion itself
None => optimize_plan_node(new_plan, rule.as_ref(), config),
}
Expand Down
2 changes: 0 additions & 2 deletions datafusion/sqllogictest/test_files/explain.slt
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,6 @@ logical_plan after eliminate_join SAME TEXT AS ABOVE
logical_plan after decorrelate_predicate_subquery SAME TEXT AS ABOVE
logical_plan after scalar_subquery_to_join SAME TEXT AS ABOVE
logical_plan after extract_equijoin_predicate SAME TEXT AS ABOVE
logical_plan after simplify_expressions SAME TEXT AS ABOVE
logical_plan after eliminate_duplicated_expr SAME TEXT AS ABOVE
logical_plan after eliminate_filter SAME TEXT AS ABOVE
logical_plan after eliminate_cross_join SAME TEXT AS ABOVE
Expand All @@ -214,7 +213,6 @@ logical_plan after eliminate_join SAME TEXT AS ABOVE
logical_plan after decorrelate_predicate_subquery SAME TEXT AS ABOVE
logical_plan after scalar_subquery_to_join SAME TEXT AS ABOVE
logical_plan after extract_equijoin_predicate SAME TEXT AS ABOVE
logical_plan after simplify_expressions SAME TEXT AS ABOVE
logical_plan after eliminate_duplicated_expr SAME TEXT AS ABOVE
logical_plan after eliminate_filter SAME TEXT AS ABOVE
logical_plan after eliminate_cross_join SAME TEXT AS ABOVE
Expand Down
28 changes: 14 additions & 14 deletions datafusion/sqllogictest/test_files/subquery.slt
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,7 @@ logical_plan
01)Filter: EXISTS (<subquery>)
02)--Subquery:
03)----Projection: t1.t1_int
04)------Filter: t1.t1_id > t1.t1_int
04)------Filter: t1.t1_int < t1.t1_id
05)--------TableScan: t1
06)--TableScan: t1 projection=[t1_id, t1_name, t1_int]

Expand Down Expand Up @@ -462,8 +462,8 @@ explain SELECT t1_id, (SELECT t2_int FROM t2 WHERE t2.t2_int = t1.t1_int limit 1
logical_plan
01)Projection: t1.t1_id, (<subquery>) AS t2_int
02)--Subquery:
03)----Limit: skip=0, fetch=1
04)------Projection: t2.t2_int
03)----Projection: t2.t2_int
04)------Limit: skip=0, fetch=1
05)--------Filter: t2.t2_int = outer_ref(t1.t1_int)
06)----------TableScan: t2
07)--TableScan: t1 projection=[t1_id, t1_int]
Expand All @@ -475,8 +475,8 @@ logical_plan
01)Projection: t1.t1_id
02)--Filter: t1.t1_int = (<subquery>)
03)----Subquery:
04)------Limit: skip=0, fetch=1
05)--------Projection: t2.t2_int
04)------Projection: t2.t2_int
05)--------Limit: skip=0, fetch=1
06)----------Filter: t2.t2_int = outer_ref(t1.t1_int)
07)------------TableScan: t2
08)----TableScan: t1 projection=[t1_id, t1_int]
Expand Down Expand Up @@ -542,13 +542,13 @@ query TT
explain SELECT t0_id, t0_name FROM t0 WHERE EXISTS (SELECT 1 FROM t1 INNER JOIN t2 ON(t1.t1_id = t2.t2_id and t1.t1_name = t0.t0_name))
----
logical_plan
01)Filter: EXISTS (<subquery>)
02)--Subquery:
03)----Projection: Int64(1)
04)------Inner Join: Filter: t1.t1_id = t2.t2_id AND t1.t1_name = outer_ref(t0.t0_name)
05)--------TableScan: t1
06)--------TableScan: t2
07)--TableScan: t0 projection=[t0_id, t0_name]
01)LeftSemi Join: t0.t0_name = __correlated_sq_2.t1_name
02)--TableScan: t0 projection=[t0_id, t0_name]
03)--SubqueryAlias: __correlated_sq_2
04)----Projection: t1.t1_name
05)------Inner Join: t1.t1_id = t2.t2_id
06)--------TableScan: t1 projection=[t1_id, t1_name]
07)--------TableScan: t2 projection=[t2_id]

#subquery_contains_join_contains_correlated_columns
query TT
Expand Down Expand Up @@ -656,8 +656,8 @@ explain SELECT t1_id, t1_name FROM t1 WHERE t1_id in (SELECT t2_id FROM t2 where
logical_plan
01)Filter: t1.t1_id IN (<subquery>)
02)--Subquery:
03)----Limit: skip=0, fetch=10
04)------Projection: t2.t2_id
03)----Projection: t2.t2_id
04)------Limit: skip=0, fetch=10
05)--------Filter: outer_ref(t1.t1_name) = t2.t2_name
06)----------TableScan: t2
07)--TableScan: t1 projection=[t1_id, t1_name]
Expand Down

0 comments on commit 3aa9714

Please sign in to comment.