From 5e9b2db16b9a2b012ce07aa86fb7e5ca9759710c Mon Sep 17 00:00:00 2001 From: Mehmet Ozan Kabak Date: Thu, 30 Jan 2025 15:54:32 +0300 Subject: [PATCH] Improve comments --- datafusion/expr/src/udaf.rs | 41 ++++++++----------- datafusion/functions-aggregate/src/count.rs | 2 + datafusion/functions-aggregate/src/min_max.rs | 8 ++-- datafusion/functions-aggregate/src/sum.rs | 4 +- datafusion/physical-expr/src/aggregate.rs | 18 ++++---- 5 files changed, 35 insertions(+), 38 deletions(-) diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index e9cdd754e136..31678ad165e3 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -636,16 +636,8 @@ pub trait AggregateUDFImpl: Debug + Send + Sync { None } - /// Indicates whether the aggregation function is monotonic as a set function. A set - /// function is monotonically increasing if its value increases as its argument grows - /// (as a set). Formally, `f` is a monotonically increasing set function if `f(S) >= f(T)` - /// whenever `S` is a superset of `T`. - /// - /// For example `count` and `max` are monotonically increasing as their values always - /// increase (or stay the same) as new values are seen. - /// - /// `min` is monotonically decreasing as its value always decreases or stays - /// the same as new values are seen. + /// Indicates whether the aggregation function is monotonic as a set + /// function. See [`AggregateExprSetMonotonicity`] for details. fn set_monotonicity(&self, _data_type: &DataType) -> AggregateExprSetMonotonicity { AggregateExprSetMonotonicity::NotMonotonic } @@ -832,26 +824,27 @@ pub mod aggregate_doc_sections { }; } -/// Status of an Aggregate Expression's Set Monotonicity -#[derive(Debug, Clone)] +/// Indicates whether an aggregation function is monotonic as a set +/// function. A set function is monotonically increasing if its value +/// increases as its argument grows (as a set). Formally, `f` is a +/// monotonically increasing set function if `f(S) >= f(T)` whenever `S` +/// is a superset of `T`. +/// +/// For example `COUNT` and `MAX` are monotonically increasing as their +/// values always increase (or stay the same) as new values are seen. On +/// the other hand, `MIN` is monotonically decreasing as its value always +/// decreases or stays the same as new values are seen. +#[derive(Debug, Clone, PartialEq)] pub enum AggregateExprSetMonotonicity { - /// Ordering exists as ascending + /// Aggregate value increases or stays the same as the input set grows. Increasing, - /// Ordering exists as descending + /// Aggregate value decreases or stays the same as the input set grows. Decreasing, - /// No ordering + /// Aggregate value may increase, decrease, or stay the same as the input + /// set grows. NotMonotonic, } -impl AggregateExprSetMonotonicity { - pub fn is_decreasing(&self) -> bool { - matches!(self, Self::Decreasing) - } - pub fn is_monotonic(&self) -> bool { - !matches!(self, Self::NotMonotonic) - } -} - #[cfg(test)] mod test { use crate::{AggregateUDF, AggregateUDFImpl}; diff --git a/datafusion/functions-aggregate/src/count.rs b/datafusion/functions-aggregate/src/count.rs index 48eabe7b9cce..992004c66981 100644 --- a/datafusion/functions-aggregate/src/count.rs +++ b/datafusion/functions-aggregate/src/count.rs @@ -354,6 +354,8 @@ impl AggregateUDFImpl for Count { } fn set_monotonicity(&self, _data_type: &DataType) -> AggregateExprSetMonotonicity { + // `COUNT` is monotonically increasing as it always increases or stays + // the same as new values are seen. AggregateExprSetMonotonicity::Increasing } } diff --git a/datafusion/functions-aggregate/src/min_max.rs b/datafusion/functions-aggregate/src/min_max.rs index a36a2f184593..6ef97f8d61a6 100644 --- a/datafusion/functions-aggregate/src/min_max.rs +++ b/datafusion/functions-aggregate/src/min_max.rs @@ -363,8 +363,8 @@ impl AggregateUDFImpl for Max { } fn set_monotonicity(&self, _data_type: &DataType) -> AggregateExprSetMonotonicity { - // max is monotonically increasing as it always increases or - // stays the same as new values are seen + // `MAX` is monotonically increasing as it always increases or stays + // the same as new values are seen. AggregateExprSetMonotonicity::Increasing } } @@ -1191,8 +1191,8 @@ impl AggregateUDFImpl for Min { } fn set_monotonicity(&self, _data_type: &DataType) -> AggregateExprSetMonotonicity { - // min is monotonically decreasing as it always decreases or - // stays the same as new values are seen + // `MIN` is monotonically decreasing as it always decreases or stays + // the same as new values are seen. AggregateExprSetMonotonicity::Decreasing } } diff --git a/datafusion/functions-aggregate/src/sum.rs b/datafusion/functions-aggregate/src/sum.rs index 1bb54734fb69..e5d7ef8555eb 100644 --- a/datafusion/functions-aggregate/src/sum.rs +++ b/datafusion/functions-aggregate/src/sum.rs @@ -256,8 +256,8 @@ impl AggregateUDFImpl for Sum { } fn set_monotonicity(&self, data_type: &DataType) -> AggregateExprSetMonotonicity { - // Sum is only monotonic if its input is unsigned - // TODO: Expand these utilizing statistics + // `SUM` is only monotonically increasing when its input is unsigned. + // TODO: Expand these utilizing statistics. match data_type { DataType::UInt8 => AggregateExprSetMonotonicity::Increasing, DataType::UInt16 => AggregateExprSetMonotonicity::Increasing, diff --git a/datafusion/physical-expr/src/aggregate.rs b/datafusion/physical-expr/src/aggregate.rs index ac86939f8cbb..3087bfbf9a4b 100644 --- a/datafusion/physical-expr/src/aggregate.rs +++ b/datafusion/physical-expr/src/aggregate.rs @@ -47,8 +47,9 @@ use datafusion_expr::{AggregateExprSetMonotonicity, AggregateUDF, ReversedUDAF}; use datafusion_expr_common::accumulator::Accumulator; use datafusion_expr_common::groups_accumulator::GroupsAccumulator; use datafusion_expr_common::type_coercion::aggregates::check_arg_count; -use datafusion_functions_aggregate_common::accumulator::AccumulatorArgs; -use datafusion_functions_aggregate_common::accumulator::StateFieldsArgs; +use datafusion_functions_aggregate_common::accumulator::{ + AccumulatorArgs, StateFieldsArgs, +}; use datafusion_functions_aggregate_common::order::AggregateOrderSensitivity; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; @@ -535,10 +536,8 @@ impl AggregateFunctionExpr { self.fun.default_value(data_type) } - /// Indicates whether the aggregation function is monotonic as a set function. A set - /// function is monotonically increasing if its value increases as its argument grows - /// (as a set). Formally, `f` is a monotonically increasing set function if `f(S) >= f(T)` - /// whenever `S` is a superset of `T`. + /// Indicates whether the aggregation function is monotonic as a set + /// function. See [`AggregateExprSetMonotonicity`] for details. pub fn set_monotonicity(&self) -> AggregateExprSetMonotonicity { let field = self.field(); let data_type = field.data_type(); @@ -550,11 +549,14 @@ impl AggregateFunctionExpr { // If the aggregate expressions are set-monotonic, the output data is // naturally ordered with it per group or partition. let monotonicity = self.set_monotonicity(); - if !monotonicity.is_monotonic() { + if monotonicity == AggregateExprSetMonotonicity::NotMonotonic { return None; } let expr = Arc::new(Column::new(self.name(), aggr_func_idx)); - let options = SortOptions::new(monotonicity.is_decreasing(), false); + let options = SortOptions::new( + monotonicity == AggregateExprSetMonotonicity::Decreasing, + false, + ); Some(PhysicalSortExpr { expr, options }) } }