Skip to content

Commit

Permalink
Respect nulls in approx_percentile_cont
Browse files Browse the repository at this point in the history
  • Loading branch information
Dandandan committed Jul 30, 2024
1 parent 2f5e73c commit a42524b
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 3 deletions.
17 changes: 14 additions & 3 deletions datafusion/functions-aggregate/src/approx_percentile_cont.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ use std::any::Any;
use std::fmt::{Debug, Formatter};
use std::sync::Arc;

use arrow::array::RecordBatch;
use arrow::array::{Array, RecordBatch};
use arrow::compute::{filter, is_not_null};
use arrow::{
array::{
ArrayRef, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array,
Expand Down Expand Up @@ -104,6 +105,12 @@ impl ApproxPercentileCont {
None
};

if args.ignore_nulls {
return not_impl_err!(
"IGNORE NULLS clause not yet supported for APPROX_PERCENTILE_CONT"
);
}

let accumulator: ApproxPercentileAccumulator = match args.input_type {
t @ (DataType::UInt8
| DataType::UInt16
Expand Down Expand Up @@ -393,8 +400,12 @@ impl Accumulator for ApproxPercentileAccumulator {
}

fn update_batch(&mut self, values: &[ArrayRef]) -> datafusion_common::Result<()> {
let values = &values[0];
let sorted_values = &arrow::compute::sort(values, None)?;
// Drop null entries before sorting so they do not contribute to the percentile estimate
let mut values = values[0];
if let Some(nulls) = values.nulls() {
values = filter(&values, &is_not_null(values)?)?;
}
let sorted_values = &arrow::compute::sort(&values, None)?;
let sorted_values = ApproxPercentileAccumulator::convert_to_float(sorted_values)?;
self.digest = self.digest.merge_sorted_f64(&sorted_values);
Ok(())
Expand Down
6 changes: 6 additions & 0 deletions datafusion/sqllogictest/test_files/aggregate.slt
Original file line number Diff line number Diff line change
Expand Up @@ -1237,6 +1237,12 @@ SELECT (ABS(1 - CAST(approx_percentile_cont(c11, 0.9) AS DOUBLE) / 0.834) < 0.05
----
true

# approx_percentile_cont_with_nulls
query I
SELECT APPROX_PERCENTILE_CONT(v, 0.5) FROM (VALUES (1), (2), (3), (NULL), (NULL), (NULL)) as t (v);
----
2

# csv_query_cube_avg
query TIR
SELECT c1, c2, AVG(c3) FROM aggregate_test_100 GROUP BY CUBE (c1, c2) ORDER BY c1, c2
Expand Down

0 comments on commit a42524b

Please sign in to comment.