Skip to content

Commit

Permalink
Issue-14416 - feat: Add array_min function
Browse files Browse the repository at this point in the history
  • Loading branch information
erenavsarogullari committed Feb 5, 2025
1 parent ea788c7 commit c696cbb
Show file tree
Hide file tree
Showing 4 changed files with 269 additions and 0 deletions.
2 changes: 2 additions & 0 deletions datafusion/functions-nested/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ pub mod map;
pub mod map_extract;
pub mod map_keys;
pub mod map_values;
pub mod min;
pub mod planner;
pub mod position;
pub mod range;
Expand Down Expand Up @@ -139,6 +140,7 @@ pub fn all_default_nested_functions() -> Vec<Arc<ScalarUDF>> {
length::array_length_udf(),
distance::array_distance_udf(),
flatten::flatten_udf(),
min::array_min_udf(),
sort::array_sort_udf(),
repeat::array_repeat_udf(),
resize::array_resize_udf(),
Expand Down
174 changes: 174 additions & 0 deletions datafusion/functions-nested/src/min.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//! [`ScalarUDFImpl`] definitions for array_min function.
use crate::sort::array_sort_inner;
use crate::utils::make_scalar_function;
use arrow_array::{Array, ArrayRef, StringArray};
use arrow_schema::DataType;
use arrow_schema::DataType::{FixedSizeList, LargeList, List};
use datafusion_common::cast::as_list_array;
use datafusion_common::exec_err;
use datafusion_doc::Documentation;
use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature, Volatility};
use datafusion_macros::user_doc;
use std::any::Any;
use std::sync::Arc;

// Declares the public entry points for the `ArrayMin` UDF. The last argument,
// `array_min_udf`, is the accessor that `all_default_nested_functions` in
// lib.rs calls to register this function.
// NOTE(review): the macro is defined elsewhere in the crate; presumably it also
// generates an `array_min(array)` expression-builder function documented by the
// string below — confirm against the macro definition.
make_udf_expr_and_func!(
ArrayMin,
array_min,
array,
"returns the minimum value in the array.",
array_min_udf
);

// Scalar UDF implementing `array_min(array)` (alias `list_min`).
//
// The `user_doc` attribute carries the user-facing documentation: section,
// description, syntax, a SQL example and the argument description.
// `ScalarUDFImpl::documentation` for this type returns `self.doc()`.
// NOTE(review): `doc()` is presumably generated by `user_doc` — confirm.
#[user_doc(
doc_section(label = "Array Functions"),
description = "Returns the minimum value in the array.",
syntax_example = "array_min(array)",
sql_example = r#"```sql
> select array_min([3,1,4,2]);
+-----------------------------------------+
| array_min(List([3,1,4,2])) |
+-----------------------------------------+
| 1 |
+-----------------------------------------+
```"#,
argument(
name = "array",
description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
)
)]
#[derive(Debug)]
pub struct ArrayMin {
// Signature handed out by `ScalarUDFImpl::signature`; built in `new()`.
signature: Signature,
// Alternative function names handed out by `ScalarUDFImpl::aliases`.
aliases: Vec<String>,
}

impl Default for ArrayMin {
fn default() -> Self {
Self::new()
}
}

impl ArrayMin {
    /// Creates the `array_min` UDF.
    ///
    /// The function uses `Signature::array` with `Volatility::Immutable` and
    /// registers `list_min` as its only alias.
    pub fn new() -> Self {
        let signature = Signature::array(Volatility::Immutable);
        let aliases = vec![String::from("list_min")];
        Self { signature, aliases }
    }
}

impl ScalarUDFImpl for ArrayMin {
    fn as_any(&self) -> &dyn Any {
        self
    }

    /// Primary SQL name of the function.
    fn name(&self) -> &str {
        "array_min"
    }

    /// Human-readable name of a call, rendered as `array_min(<arg>)`.
    ///
    /// The function name must be part of the result: returning only the
    /// argument's name would make `array_min(a)` indistinguishable from `a`.
    ///
    /// # Errors
    /// Returns an execution error unless exactly one argument is supplied.
    fn display_name(&self, args: &[Expr]) -> datafusion_common::Result<String> {
        match args {
            [arg] => Ok(format!("{}({arg})", self.name())),
            _ => exec_err!("expects 1 arg, got {}", args.len()),
        }
    }

    /// Name of the output column for a call, rendered as
    /// `array_min(<arg schema name>)`, matching the header shown in the
    /// documented SQL example.
    ///
    /// # Errors
    /// Returns an execution error unless exactly one argument is supplied.
    fn schema_name(&self, args: &[Expr]) -> datafusion_common::Result<String> {
        match args {
            [arg] => Ok(format!("{}({})", self.name(), arg.schema_name())),
            _ => exec_err!("expects 1 arg, got {}", args.len()),
        }
    }

    fn signature(&self) -> &Signature {
        &self.signature
    }

    /// The result type is the element type of the list argument.
    ///
    /// # Errors
    /// Returns an execution error when the first argument type is missing or
    /// is not `List`, `LargeList` or `FixedSizeList`.
    fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result<DataType> {
        // `first()` instead of `[0]` so an empty slice yields an error, not a panic.
        match arg_types.first() {
            Some(List(field) | LargeList(field) | FixedSizeList(field, _)) => {
                Ok(field.data_type().clone())
            }
            _ => exec_err!(
                "Not reachable, data_type should be List, LargeList or FixedSizeList"
            ),
        }
    }

    /// Evaluates the function by delegating to [`array_min_inner`] through
    /// `make_scalar_function`.
    fn invoke_batch(
        &self,
        args: &[ColumnarValue],
        _number_rows: usize,
    ) -> datafusion_common::Result<ColumnarValue> {
        make_scalar_function(array_min_inner)(args)
    }

    fn aliases(&self) -> &[String] {
        &self.aliases
    }

    fn documentation(&self) -> Option<&Documentation> {
        self.doc()
    }
}

/// Implementation of the `array_min(array)` SQL function.
///
/// Takes exactly one argument, the array to inspect, and hands it to
/// `array_min_internal` together with the sort settings `"ASC"` and
/// `"NULLS LAST"`.
///
/// For example:
/// > array_min(\[3, 1, 2]) -> 1
///
/// # Errors
/// Returns an execution error when the argument count is not one or when the
/// argument is not a `List`, `LargeList` or `FixedSizeList`.
pub fn array_min_inner(args: &[ArrayRef]) -> datafusion_common::Result<ArrayRef> {
    let [array] = args else {
        return exec_err!("array_min needs one argument");
    };

    if !matches!(
        array.data_type(),
        List(_) | LargeList(_) | FixedSizeList(_, _)
    ) {
        return exec_err!("array_min does not support type: {:?}", array.data_type());
    }

    // Sort direction and null placement, passed as single-element string arrays.
    let direction: ArrayRef = Arc::new(StringArray::from_iter(vec![Some("ASC")]));
    let null_placement: ArrayRef =
        Arc::new(StringArray::from_iter(vec![Some("NULLS LAST")]));

    let sort_args: Vec<ArrayRef> = vec![Arc::clone(array), direction, null_placement];
    array_min_internal(&sort_args)
}

fn array_min_internal(args: &[ArrayRef]) -> datafusion_common::Result<ArrayRef> {
let sorted_array = array_sort_inner(args)?;
let result_array = as_list_array(&sorted_array)?.value(0);
if result_array.is_empty() {
return exec_err!("array_min needs one argument as non-empty array");
}
let min_result = result_array.slice(0, 1);
Ok(min_result)
}
60 changes: 60 additions & 0 deletions datafusion/sqllogictest/test_files/array.slt
Original file line number Diff line number Diff line change
Expand Up @@ -1433,6 +1433,66 @@ NULL 23
NULL 43
5 NULL

## array_min
# array_min scalar function
query I
select array_min(make_array(5, 3, 4, 6));
----
3

query I
select array_min(make_array(5, 3, 4, NULL, 6, NULL));
----
3

query I
select array_min(make_array(NULL, NULL));
----
NULL

query T
select array_min(make_array('h', 'e', 'l', 'l', 'o'));
----
e

query T
select array_min(make_array('h', 'e', 'l', NULL, 'l', 'o', NULL));
----
e

query B
select array_min(make_array(true, true, false, true));
----
false

query B
select array_min(make_array(true, true, NULL, false, true));
----
false

query D
select array_min(make_array(DATE '1992-09-01', DATE '1993-03-01', DATE '1985-11-01', DATE '1999-05-01'));
----
1985-11-01

query D
select array_min(make_array(DATE '1995-09-01', DATE '1993-03-01', NULL, DATE '1999-05-01'));
----
1993-03-01

query P
select array_min(make_array(TIMESTAMP '1992-09-01', TIMESTAMP '1984-10-01', TIMESTAMP '1995-06-01'));
----
1984-10-01T00:00:00

query P
select array_min(make_array(NULL, TIMESTAMP '1996-10-01', TIMESTAMP '1995-06-01'));
----
1995-06-01T00:00:00

query error Execution error: array_min needs one argument as non-empty array
select array_min(make_array());

## array_pop_back (aliases: `list_pop_back`)

# array_pop_back scalar function with null
Expand Down
33 changes: 33 additions & 0 deletions docs/source/user-guide/sql/scalar_functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -2524,6 +2524,7 @@ _Alias of [current_date](#current_date)._
- [array_intersect](#array_intersect)
- [array_join](#array_join)
- [array_length](#array_length)
- [array_min](#array_min)
- [array_ndims](#array_ndims)
- [array_pop_back](#array_pop_back)
- [array_pop_front](#array_pop_front)
Expand Down Expand Up @@ -2569,6 +2570,7 @@ _Alias of [current_date](#current_date)._
- [list_intersect](#list_intersect)
- [list_join](#list_join)
- [list_length](#list_length)
- [list_min](#list_min)
- [list_ndims](#list_ndims)
- [list_pop_back](#list_pop_back)
- [list_pop_front](#list_pop_front)
Expand Down Expand Up @@ -3002,6 +3004,33 @@ array_length(array, dimension)

- list_length

### `array_min`

Returns the minimum value in the array.

```
array_min(array)
```

#### Arguments

- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators.

#### Example

```sql
> select array_min([3,1,4,2]);
+-----------------------------------------+
| array_min(List([3,1,4,2]))              |
+-----------------------------------------+
| 1                                       |
+-----------------------------------------+
```

#### Aliases

- list_min

### `array_ndims`

Returns the number of dimensions of the array.
Expand Down Expand Up @@ -3759,6 +3788,10 @@ _Alias of [array_to_string](#array_to_string)._

_Alias of [array_length](#array_length)._

### `list_min`

_Alias of [array_min](#array_min)._

### `list_ndims`

_Alias of [array_ndims](#array_ndims)._
Expand Down

0 comments on commit c696cbb

Please sign in to comment.