Skip to content

Commit

Permalink
function: Allow more expressive array signatures
Browse files Browse the repository at this point in the history
This commit allows for more expressive array function signatures.
Previously, `ArrayFunctionSignature` was an enum of potential argument
combinations and orders. For many array functions, none of the
`ArrayFunctionSignature` variants worked, so they used
`TypeSignature::VariadicAny` instead. This commit allows those
functions to use more descriptive signatures, which saves them
from having to perform manual type checking in the function
implementation.

As an example, this commit also updates the signature of the
`array_replace` family of functions to use a new expressive signature,
which removes a panic that existed previously.

There are still a couple of limitations with this approach. First,
there is no way to describe a function that takes multiple arrays of
differing types or dimensions. Second, functions that take map arrays
or recursive arrays are not supported when they have more than one
argument.

Works towards resolving #14451
  • Loading branch information
jkosh44 committed Feb 6, 2025
1 parent bab0f54 commit d4b74db
Show file tree
Hide file tree
Showing 7 changed files with 265 additions and 180 deletions.
124 changes: 77 additions & 47 deletions datafusion/expr-common/src/signature.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
//! and return types of functions in DataFusion.
use std::fmt::Display;
use std::num::NonZeroUsize;

use crate::type_coercion::aggregates::NUMERICS;
use arrow::datatypes::{DataType, IntervalUnit, TimeUnit};
Expand Down Expand Up @@ -227,25 +226,11 @@ impl Display for TypeSignatureClass {

#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum ArrayFunctionSignature {
/// Specialized Signature for ArrayAppend and similar functions
/// The first argument should be List/LargeList/FixedSizedList, and the second argument should be non-list or list.
/// The second argument's list dimension should be one dimension less than the first argument's list dimension.
/// List dimension of the List/LargeList is equivalent to the number of List.
/// List dimension of the non-list is 0.
ArrayAndElement,
/// Specialized Signature for ArrayPrepend and similar functions
/// The first argument should be non-list or list, and the second argument should be List/LargeList.
/// The first argument's list dimension should be one dimension less than the second argument's list dimension.
ElementAndArray,
/// Specialized Signature for Array functions of the form (List/LargeList, Index+)
/// The first argument should be List/LargeList/FixedSizedList, and the next n arguments should be Int64.
ArrayAndIndexes(NonZeroUsize),
/// Specialized Signature for Array functions of the form (List/LargeList, Element, Optional Index)
ArrayAndElementAndOptionalIndex,
/// Specialized Signature for ArrayEmpty and similar functions
/// The function takes a single argument that must be a List/LargeList/FixedSizeList
/// or something that can be coerced to one of those types.
Array,
/// A function takes at least one List/LargeList/FixedSizeList argument.
Array {
/// A full list of the arguments accepted by this function.
arguments: Vec<ArrayFunctionArgument>,
},
/// A function takes a single argument that must be a List/LargeList/FixedSizeList
/// which gets coerced to List, with element type recursively coerced to List too if it is list-like.
RecursiveArray,
Expand All @@ -257,25 +242,15 @@ pub enum ArrayFunctionSignature {
impl Display for ArrayFunctionSignature {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ArrayFunctionSignature::ArrayAndElement => {
write!(f, "array, element")
}
ArrayFunctionSignature::ArrayAndElementAndOptionalIndex => {
write!(f, "array, element, [index]")
}
ArrayFunctionSignature::ElementAndArray => {
write!(f, "element, array")
}
ArrayFunctionSignature::ArrayAndIndexes(count) => {
write!(f, "array")?;
for _ in 0..count.get() {
write!(f, ", index")?;
ArrayFunctionSignature::Array { arguments } => {
for (idx, argument) in arguments.iter().enumerate() {
write!(f, "{argument}")?;
if idx != arguments.len() - 1 {
write!(f, ", ")?;
}
}
Ok(())
}
ArrayFunctionSignature::Array => {
write!(f, "array")
}
ArrayFunctionSignature::RecursiveArray => {
write!(f, "recursive_array")
}
Expand All @@ -286,6 +261,34 @@ impl Display for ArrayFunctionSignature {
}
}

/// The kind of a single argument accepted by an array function.
///
/// A sequence of these values makes up the `arguments` list of
/// `ArrayFunctionSignature::Array`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum ArrayFunctionArgument {
    /// Either a list or a non-list value. Its list dimension should be one
    /// less than the list dimension of the Array argument.
    Element,
    /// An index, given as an Int64.
    Index,
    /// A List/LargeList/FixedSizeList value. Every Array argument must be
    /// coercible to the same type.
    Array,
}

impl Display for ArrayFunctionArgument {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ArrayFunctionArgument::Element => {
write!(f, "element")
}
ArrayFunctionArgument::Index => {
write!(f, "index")
}
ArrayFunctionArgument::Array => {
write!(f, "array")
}
}
}
}

impl TypeSignature {
pub fn to_string_repr(&self) -> Vec<String> {
match self {
Expand Down Expand Up @@ -580,46 +583,73 @@ impl Signature {
pub fn array_and_element(volatility: Volatility) -> Self {
Signature {
type_signature: TypeSignature::ArraySignature(
ArrayFunctionSignature::ArrayAndElement,
ArrayFunctionSignature::Array {
arguments: vec![
ArrayFunctionArgument::Array,
ArrayFunctionArgument::Element,
],
},
),
volatility,
}
}
/// Specialized Signature for Array functions with an optional index
pub fn array_and_element_and_optional_index(volatility: Volatility) -> Self {
Signature {
type_signature: TypeSignature::ArraySignature(
ArrayFunctionSignature::ArrayAndElementAndOptionalIndex,
),
type_signature: TypeSignature::OneOf(vec![
TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
arguments: vec![
ArrayFunctionArgument::Array,
ArrayFunctionArgument::Element,
],
}),
TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
arguments: vec![
ArrayFunctionArgument::Array,
ArrayFunctionArgument::Element,
ArrayFunctionArgument::Index,
],
}),
]),
volatility,
}
}
/// Specialized Signature for ArrayPrepend and similar functions
pub fn element_and_array(volatility: Volatility) -> Self {
Signature {
type_signature: TypeSignature::ArraySignature(
ArrayFunctionSignature::ElementAndArray,
ArrayFunctionSignature::Array {
arguments: vec![
ArrayFunctionArgument::Element,
ArrayFunctionArgument::Array,
],
},
),
volatility,
}
}
/// Specialized Signature for ArrayElement and similar functions
pub fn array_and_index(volatility: Volatility) -> Self {
Self::array_and_indexes(volatility, NonZeroUsize::new(1).expect("1 is non-zero"))
}
/// Specialized Signature for ArraySlice and similar functions
pub fn array_and_indexes(volatility: Volatility, count: NonZeroUsize) -> Self {
Signature {
type_signature: TypeSignature::ArraySignature(
ArrayFunctionSignature::ArrayAndIndexes(count),
ArrayFunctionSignature::Array {
arguments: vec![
ArrayFunctionArgument::Array,
ArrayFunctionArgument::Index,
],
},
),
volatility,
}
}
/// Specialized Signature for ArrayEmpty and similar functions
pub fn array(volatility: Volatility) -> Self {
Signature {
type_signature: TypeSignature::ArraySignature(ArrayFunctionSignature::Array),
type_signature: TypeSignature::ArraySignature(
ArrayFunctionSignature::Array {
arguments: vec![ArrayFunctionArgument::Array],
},
),
volatility,
}
}
Expand Down
4 changes: 2 additions & 2 deletions datafusion/expr/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ pub use datafusion_expr_common::columnar_value::ColumnarValue;
pub use datafusion_expr_common::groups_accumulator::{EmitTo, GroupsAccumulator};
pub use datafusion_expr_common::operator::Operator;
pub use datafusion_expr_common::signature::{
ArrayFunctionSignature, Signature, TypeSignature, TypeSignatureClass, Volatility,
TIMEZONE_WILDCARD,
ArrayFunctionArgument, ArrayFunctionSignature, Signature, TypeSignature,
TypeSignatureClass, Volatility, TIMEZONE_WILDCARD,
};
pub use datafusion_expr_common::type_coercion::binary;
pub use expr::{
Expand Down
Loading

0 comments on commit d4b74db

Please sign in to comment.