Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

function: Allow more expressive array signatures #14532

Merged
merged 16 commits into from
Feb 14, 2025
44 changes: 36 additions & 8 deletions datafusion/common/src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,13 @@ pub fn base_type(data_type: &DataType) -> DataType {
}
}

/// Information about how to coerce lists.
///
/// A value of this type is passed, as an `Option<&ListCoercion>`, to
/// [`coerced_type_with_base_type_only`] and is stored in array function
/// signatures to describe how list-like argument types are rewritten during
/// coercion. `None` means no extra list coercion is requested.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum ListCoercion {
/// [`DataType::FixedSizeList`] should be coerced to [`DataType::List`].
///
/// When this variant is supplied, a fixed-size list is rewritten as a
/// variable-length list: the field name and nullability are kept, while the
/// fixed length is dropped. Without it, a fixed-size list keeps its length.
FixedSizedListToList,
}

/// A helper function to coerce base type in List.
///
/// Example
Expand All @@ -600,26 +607,47 @@ pub fn base_type(data_type: &DataType) -> DataType {
///
/// let data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
/// let base_type = DataType::Float64;
/// let coerced_type = coerced_type_with_base_type_only(&data_type, &base_type);
/// let coerced_type = coerced_type_with_base_type_only(&data_type, &base_type, None);
/// assert_eq!(coerced_type, DataType::List(Arc::new(Field::new_list_field(DataType::Float64, true))));
pub fn coerced_type_with_base_type_only(
data_type: &DataType,
base_type: &DataType,
array_coercion: Option<&ListCoercion>,
) -> DataType {
match data_type {
DataType::List(field) | DataType::FixedSizeList(field, _) => {
let field_type =
coerced_type_with_base_type_only(field.data_type(), base_type);
match (data_type, array_coercion) {
(DataType::List(field), _)
| (DataType::FixedSizeList(field, _), Some(ListCoercion::FixedSizedListToList)) =>
{
let field_type = coerced_type_with_base_type_only(
field.data_type(),
base_type,
array_coercion,
);

DataType::List(Arc::new(Field::new(
field.name(),
field_type,
field.is_nullable(),
)))
}
DataType::LargeList(field) => {
let field_type =
coerced_type_with_base_type_only(field.data_type(), base_type);
(DataType::FixedSizeList(field, len), _) => {
let field_type = coerced_type_with_base_type_only(
field.data_type(),
base_type,
array_coercion,
);

DataType::FixedSizeList(
Arc::new(Field::new(field.name(), field_type, field.is_nullable())),
*len,
)
}
(DataType::LargeList(field), _) => {
let field_type = coerced_type_with_base_type_only(
field.data_type(),
base_type,
array_coercion,
);

DataType::LargeList(Arc::new(Field::new(
field.name(),
Expand Down
135 changes: 80 additions & 55 deletions datafusion/expr-common/src/signature.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@
//! and return types of functions in DataFusion.

use std::fmt::Display;
use std::num::NonZeroUsize;

use crate::type_coercion::aggregates::NUMERICS;
use arrow::datatypes::{DataType, IntervalUnit, TimeUnit};
use datafusion_common::types::{LogicalTypeRef, NativeType};
use datafusion_common::utils::ListCoercion;
use itertools::Itertools;

/// Constant that is used as a placeholder for any valid timezone.
Expand Down Expand Up @@ -227,25 +227,13 @@ impl Display for TypeSignatureClass {

#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum ArrayFunctionSignature {
/// Specialized Signature for ArrayAppend and similar functions
/// The first argument should be List/LargeList/FixedSizedList, and the second argument should be non-list or list.
/// The second argument's list dimension should be one dimension less than the first argument's list dimension.
/// List dimension of the List/LargeList is equivalent to the number of List.
/// List dimension of the non-list is 0.
ArrayAndElement,
/// Specialized Signature for ArrayPrepend and similar functions
/// The first argument should be non-list or list, and the second argument should be List/LargeList.
/// The first argument's list dimension should be one dimension less than the second argument's list dimension.
ElementAndArray,
/// Specialized Signature for Array functions of the form (List/LargeList, Index+)
/// The first argument should be List/LargeList/FixedSizedList, and the next n arguments should be Int64.
ArrayAndIndexes(NonZeroUsize),
/// Specialized Signature for Array functions of the form (List/LargeList, Element, Optional Index)
ArrayAndElementAndOptionalIndex,
/// Specialized Signature for ArrayEmpty and similar functions
/// The function takes a single argument that must be a List/LargeList/FixedSizeList
/// or something that can be coerced to one of those types.
Array,
/// A function that takes at least one List/LargeList/FixedSizeList argument.
Array {
/// A full list of the arguments accepted by this function.
arguments: Vec<ArrayFunctionArgument>,
/// Additional information about how array arguments should be coerced.
array_coercion: Option<ListCoercion>,
},
/// A function that takes a single argument that must be a List/LargeList/FixedSizeList,
/// which gets coerced to List, with its element type recursively coerced to List too if it is list-like.
RecursiveArray,
Expand All @@ -257,25 +245,15 @@ pub enum ArrayFunctionSignature {
impl Display for ArrayFunctionSignature {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ArrayFunctionSignature::ArrayAndElement => {
write!(f, "array, element")
}
ArrayFunctionSignature::ArrayAndElementAndOptionalIndex => {
write!(f, "array, element, [index]")
}
ArrayFunctionSignature::ElementAndArray => {
write!(f, "element, array")
}
ArrayFunctionSignature::ArrayAndIndexes(count) => {
write!(f, "array")?;
for _ in 0..count.get() {
write!(f, ", index")?;
ArrayFunctionSignature::Array { arguments, .. } => {
for (idx, argument) in arguments.iter().enumerate() {
write!(f, "{argument}")?;
if idx != arguments.len() - 1 {
write!(f, ", ")?;
}
}
Ok(())
}
ArrayFunctionSignature::Array => {
write!(f, "array")
}
ArrayFunctionSignature::RecursiveArray => {
write!(f, "recursive_array")
}
Expand All @@ -286,6 +264,34 @@ impl Display for ArrayFunctionSignature {
}
}

#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum ArrayFunctionArgument {
/// A non-list or list argument. The list dimensions should be one less than the Array's list
/// dimensions.
Element,
/// An Int64 index argument.
Index,
Comment on lines +272 to +273
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Offset might be a better name for this variant? An `Index` argument can technically also be used for sizes in functions like `array_resize`, or for counts in functions like `array_replace_n`.

/// An argument of type List/LargeList/FixedSizeList. All Array arguments must be coercible
/// to the same type.
Array,
}

impl Display for ArrayFunctionArgument {
    /// Writes the lowercase name of the argument kind: `element`, `index` or
    /// `array`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the label first, then emit it with a single write.
        let label = match self {
            ArrayFunctionArgument::Element => "element",
            ArrayFunctionArgument::Index => "index",
            ArrayFunctionArgument::Array => "array",
        };
        f.write_str(label)
    }
}

impl TypeSignature {
pub fn to_string_repr(&self) -> Vec<String> {
match self {
Expand Down Expand Up @@ -580,46 +586,65 @@ impl Signature {
pub fn array_and_element(volatility: Volatility) -> Self {
Signature {
type_signature: TypeSignature::ArraySignature(
ArrayFunctionSignature::ArrayAndElement,
ArrayFunctionSignature::Array {
arguments: vec![
ArrayFunctionArgument::Array,
ArrayFunctionArgument::Element,
],
array_coercion: Some(ListCoercion::FixedSizedListToList),
},
),
volatility,
}
}
/// Specialized Signature for Array functions with an optional index
pub fn array_and_element_and_optional_index(volatility: Volatility) -> Self {
Signature {
type_signature: TypeSignature::ArraySignature(
ArrayFunctionSignature::ArrayAndElementAndOptionalIndex,
),
volatility,
}
}
/// Specialized Signature for ArrayPrepend and similar functions
pub fn element_and_array(volatility: Volatility) -> Self {
Signature {
type_signature: TypeSignature::ArraySignature(
ArrayFunctionSignature::ElementAndArray,
),
type_signature: TypeSignature::OneOf(vec![
TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
arguments: vec![
ArrayFunctionArgument::Array,
ArrayFunctionArgument::Element,
],
array_coercion: None,
}),
TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
arguments: vec![
ArrayFunctionArgument::Array,
ArrayFunctionArgument::Element,
ArrayFunctionArgument::Index,
],
array_coercion: None,
}),
]),
volatility,
}
}

/// Specialized Signature for ArrayElement and similar functions
pub fn array_and_index(volatility: Volatility) -> Self {
Self::array_and_indexes(volatility, NonZeroUsize::new(1).expect("1 is non-zero"))
}
/// Specialized Signature for ArraySlice and similar functions
pub fn array_and_indexes(volatility: Volatility, count: NonZeroUsize) -> Self {
Signature {
type_signature: TypeSignature::ArraySignature(
ArrayFunctionSignature::ArrayAndIndexes(count),
ArrayFunctionSignature::Array {
arguments: vec![
ArrayFunctionArgument::Array,
ArrayFunctionArgument::Index,
],
array_coercion: None,
},
),
volatility,
}
}
/// Specialized Signature for ArrayEmpty and similar functions
pub fn array(volatility: Volatility) -> Self {
Signature {
type_signature: TypeSignature::ArraySignature(ArrayFunctionSignature::Array),
type_signature: TypeSignature::ArraySignature(
ArrayFunctionSignature::Array {
arguments: vec![ArrayFunctionArgument::Array],
array_coercion: None,
},
),
volatility,
}
}
Expand Down
4 changes: 2 additions & 2 deletions datafusion/expr/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ pub use datafusion_expr_common::columnar_value::ColumnarValue;
pub use datafusion_expr_common::groups_accumulator::{EmitTo, GroupsAccumulator};
pub use datafusion_expr_common::operator::Operator;
pub use datafusion_expr_common::signature::{
ArrayFunctionSignature, Signature, TypeSignature, TypeSignatureClass, Volatility,
TIMEZONE_WILDCARD,
ArrayFunctionArgument, ArrayFunctionSignature, Signature, TypeSignature,
TypeSignatureClass, Volatility, TIMEZONE_WILDCARD,
};
pub use datafusion_expr_common::type_coercion::binary;
pub use expr::{
Expand Down
Loading
Loading