Refactor equality as per spec. eqg and add testing equality ops (#537)

partiql · Jan 15, 2025 · 4792d1c · 4792d1c · github-actions · Jan 15, 2025
1 parent 59dbbcc
commit 4792d1c
Show file tree

Hide file tree

Showing 10 changed files with 183 additions and 56 deletions.
diff --git a/partiql-conformance-tests/tests/mod.rs b/partiql-conformance-tests/tests/mod.rs
@@ -210,7 +210,9 @@ pub(crate) fn pass_eval(
     expected: &TestValue,
 ) {
     match eval(statement, mode, env) {
-        Ok(v) => assert_eq!(v.result, expected.value),
+        Ok(v) => {
+            assert_eq!(&TestValue::from(v), expected)
+        },
         Err(TestError::Parse(_)) => {
             panic!("When evaluating (mode = {mode:#?}) `{statement}`, unexpected parse error")
         }

diff --git a/partiql-conformance-tests/tests/test_value.rs b/partiql-conformance-tests/tests/test_value.rs
@@ -1,18 +1,40 @@
-use partiql_value::Value;
+use partiql_eval::eval::Evaluated;
+use partiql_value::{EqualityValue, NullableEq, Value};
 
 use partiql_extension_ion::decode::{IonDecoderBuilder, IonDecoderConfig};
 use partiql_extension_ion::Encoding;
 
 #[allow(dead_code)]
+#[derive(Debug, Ord, PartialOrd)]
 pub(crate) struct TestValue {
     pub value: Value,
 }
 
+impl Eq for TestValue {}
+
+impl PartialEq for TestValue {
+    fn eq(&self, other: &Self) -> bool {
+        // When testing, we need NaN == NaN and NULL == NULL in order to assert test success properly
+        let wrap_value = EqualityValue::<'_, true, true, Value>;
+        NullableEq::eq(&wrap_value(&self.value), &wrap_value(&other.value)) == Value::Boolean(true)
+    }
+}
+
+impl From<Value> for TestValue {
+    fn from(value: Value) -> Self {
+        TestValue { value }
+    }
+}
+
+impl From<Evaluated> for TestValue {
+    fn from(value: Evaluated) -> Self {
+        value.result.into()
+    }
+}
+
 impl From<&str> for TestValue {
     fn from(contents: &str) -> Self {
-        TestValue {
-            value: parse_test_value_str(contents),
-        }
+        parse_test_value_str(contents).into()
     }
 }
 

diff --git a/partiql-eval/src/eval/expr/operators.rs b/partiql-eval/src/eval/expr/operators.rs
@@ -249,11 +249,11 @@ impl BindEvalExpr for EvalOpBinary {
             EvalOpBinary::And => logical!(AndCheck, partiql_value::BinaryAnd::and),
             EvalOpBinary::Or => logical!(OrCheck, partiql_value::BinaryOr::or),
             EvalOpBinary::Eq => equality!(|lhs, rhs| {
-                let wrap = EqualityValue::<false, Value>;
+                let wrap = EqualityValue::<false, false, Value>;
                 NullableEq::eq(&wrap(lhs), &wrap(rhs))
             }),
             EvalOpBinary::Neq => equality!(|lhs, rhs| {
-                let wrap = EqualityValue::<false, Value>;
+                let wrap = EqualityValue::<false, false, Value>;
                 NullableEq::neq(&wrap(lhs), &wrap(rhs))
             }),
             EvalOpBinary::Gt => comparison!(NullableOrd::gt),

diff --git a/partiql-value/src/bag.rs b/partiql-value/src/bag.rs
@@ -178,16 +178,38 @@ impl Debug for Bag {
 
 impl PartialEq for Bag {
     fn eq(&self, other: &Self) -> bool {
-        if self.len() != other.len() {
-            return false;
+        let wrap = EqualityValue::<true, false, _>;
+        NullableEq::eq(&wrap(self), &wrap(other)) == Value::Boolean(true)
+    }
+}
+
+impl<const NULLS_EQUAL: bool, const NAN_EQUAL: bool> NullableEq
+    for EqualityValue<'_, NULLS_EQUAL, NAN_EQUAL, Bag>
+{
+    #[inline(always)]
+    fn eq(&self, other: &Self) -> Value {
+        let ord_wrap = NullSortedValue::<'_, false, _>;
+        let (l, r) = (self.0, other.0);
+        if l.len() != r.len() {
+            return Value::Boolean(false);
         }
-        for (v1, v2) in self.0.iter().sorted().zip(other.0.iter().sorted()) {
-            let wrap = EqualityValue::<true, Value>;
-            if NullableEq::eq(&wrap(v1), &wrap(v2)) != Value::Boolean(true) {
-                return false;
+
+        let li = l.iter().map(ord_wrap).sorted().map(|nsv| nsv.0);
+        let ri = r.iter().map(ord_wrap).sorted().map(|nsv| nsv.0);
+
+        for (v1, v2) in li.zip(ri) {
+            let wrap = EqualityValue::<{ NULLS_EQUAL }, { NAN_EQUAL }, Value>;
+            if NullableEq::eqg(&wrap(v1), &wrap(v2)) != Value::Boolean(true) {
+                return Value::Boolean(false);
             }
         }
-        true
+        Value::Boolean(true)
+    }
+
+    #[inline(always)]
+    fn eqg(&self, rhs: &Self) -> Value {
+        let wrap = EqualityValue::<'_, true, { NAN_EQUAL }, _>;
+        NullableEq::eq(&wrap(self.0), &wrap(rhs.0))
     }
 }
 

diff --git a/partiql-value/src/comparison.rs b/partiql-value/src/comparison.rs
@@ -1,5 +1,5 @@
-use crate::util;
 use crate::Value;
+use crate::{util, Bag, List, Tuple};
 
 pub trait Comparable {
     fn is_comparable_to(&self, rhs: &Self) -> bool;
@@ -16,6 +16,7 @@ impl Comparable for Value {
             | (Value::Boolean(_), Value::Boolean(_))
             | (Value::String(_), Value::String(_))
             | (Value::Blob(_), Value::Blob(_))
+            | (Value::DateTime(_), Value::DateTime(_))
             | (Value::List(_), Value::List(_))
             | (Value::Bag(_), Value::Bag(_))
             | (Value::Tuple(_), Value::Tuple(_))
@@ -31,19 +32,43 @@ impl Comparable for Value {
 
 // `Value` `eq` and `neq` with Missing and Null propagation
 pub trait NullableEq {
-    type Output;
-    fn eq(&self, rhs: &Self) -> Self::Output;
-    fn neq(&self, rhs: &Self) -> Self::Output;
-}
+    fn eq(&self, rhs: &Self) -> Value;
 
-/// A wrapper on [`T`] that specifies if missing and null values should be equal.
-#[derive(Eq, PartialEq)]
-pub struct EqualityValue<'a, const NULLS_EQUAL: bool, T>(pub &'a T);
+    fn neq(&self, rhs: &Self) -> Value {
+        let eq_result = NullableEq::eq(self, rhs);
+        match eq_result {
+            Value::Boolean(_) | Value::Null => !eq_result,
+            _ => Value::Missing,
+        }
+    }
 
-impl<const GROUP_NULLS: bool> NullableEq for EqualityValue<'_, GROUP_NULLS, Value> {
-    type Output = Value;
+    /// `PartiQL's `eqg` is used to compare the internals of Lists, Bags, and Tuples.
+    ///
+    /// > The eqg, unlike the =, returns true when a NULL is compared to a NULL or a MISSING
+    /// > to a MISSING
+    fn eqg(&self, rhs: &Self) -> Value;
 
-    fn eq(&self, rhs: &Self) -> Self::Output {
+    fn neqg(&self, rhs: &Self) -> Value {
+        let eqg_result = NullableEq::eqg(self, rhs);
+        match eqg_result {
+            Value::Boolean(_) | Value::Null => !eqg_result,
+            _ => Value::Missing,
+        }
+    }
+}
+
+/// A wrapper on [`T`] that specifies equality outcome for missing and null, and `NaN` values.
+#[derive(Eq, PartialEq, Debug)]
+pub struct EqualityValue<'a, const NULLS_EQUAL: bool, const NAN_EQUAL: bool, T>(pub &'a T);
+
+impl<const GROUP_NULLS: bool, const NAN_EQUAL: bool> NullableEq
+    for EqualityValue<'_, GROUP_NULLS, NAN_EQUAL, Value>
+{
+    #[inline(always)]
+    fn eq(&self, rhs: &Self) -> Value {
+        let wrap_list = EqualityValue::<'_, { GROUP_NULLS }, { NAN_EQUAL }, List>;
+        let wrap_bag = EqualityValue::<'_, { GROUP_NULLS }, { NAN_EQUAL }, Bag>;
+        let wrap_tuple = EqualityValue::<'_, { GROUP_NULLS }, { NAN_EQUAL }, Tuple>;
         if GROUP_NULLS {
             if let (Value::Missing | Value::Null, Value::Missing | Value::Null) = (self.0, rhs.0) {
                 return Value::Boolean(true);
@@ -73,16 +98,23 @@ impl<const GROUP_NULLS: bool> NullableEq for EqualityValue<'_, GROUP_NULLS, Valu
             (Value::Decimal(_), Value::Real(_)) => {
                 Value::from(self.0 == &util::coerce_int_or_real_to_decimal(rhs.0))
             }
+            (Value::Real(l), Value::Real(r)) => {
+                if NAN_EQUAL && l.is_nan() && r.is_nan() {
+                    return Value::Boolean(true);
+                }
+                Value::from(l == r)
+            }
+            (Value::List(l), Value::List(r)) => NullableEq::eq(&wrap_list(l), &wrap_list(r)),
+            (Value::Bag(l), Value::Bag(r)) => NullableEq::eq(&wrap_bag(l), &wrap_bag(r)),
+            (Value::Tuple(l), Value::Tuple(r)) => NullableEq::eq(&wrap_tuple(l), &wrap_tuple(r)),
             (_, _) => Value::from(self.0 == rhs.0),
         }
     }
 
-    fn neq(&self, rhs: &Self) -> Self::Output {
-        let eq_result = NullableEq::eq(self, rhs);
-        match eq_result {
-            Value::Boolean(_) | Value::Null => !eq_result,
-            _ => Value::Missing,
-        }
+    #[inline(always)]
+    fn eqg(&self, rhs: &Self) -> Value {
+        let wrap = EqualityValue::<'_, true, { NAN_EQUAL }, _>;
+        NullableEq::eq(&wrap(self.0), &wrap(rhs.0))
     }
 }
 

diff --git a/partiql-value/src/lib.rs b/partiql-value/src/lib.rs
@@ -85,15 +85,15 @@ mod tests {
 
     #[test]
     fn iterators() {
-        let bag: Bag = [1, 10, 3, 4].iter().collect();
+        let bag: Bag = [1, 10, 3, 4].into_iter().collect();
         assert_eq!(bag.len(), 4);
         let max = bag
             .iter()
             .fold(Value::Integer(0), |x, y| if y > &x { y.clone() } else { x });
         assert_eq!(max, Value::Integer(10));
         let _bref = Value::from(bag).as_bag_ref();
 
-        let list: List = [1, 2, 3, -4].iter().collect();
+        let list: List = [1, 2, 3, -4].into_iter().collect();
         assert_eq!(list.len(), 4);
         let max = list
             .iter()
@@ -442,14 +442,14 @@ mod tests {
         //  tests
 
         fn nullable_eq(lhs: Value, rhs: Value) -> Value {
-            let wrap = EqualityValue::<false, Value>;
+            let wrap = EqualityValue::<false, false, Value>;
             let lhs = wrap(&lhs);
             let rhs = wrap(&rhs);
             NullableEq::eq(&lhs, &rhs)
         }
 
         fn nullable_neq(lhs: Value, rhs: Value) -> Value {
-            let wrap = EqualityValue::<false, Value>;
+            let wrap = EqualityValue::<false, false, Value>;
             let lhs = wrap(&lhs);
             let rhs = wrap(&rhs);
             NullableEq::neq(&lhs, &rhs)

diff --git a/partiql-value/src/list.rs b/partiql-value/src/list.rs
@@ -175,16 +175,32 @@ impl Debug for List {
 
 impl PartialEq for List {
     fn eq(&self, other: &Self) -> bool {
-        if self.len() != other.len() {
-            return false;
+        let wrap = EqualityValue::<true, false, _>;
+        NullableEq::eq(&wrap(self), &wrap(other)) == Value::Boolean(true)
+    }
+}
+
+impl<const NULLS_EQUAL: bool, const NAN_EQUAL: bool> NullableEq
+    for EqualityValue<'_, NULLS_EQUAL, NAN_EQUAL, List>
+{
+    #[inline(always)]
+    fn eq(&self, other: &Self) -> Value {
+        if self.0.len() != other.0.len() {
+            return Value::Boolean(false);
         }
         for (v1, v2) in self.0.iter().zip(other.0.iter()) {
-            let wrap = EqualityValue::<true, Value>;
-            if NullableEq::eq(&wrap(v1), &wrap(v2)) != Value::Boolean(true) {
-                return false;
+            let wrap = EqualityValue::<{ NULLS_EQUAL }, { NAN_EQUAL }, Value>;
+            if NullableEq::eqg(&wrap(v1), &wrap(v2)) != Value::Boolean(true) {
+                return Value::Boolean(false);
             }
         }
-        true
+        Value::Boolean(true)
+    }
+
+    #[inline(always)]
+    fn eqg(&self, rhs: &Self) -> Value {
+        let wrap = EqualityValue::<'_, true, { NAN_EQUAL }, _>;
+        NullableEq::eq(&wrap(self.0), &wrap(rhs.0))
     }
 }
 

diff --git a/partiql-value/src/sort.rs b/partiql-value/src/sort.rs
@@ -18,9 +18,9 @@ where
 
 impl<const NULLS_FIRST: bool> Ord for NullSortedValue<'_, NULLS_FIRST, Value> {
     fn cmp(&self, other: &Self) -> Ordering {
-        let wrap_list = NullSortedValue::<{ NULLS_FIRST }, List>;
-        let wrap_tuple = NullSortedValue::<{ NULLS_FIRST }, Tuple>;
-        let wrap_bag = NullSortedValue::<{ NULLS_FIRST }, Bag>;
+        let wrap_list = NullSortedValue::<'_, { NULLS_FIRST }, List>;
+        let wrap_tuple = NullSortedValue::<'_, { NULLS_FIRST }, Tuple>;
+        let wrap_bag = NullSortedValue::<'_, { NULLS_FIRST }, Bag>;
         let null_cond = |order: Ordering| {
             if NULLS_FIRST {
                 order

diff --git a/partiql-value/src/tuple.rs b/partiql-value/src/tuple.rs
@@ -213,19 +213,35 @@ impl Iterator for Tuple {
 
 impl PartialEq for Tuple {
     fn eq(&self, other: &Self) -> bool {
-        if self.vals.len() != other.vals.len() {
-            return false;
+        let wrap = EqualityValue::<true, false, _>;
+        NullableEq::eq(&wrap(self), &wrap(other)) == Value::Boolean(true)
+    }
+}
+
+impl<const NULLS_EQUAL: bool, const NAN_EQUAL: bool> NullableEq
+    for EqualityValue<'_, NULLS_EQUAL, NAN_EQUAL, Tuple>
+{
+    #[inline(always)]
+    fn eq(&self, other: &Self) -> Value {
+        if self.0.vals.len() != other.0.vals.len() {
+            return Value::Boolean(false);
         }
-        for ((ls, lv), (rs, rv)) in self.pairs().sorted().zip(other.pairs().sorted()) {
+        for ((ls, lv), (rs, rv)) in self.0.pairs().sorted().zip(other.0.pairs().sorted()) {
             if ls != rs {
-                return false;
+                return Value::Boolean(false);
             }
-            let wrap = EqualityValue::<true, Value>;
-            if NullableEq::eq(&wrap(lv), &wrap(rv)) != Value::Boolean(true) {
-                return false;
+            let wrap = EqualityValue::<{ NULLS_EQUAL }, { NAN_EQUAL }, Value>;
+            if NullableEq::eqg(&wrap(lv), &wrap(rv)) != Value::Boolean(true) {
+                return Value::Boolean(false);
             }
         }
-        true
+        Value::Boolean(true)
+    }
+
+    #[inline(always)]
+    fn eqg(&self, rhs: &Self) -> Value {
+        let wrap = EqualityValue::<'_, true, { NAN_EQUAL }, _>;
+        NullableEq::eq(&wrap(self.0), &wrap(rhs.0))
     }
 }
 

diff --git a/partiql-value/src/value.rs b/partiql-value/src/value.rs
@@ -378,6 +378,13 @@ impl From<&str> for Value {
     }
 }
 
+impl From<i128> for Value {
+    #[inline]
+    fn from(n: i128) -> Self {
+        Value::from(RustDecimal::from(n))
+    }
+}
+
 impl From<i64> for Value {
     #[inline]
     fn from(n: i64) -> Self {
@@ -409,8 +416,11 @@ impl From<i8> for Value {
 impl From<usize> for Value {
     #[inline]
     fn from(n: usize) -> Self {
-        // TODO overflow to bigint/decimal
-        Value::Integer(n as i64)
+        if n > i64::MAX as usize {
+            Value::from(RustDecimal::from(n))
+        } else {
+            Value::Integer(n as i64)
+        }
     }
 }
 
@@ -445,14 +455,21 @@ impl From<u64> for Value {
 impl From<u128> for Value {
     #[inline]
     fn from(n: u128) -> Self {
-        (n as usize).into()
+        Value::from(RustDecimal::from(n))
     }
 }
 
 impl From<f64> for Value {
     #[inline]
     fn from(f: f64) -> Self {
-        Value::Real(OrderedFloat(f))
+        Value::from(OrderedFloat(f))
+    }
+}
+
+impl From<OrderedFloat<f64>> for Value {
+    #[inline]
+    fn from(f: OrderedFloat<f64>) -> Self {
+        Value::Real(f)
     }
 }
Benchmark suite	Current: `4792d1c`	Previous: `59dbbcc`	Ratio
`arith_agg-avg`	`799180` ns/iter (`± 10763`)	`773594` ns/iter (`± 4076`)	`1.03`
`arith_agg-avg_distinct`	`882685` ns/iter (`± 2463`)	`857665` ns/iter (`± 5095`)	`1.03`
`arith_agg-count`	`854073` ns/iter (`± 16094`)	`822309` ns/iter (`± 44600`)	`1.04`
`arith_agg-count_distinct`	`878784` ns/iter (`± 3765`)	`852355` ns/iter (`± 10718`)	`1.03`
`arith_agg-min`	`856840` ns/iter (`± 5485`)	`826952` ns/iter (`± 39267`)	`1.04`
`arith_agg-min_distinct`	`882859` ns/iter (`± 3213`)	`866099` ns/iter (`± 32879`)	`1.02`
`arith_agg-max`	`860209` ns/iter (`± 4433`)	`834860` ns/iter (`± 30814`)	`1.03`
`arith_agg-max_distinct`	`888296` ns/iter (`± 2830`)	`868044` ns/iter (`± 3176`)	`1.02`
`arith_agg-sum`	`859308` ns/iter (`± 10047`)	`832122` ns/iter (`± 2947`)	`1.03`
`arith_agg-sum_distinct`	`880882` ns/iter (`± 9083`)	`860507` ns/iter (`± 8300`)	`1.02`
`arith_agg-avg-count-min-max-sum`	`1022309` ns/iter (`± 7418`)	`990002` ns/iter (`± 6705`)	`1.03`
`arith_agg-avg-count-min-max-sum-group_by`	`1299778` ns/iter (`± 9948`)	`1278229` ns/iter (`± 28446`)	`1.02`
`arith_agg-avg-count-min-max-sum-group_by-group_as`	`1935932` ns/iter (`± 9387`)	`1870588` ns/iter (`± 10252`)	`1.03`
`arith_agg-avg_distinct-count_distinct-min_distinct-max_distinct-sum_distinct`	`1235222` ns/iter (`± 11077`)	`1191863` ns/iter (`± 16141`)	`1.04`
`arith_agg-avg_distinct-count_distinct-min_distinct-max_distinct-sum_distinct-group_by`	`1529091` ns/iter (`± 10680`)	`1461233` ns/iter (`± 40502`)	`1.05`
`arith_agg-avg_distinct-count_distinct-min_distinct-max_distinct-sum_distinct-group_by-group_as`	`2149568` ns/iter (`± 16375`)	`2075422` ns/iter (`± 12211`)	`1.04`
`parse-1`	`5441` ns/iter (`± 152`)	`5436` ns/iter (`± 346`)	`1.00`
`parse-15`	`46367` ns/iter (`± 159`)	`47354` ns/iter (`± 198`)	`0.98`
`parse-30`	`90639` ns/iter (`± 191`)	`92881` ns/iter (`± 419`)	`0.98`
`compile-1`	`4295` ns/iter (`± 109`)	`4142` ns/iter (`± 9`)	`1.04`
`compile-15`	`30685` ns/iter (`± 129`)	`30301` ns/iter (`± 128`)	`1.01`
`compile-30`	`64239` ns/iter (`± 2548`)	`62895` ns/iter (`± 743`)	`1.02`
`plan-1`	`70894` ns/iter (`± 387`)	`69940` ns/iter (`± 1149`)	`1.01`
`plan-15`	`1105696` ns/iter (`± 10190`)	`1097476` ns/iter (`± 80703`)	`1.01`
`plan-30`	`2213383` ns/iter (`± 25125`)	`2192204` ns/iter (`± 9567`)	`1.01`
`eval-1`	`12809424` ns/iter (`± 223288`)	`12168571` ns/iter (`± 111834`)	`1.05`
`eval-15`	`79139260` ns/iter (`± 1088736`)	`78372842` ns/iter (`± 1204765`)	`1.01`
`eval-30`	`149902007` ns/iter (`± 740154`)	`149908935` ns/iter (`± 1993148`)	`1.00`
`join`	`9971` ns/iter (`± 49`)	`9891` ns/iter (`± 28`)	`1.01`
`simple`	`2565` ns/iter (`± 10`)	`2558` ns/iter (`± 19`)	`1.00`
`simple-no`	`465` ns/iter (`± 1`)	`486` ns/iter (`± 1`)	`0.96`
`numbers`	`57` ns/iter (`± 0`)	`48` ns/iter (`± 0`)	`1.19`
`parse-simple`	`736` ns/iter (`± 3`)	`766` ns/iter (`± 2`)	`0.96`
`parse-ion`	`2304` ns/iter (`± 24`)	`2495` ns/iter (`± 7`)	`0.92`
`parse-group`	`6892` ns/iter (`± 21`)	`7387` ns/iter (`± 22`)	`0.93`
`parse-complex`	`18554` ns/iter (`± 93`)	`19209` ns/iter (`± 258`)	`0.97`
`parse-complex-fexpr`	`25476` ns/iter (`± 203`)	`26160` ns/iter (`± 114`)	`0.97`