diff --git a/modin/core/storage_formats/base/doc_utils.py b/modin/core/storage_formats/base/doc_utils.py index ce0f3935731..6ba7a98a7f7 100644 --- a/modin/core/storage_formats/base/doc_utils.py +++ b/modin/core/storage_formats/base/doc_utils.py @@ -193,6 +193,14 @@ def doc_binary_method(operation, sign, self_on_right=False, op_type="arithmetic" fill_value : float or None Value to fill missing elements during frame alignment. """, + "series_comparison": """ + level : int or label + In case of MultiIndex match index values on the passed level. + fill_value : float or None + Value to fill missing elements during frame alignment. + axis : {{0, 1}} + Unused. Parameter needed for compatibility with DataFrame. + """, } verbose_substitution = ( diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py index 343008d2a3d..965fb98efb3 100644 --- a/modin/core/storage_formats/base/query_compiler.py +++ b/modin/core/storage_formats/base/query_compiler.py @@ -647,6 +647,18 @@ def combine_first(self, other, **kwargs): # noqa: PR02 def eq(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.DataFrame.eq)(self, other=other, **kwargs) + @doc_utils.doc_binary_method( + operation="equality comparison", sign="==", op_type="series_comparison" + ) + def series_eq(self, other, **kwargs): # noqa: PR02 + return BinaryDefault.register(pandas.Series.eq)( + self, + other=other, + squeeze_self=True, + squeeze_other=kwargs.pop("squeeze_other", False), + **kwargs, + ) + @doc_utils.add_refer_to("DataFrame.equals") def equals(self, other): # noqa: PR01, RT01 return BinaryDefault.register(pandas.DataFrame.equals)(self, other=other) @@ -685,24 +697,76 @@ def divmod(self, other, **kwargs): def ge(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.DataFrame.ge)(self, other=other, **kwargs) + @doc_utils.doc_binary_method( + operation="greater than or equal comparison", + sign=">=", + 
op_type="series_comparison", + ) + def series_ge(self, other, **kwargs): # noqa: PR02 + return BinaryDefault.register(pandas.Series.ge)( + self, + other=other, + squeeze_self=True, + squeeze_other=kwargs.pop("squeeze_other", False), + **kwargs, + ) + @doc_utils.doc_binary_method( operation="greater than comparison", sign=">", op_type="comparison" ) def gt(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.DataFrame.gt)(self, other=other, **kwargs) + @doc_utils.doc_binary_method( + operation="greater than comparison", sign=">", op_type="series_comparison" + ) + def series_gt(self, other, **kwargs): # noqa: PR02 + return BinaryDefault.register(pandas.Series.gt)( + self, + other=other, + squeeze_self=True, + squeeze_other=kwargs.pop("squeeze_other", False), + **kwargs, + ) + @doc_utils.doc_binary_method( operation="less than or equal comparison", sign="<=", op_type="comparison" ) def le(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.DataFrame.le)(self, other=other, **kwargs) + @doc_utils.doc_binary_method( + operation="less than or equal comparison", + sign="<=", + op_type="series_comparison", + ) + def series_le(self, other, **kwargs): # noqa: PR02 + return BinaryDefault.register(pandas.Series.le)( + self, + other=other, + squeeze_self=True, + squeeze_other=kwargs.pop("squeeze_other", False), + **kwargs, + ) + @doc_utils.doc_binary_method( operation="less than comparison", sign="<", op_type="comparison" ) def lt(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.DataFrame.lt)(self, other=other, **kwargs) + @doc_utils.doc_binary_method( + operation="less than comparison", sign="<", op_type="series_comparison" + ) + def series_lt(self, other, **kwargs): # noqa: PR02 + return BinaryDefault.register(pandas.Series.lt)( + self, + other=other, + squeeze_self=True, + squeeze_other=kwargs.pop("squeeze_other", False), + **kwargs, + ) + @doc_utils.doc_binary_method(operation="modulo", sign="%") def mod(self, other, 
**kwargs): # noqa: PR02 return BinaryDefault.register(pandas.DataFrame.mod)(self, other=other, **kwargs) @@ -818,6 +882,18 @@ def dot(self, other, **kwargs): # noqa: PR02 def ne(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.DataFrame.ne)(self, other=other, **kwargs) + @doc_utils.doc_binary_method( + operation="not equal comparison", sign="!=", op_type="series_comparison" + ) + def series_ne(self, other, **kwargs): # noqa: PR02 + return BinaryDefault.register(pandas.Series.ne)( + self, + other=other, + squeeze_self=True, + squeeze_other=kwargs.pop("squeeze_other", False), + **kwargs, + ) + @doc_utils.doc_binary_method(operation="exponential power", sign="**") def pow(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.DataFrame.pow)(self, other=other, **kwargs) diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py index c7fb0bae21b..655d427de1d 100644 --- a/modin/core/storage_formats/pandas/query_compiler.py +++ b/modin/core/storage_formats/pandas/query_compiler.py @@ -253,6 +253,26 @@ def caller(df, *args, **kwargs): return caller +def _series_logical_binop(func): + """ + Build a callable function to pass to Binary.register for Series logical operators. + + Parameters + ---------- + func : callable + Binary operator method of pandas.Series to be applied. 
+ + Returns + ------- + callable + """ + return lambda x, y, **kwargs: func( + x.squeeze(axis=1), + y.squeeze(axis=1) if kwargs.pop("squeeze_other", False) else y, + **kwargs, + ).to_frame() + + @_inherit_docstrings(BaseQueryCompiler) class PandasQueryCompiler(BaseQueryCompiler, QueryCompilerCaster): """ @@ -522,6 +542,26 @@ def to_numpy(self, **kwargs): sort=False, ) + # Series logical operators take an additional fill_value flag that dataframe does not + series_eq = Binary.register( + _series_logical_binop(pandas.Series.eq), infer_dtypes="bool" + ) + series_ge = Binary.register( + _series_logical_binop(pandas.Series.ge), infer_dtypes="bool" + ) + series_gt = Binary.register( + _series_logical_binop(pandas.Series.gt), infer_dtypes="bool" + ) + series_le = Binary.register( + _series_logical_binop(pandas.Series.le), infer_dtypes="bool" + ) + series_lt = Binary.register( + _series_logical_binop(pandas.Series.lt), infer_dtypes="bool" + ) + series_ne = Binary.register( + _series_logical_binop(pandas.Series.ne), infer_dtypes="bool" + ) + # Needed for numpy API _logical_and = Binary.register( lambda df, other, *args, **kwargs: pandas.DataFrame( diff --git a/modin/pandas/base.py b/modin/pandas/base.py index 04dd845915c..1b893743282 100644 --- a/modin/pandas/base.py +++ b/modin/pandas/base.py @@ -509,6 +509,17 @@ def _binary_op(self, op, other, **kwargs) -> Self: ] if op in exclude_list: kwargs.pop("axis") + # Series logical operations take an additional fill_value argument that DF does not + series_specialize_list = [ + "eq", + "ge", + "gt", + "le", + "lt", + "ne", + ] + if not self._is_dataframe and op in series_specialize_list: + op = "series_" + op new_query_compiler = getattr(self._query_compiler, op)(other, **kwargs) return self._create_or_update_from_compiler(new_query_compiler) diff --git a/modin/pandas/series.py b/modin/pandas/series.py index 00083200762..20825343f0d 100644 --- a/modin/pandas/series.py +++ b/modin/pandas/series.py @@ -1036,7 +1036,14 @@ def eq( 
Return Equal to of series and `other`, element-wise (binary operator `eq`). """ new_self, new_other = self._prepare_inter_op(other) - return super(Series, new_self).eq(new_other, level=level, axis=axis) + return new_self._binary_op( + "eq", + new_other, + level=level, + fill_value=fill_value, + axis=axis, + squeeze_other=isinstance(other, Series), + ) def equals(self, other) -> bool: # noqa: PR01, RT01, D200 """ @@ -1135,7 +1142,7 @@ def floordiv( """ new_self, new_other = self._prepare_inter_op(other) return super(Series, new_self).floordiv( - new_other, level=level, fill_value=None, axis=axis + new_other, level=level, fill_value=fill_value, axis=axis ) def ge( @@ -1145,7 +1152,14 @@ def ge( Return greater than or equal to of series and `other`, element-wise (binary operator `ge`). """ new_self, new_other = self._prepare_inter_op(other) - return super(Series, new_self).ge(new_other, level=level, axis=axis) + return new_self._binary_op( + "ge", + new_other, + level=level, + fill_value=fill_value, + axis=axis, + squeeze_other=isinstance(other, Series), + ) def groupby( self, @@ -1193,7 +1207,14 @@ def gt( Return greater than of series and `other`, element-wise (binary operator `gt`). """ new_self, new_other = self._prepare_inter_op(other) - return super(Series, new_self).gt(new_other, level=level, axis=axis) + return new_self._binary_op( + "gt", + new_other, + level=level, + fill_value=fill_value, + axis=axis, + squeeze_other=isinstance(other, Series), + ) def hist( self, @@ -1311,7 +1332,14 @@ def le( Return less than or equal to of series and `other`, element-wise (binary operator `le`). 
""" new_self, new_other = self._prepare_inter_op(other) - return super(Series, new_self).le(new_other, level=level, axis=axis) + return new_self._binary_op( + "le", + new_other, + level=level, + fill_value=fill_value, + axis=axis, + squeeze_other=isinstance(other, Series), + ) def lt( self, other, level=None, fill_value=None, axis=0 @@ -1320,7 +1348,14 @@ def lt( Return less than of series and `other`, element-wise (binary operator `lt`). """ new_self, new_other = self._prepare_inter_op(other) - return super(Series, new_self).lt(new_other, level=level, axis=axis) + return new_self._binary_op( + "lt", + new_other, + level=level, + fill_value=fill_value, + axis=axis, + squeeze_other=isinstance(other, Series), + ) def map(self, arg, na_action=None) -> Series: # noqa: PR01, RT01, D200 """ @@ -1407,7 +1442,7 @@ def mod( """ new_self, new_other = self._prepare_inter_op(other) return super(Series, new_self).mod( - new_other, level=level, fill_value=None, axis=axis + new_other, level=level, fill_value=fill_value, axis=axis ) def mode(self, dropna=True) -> Series: # noqa: PR01, RT01, D200 @@ -1424,7 +1459,7 @@ def mul( """ new_self, new_other = self._prepare_inter_op(other) return super(Series, new_self).mul( - new_other, level=level, fill_value=None, axis=axis + new_other, level=level, fill_value=fill_value, axis=axis ) multiply = mul @@ -1437,7 +1472,7 @@ def rmul( """ new_self, new_other = self._prepare_inter_op(other) return super(Series, new_self).rmul( - new_other, level=level, fill_value=None, axis=axis + new_other, level=level, fill_value=fill_value, axis=axis ) def ne( @@ -1447,7 +1482,14 @@ def ne( Return not equal to of series and `other`, element-wise (binary operator `ne`). 
""" new_self, new_other = self._prepare_inter_op(other) - return super(Series, new_self).ne(new_other, level=level, axis=axis) + return new_self._binary_op( + "ne", + new_other, + level=level, + fill_value=fill_value, + axis=axis, + squeeze_other=isinstance(other, Series), + ) def nlargest(self, n=5, keep="first") -> Series: # noqa: PR01, RT01, D200 """ @@ -1567,7 +1609,7 @@ def pow( """ new_self, new_other = self._prepare_inter_op(other) return super(Series, new_self).pow( - new_other, level=level, fill_value=None, axis=axis + new_other, level=level, fill_value=fill_value, axis=axis ) @_inherit_docstrings(pandas.Series.prod, apilink="pandas.Series.prod") @@ -1768,7 +1810,7 @@ def rfloordiv( """ new_self, new_other = self._prepare_inter_op(other) return super(Series, new_self).rfloordiv( - new_other, level=level, fill_value=None, axis=axis + new_other, level=level, fill_value=fill_value, axis=axis ) def rmod( @@ -1779,7 +1821,7 @@ def rmod( """ new_self, new_other = self._prepare_inter_op(other) return super(Series, new_self).rmod( - new_other, level=level, fill_value=None, axis=axis + new_other, level=level, fill_value=fill_value, axis=axis ) def rpow( @@ -1790,7 +1832,7 @@ def rpow( """ new_self, new_other = self._prepare_inter_op(other) return super(Series, new_self).rpow( - new_other, level=level, fill_value=None, axis=axis + new_other, level=level, fill_value=fill_value, axis=axis ) def rsub( @@ -1801,7 +1843,7 @@ def rsub( """ new_self, new_other = self._prepare_inter_op(other) return super(Series, new_self).rsub( - new_other, level=level, fill_value=None, axis=axis + new_other, level=level, fill_value=fill_value, axis=axis ) def rtruediv( @@ -1812,7 +1854,7 @@ def rtruediv( """ new_self, new_other = self._prepare_inter_op(other) return super(Series, new_self).rtruediv( - new_other, level=level, fill_value=None, axis=axis + new_other, level=level, fill_value=fill_value, axis=axis ) rdiv = rtruediv @@ -1960,7 +2002,7 @@ def sub( """ new_self, new_other = 
self._prepare_inter_op(other) return super(Series, new_self).sub( - new_other, level=level, fill_value=None, axis=axis + new_other, level=level, fill_value=fill_value, axis=axis ) subtract = sub @@ -2135,7 +2177,7 @@ def truediv( """ new_self, new_other = self._prepare_inter_op(other) return super(Series, new_self).truediv( - new_other, level=level, fill_value=None, axis=axis + new_other, level=level, fill_value=fill_value, axis=axis ) div = divide = truediv diff --git a/modin/tests/pandas/test_series.py b/modin/tests/pandas/test_series.py index b283a7a1ede..115b6679cb6 100644 --- a/modin/tests/pandas/test_series.py +++ b/modin/tests/pandas/test_series.py @@ -5100,3 +5100,48 @@ def test__reduce__(): .rename("league") ) df_equals(result_md, result_pd) + + +@pytest.mark.parametrize( + "op", + [ + "add", + "radd", + "divmod", + "eq", + "floordiv", + "ge", + "gt", + "le", + "lt", + "mod", + "mul", + "rmul", + "ne", + "pow", + "rdivmod", + "rfloordiv", + "rmod", + "rpow", + "rsub", + "rtruediv", + "sub", + "truediv", + ], +) +def test_binary_with_fill_value_issue_7381(op): + # Ensures that series binary operations respect the fill_value flag + series_md, series_pd = create_test_series([0, 1, 2, 3]) + rhs_md, rhs_pd = create_test_series([0]) + result_md = getattr(series_md, op)(rhs_md, fill_value=2) + result_pd = getattr(series_pd, op)(rhs_pd, fill_value=2) + df_equals(result_md, result_pd) + + +@pytest.mark.parametrize("op", ["eq", "ge", "gt", "le", "lt", "ne"]) +def test_logical_binary_with_list(op): + series_md, series_pd = create_test_series([0, 1, 2]) + rhs = [2, 1, 0] + result_md = getattr(series_md, op)(rhs) + result_pd = getattr(series_pd, op)(rhs) + df_equals(result_md, result_pd)