From f70176a796db92a5484ae4d3530906bdc3f5eb70 Mon Sep 17 00:00:00 2001
From: Jonathan Shi
Date: Wed, 28 Aug 2024 03:32:28 -0700
Subject: [PATCH] FIX-#7371: Fix inserting datelike values into a DataFrame (#7372)

Signed-off-by: Jonathan Shi
---
 modin/core/dataframe/pandas/metadata/dtypes.py    |  2 +-
 modin/tests/pandas/dataframe/test_map_metadata.py | 15 +++++++++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/modin/core/dataframe/pandas/metadata/dtypes.py b/modin/core/dataframe/pandas/metadata/dtypes.py
index 1918cce16fa..9220a2dace4 100644
--- a/modin/core/dataframe/pandas/metadata/dtypes.py
+++ b/modin/core/dataframe/pandas/metadata/dtypes.py
@@ -1225,7 +1225,7 @@ def extract_dtype(value) -> DtypeObj | pandas.Series:
     """
     try:
         dtype = pandas.api.types.pandas_dtype(value)
-    except TypeError:
+    except (TypeError, ValueError):
         dtype = pandas.Series(value).dtype
     return dtype
 
diff --git a/modin/tests/pandas/dataframe/test_map_metadata.py b/modin/tests/pandas/dataframe/test_map_metadata.py
index 07b195bdafa..fc9c3b76ea7 100644
--- a/modin/tests/pandas/dataframe/test_map_metadata.py
+++ b/modin/tests/pandas/dataframe/test_map_metadata.py
@@ -1837,3 +1837,18 @@ def test_constructor_from_index():
     data = pd.Index([1, 2, 3], name="pricing_date")
     modin_df, pandas_df = create_test_dfs(data)
     df_equals(modin_df, pandas_df)
+
+
+def test_insert_datelike_string_issue_7371():
+    # When a new value is inserted into a frame, we call pandas.api.types.pandas_dtype(value) to
+    # extract the dtype of an object like a pandas Series or numpy array. When a scalar value is passed,
+    # this usually raises a TypeError, so we construct a local pandas Series from the object and
+    # extract the dtype from there.
+    # When the passed value is a date-like string, pandas will instead raise a ValueError because
+    # it tries to parse it as a numpy structured dtype. After fixing GH#7371, we now catch
+    # ValueError in addition to TypeError to handle this case.
+    modin_df = pd.DataFrame({"a": [0]})
+    modin_df["c"] = "2020-01-01"
+    pandas_df = pandas.DataFrame({"a": [0]})
+    pandas_df["c"] = "2020-01-01"
+    df_equals(modin_df, pandas_df)