Skip to content

Commit

Permalink
BUG: Fix dtypes for read_json (pandas-dev#42819)
Browse files Browse the repository at this point in the history
* Fix dtypes for read_json

* Address comments

* Add whatsnew entry

* Update doc/source/whatsnew/v1.4.0.rst

Co-authored-by: Matthew Zeitlin <[email protected]>

* Linting

Co-authored-by: Matthew Zeitlin <[email protected]>
  • Loading branch information
r-raymond and mzeitlin11 authored Oct 4, 2021
1 parent 0eeda64 commit 6a1c6b4
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 8 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,7 @@ I/O
- Bug in unpickling a :class:`Index` with object dtype incorrectly inferring numeric dtypes (:issue:`43188`)
- Bug in :func:`read_csv` where reading multi-header input with unequal lengths incorrectly raised an uncontrolled ``IndexError`` (:issue:`43102`)
- Bug in :func:`read_csv`, changed exception class when expecting a file path name or file-like object from ``OSError`` to ``TypeError`` (:issue:`43366`)
- Bug in :func:`read_json` not handling non-numpy dtypes correctly (especially ``category``) (:issue:`21892`, :issue:`33205`)
- Bug in :func:`json_normalize` where multi-character ``sep`` parameter is incorrectly prefixed to every key (:issue:`43831`)
- Bug in :func:`read_csv` with :code:`float_precision="round_trip"` which did not skip initial/trailing whitespace (:issue:`43713`)
-
Expand Down
9 changes: 1 addition & 8 deletions pandas/io/json/_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -876,11 +876,8 @@ def check_keys_split(self, decoded):

def parse(self):

# try numpy
numpy = self.numpy
if numpy:
if self.numpy:
self._parse_numpy()

else:
self._parse_no_numpy()

Expand Down Expand Up @@ -941,10 +938,6 @@ def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True):
)
if dtype is not None:
try:
# error: Argument 1 to "dtype" has incompatible type
# "Union[ExtensionDtype, str, dtype[Any], Type[object]]";
# expected "Type[Any]"
dtype = np.dtype(dtype) # type: ignore[arg-type]
return data.astype(dtype), True
except (TypeError, ValueError):
return data, False
Expand Down
30 changes: 30 additions & 0 deletions pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1387,6 +1387,36 @@ def test_from_json_to_json_table_dtypes(self):
result = read_json(dfjson, orient="table")
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("orient", ["split", "records", "index", "columns"])
def test_to_json_from_json_columns_dtypes(self, orient):
    """
    Round-trip a mixed-dtype frame through ``to_json``/``read_json``.

    The frame is serialized with the parametrized ``orient`` and read back
    with an explicit per-column ``dtype`` mapping; the result must equal
    the original frame.
    """
    # GH21892 GH33205
    # Column name -> (values, dtype); one table drives both the expected
    # frame and the dtype mapping handed to read_json.
    column_specs = {
        "Integer": ([1, 2, 3], "int64"),
        "Float": ([None, 2.0, 3.0], "float64"),
        "Object": ([None, "", "c"], "object"),
        "Bool": ([True, False, True], "bool"),
        "Category": (["a", "b", None], "category"),
        "Datetime": (["2020-01-01", None, "2020-01-03"], "datetime64[ns]"),
    }
    expected = DataFrame.from_dict(
        {
            label: Series(values, dtype=dtype_name)
            for label, (values, dtype_name) in column_specs.items()
        }
    )
    requested_dtypes = {
        label: dtype_name for label, (_, dtype_name) in column_specs.items()
    }

    serialized = expected.to_json(orient=orient)
    result = read_json(serialized, orient=orient, dtype=requested_dtypes)

    tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("dtype", [True, {"b": int, "c": int}])
def test_read_json_table_dtype_raises(self, dtype):
# GH21345
Expand Down

0 comments on commit 6a1c6b4

Please sign in to comment.