Skip to content

Commit

Permalink
Throw a specific and clear error
Browse files Browse the repository at this point in the history
  • Loading branch information
theroggy committed Jan 26, 2025
1 parent 4165e6a commit 24c8a06
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 3 deletions.
10 changes: 9 additions & 1 deletion pyogrio/geopandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,15 @@ def read_dataframe(
kwargs = {"self_destruct": True}
if arrow_to_pandas_kwargs is not None:
kwargs.update(arrow_to_pandas_kwargs)
df = table.to_pandas(**kwargs)

try:
df = table.to_pandas(**kwargs)
except UnicodeDecodeError as ex:
# Arrow does not support reading data in a non-UTF-8 encoding
raise DataSourceError(
"The file being read is not encoded in UTF-8; please use_arrow=False"
) from ex

del table

if fid_as_index:
Expand Down
15 changes: 13 additions & 2 deletions pyogrio/tests/test_geopandas_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,13 +130,24 @@ def test_read_csv_encoding(tmp_path, encoding, arrow):
reason="test requires non-UTF-8 default platform",
)
def test_read_csv_platform_encoding(tmp_path, use_arrow):
"""verify that read defaults to platform encoding; only works on Windows (CP1252)"""
"""Verify that read defaults to platform encoding; only works on Windows (CP1252).
When use_arrow=True, reading a non-UTF-8 file fails.
"""
csv_path = tmp_path / "test.csv"
with open(csv_path, "w", encoding=locale.getpreferredencoding()) as csv:
csv.write("näme,city\n")
csv.write("Wilhelm Röntgen,Zürich\n")

df = read_dataframe(csv_path, use_arrow=use_arrow)
if use_arrow:
handler = pytest.raises(
DataSourceError, match="The file being read is not encoded in UTF-8"
)
else:
handler = contextlib.nullcontext()

with handler:
df = read_dataframe(csv_path, use_arrow=use_arrow)

assert len(df) == 1
assert df.columns.tolist() == ["näme", "city"]
Expand Down

0 comments on commit 24c8a06

Please sign in to comment.