Skip to content

Commit

Permalink
Feat/polars integration (#87)
Browse files Browse the repository at this point in the history
* feat(polars): add basic feature

* refactor(test): merge polars and pandas
  • Loading branch information
jGundermann authored Mar 21, 2023
1 parent 6b29b24 commit 6dd2772
Show file tree
Hide file tree
Showing 5 changed files with 188 additions and 56 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ dev-setup: install-test-requirements install-doc-requirements
pre-commit install

dev-install:
maturin develop -E pandas
maturin develop -E pandas,polars

prod-install:
./prod_install.sh
Expand Down
2 changes: 1 addition & 1 deletion prod_install.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/bash -xe
rm -rf target/wheels/
maturin build --release
pip install --force-reinstall "$(echo target/wheels/*.whl)[pandas]"
pip install --force-reinstall "$(echo target/wheels/*.whl)[pandas, polars]"
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ dependencies = [

[project.optional-dependencies]
pandas = ["pandas>=1.4.4,<1.6"]
polars = ["polars>=0.16.14,<0.17"]


[project.urls]
"Source Code" = "https://github.com/ToucanToco/fastexcel"
Expand Down
10 changes: 10 additions & 0 deletions python/fastexcel/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pandas as pd

import pyarrow as pa
import polars as pl

from ._fastexcel import __version__, _ExcelReader, _ExcelSheet
from ._fastexcel import read_excel as _read_excel
Expand Down Expand Up @@ -47,6 +48,15 @@ def to_pandas(self) -> "pd.DataFrame":
# We know for sure that the sheet will yield exactly one RecordBatch
return self.to_arrow().to_pandas()

def to_polars(self) -> "pl.DataFrame | pl.Series":
    """Convert the sheet to a Polars object via Arrow.

    Requires the `polars` extra to be installed.

    The sheet is first read with `to_arrow()`, the resulting batch is wrapped
    in a `pyarrow.Table`, and that table is handed to `pl.from_arrow`.

    The return annotation is quoted (as on `to_pandas`) so that it is not
    evaluated when the method is defined: the `X | Y` form applied to classes
    only works at runtime on Python 3.10+.

    Returns:
        Whatever `pl.from_arrow` builds from the table; the annotation mirrors
        that function's `DataFrame | Series` return type.
    """
    # We know for sure that the sheet will yield exactly one RecordBatch
    record_batch = self.to_arrow()
    table = pa.Table.from_batches([record_batch])
    return pl.from_arrow(data=table)

def __repr__(self) -> str:
    """Return the representation string produced by the wrapped `_sheet` object."""
    wrapped_sheet = self._sheet
    return wrapped_sheet.__repr__()

Expand Down
Loading

0 comments on commit 6dd2772

Please sign in to comment.