-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
update repr_llm and add DataFrameSummarizer with customizable summarizing function (#323)
* update repr_llm * wip * getter * compat * cleanup
- Loading branch information
Showing 4 changed files with 64 additions and 10 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
from typing import Callable, Optional | ||
|
||
import pandas as pd | ||
|
||
|
||
class DataFrameSummarizer:
    """Hold the callable used to turn a pandas DataFrame into a summary.

    A single shared instance is available through ``instance()``. When no
    callable is configured, ``summarize`` falls back to a plain pandas
    rendering of the frame.
    """

    # Lazily created shared instance returned by ``instance()``.
    _instance: Optional["DataFrameSummarizer"] = None
    # Callable applied to the DataFrame by ``summarize``; ``None`` selects
    # the built-in pandas fallback.
    summarizing_func: Optional[Callable] = None

    def __init__(self, summarizing_func: Optional[Callable] = None):
        """Create a summarizer.

        Args:
            summarizing_func: Callable to apply to DataFrames. When ``None``,
                an attempt is made to use ``summarize_dataframe`` from
                ``repr_llm.pandas`` instead.
        """
        if summarizing_func is None:
            self._try_to_load_repr_llm()
        else:
            self.summarizing_func = summarizing_func

    def _try_to_load_repr_llm(self) -> None:
        """Load repr_llm's summarize_dataframe into the summarizing_func if it's available."""
        try:
            from repr_llm.pandas import summarize_dataframe
        except ImportError:
            # Optional dependency is missing: leave summarizing_func untouched.
            return
        self.summarizing_func = summarize_dataframe

    @classmethod
    def instance(cls) -> "DataFrameSummarizer":
        """Return the shared instance, creating it on first use."""
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    def summarize(self, df: pd.DataFrame) -> str:
        """Generate a summary of a dataframe using the configured summarizing_func.

        Args:
            df: The DataFrame to summarize.

        Returns:
            The result of ``summarizing_func(df)`` when a callable is
            configured; otherwise the string form of ``df.describe()``, or of
            ``df`` itself when the frame has no columns.

        Raises:
            ValueError: If ``df`` is not a pandas DataFrame.
        """
        if not isinstance(df, pd.DataFrame):
            raise ValueError("`df` must be a pandas DataFrame")

        if self.summarizing_func is not None:
            return self.summarizing_func(df)

        if df.columns.empty:
            # describe() raises ValueError on a frame without columns, so
            # render the (empty) frame directly instead of crashing.
            return df.to_string()

        return df.describe().to_string()
|
||
|
||
def get_summarizing_function() -> Optional[Callable]:
    """Return the summarizing callable held by the shared DataFrameSummarizer.

    The value is whatever ``summarizing_func`` currently is on the instance
    returned by ``DataFrameSummarizer.instance()``, which may be ``None``.
    """
    shared_summarizer = DataFrameSummarizer.instance()
    return shared_summarizer.summarizing_func
|
||
|
||
def set_summarizing_function(func: Callable) -> None:
    """Store ``func`` as the summarizing callable on the shared DataFrameSummarizer."""
    shared_summarizer = DataFrameSummarizer.instance()
    shared_summarizer.summarizing_func = func
|
||
|
||
def make_df_summary(df: pd.DataFrame) -> str:
    """Summarize ``df`` by delegating to the shared DataFrameSummarizer's ``summarize``."""
    shared_summarizer = DataFrameSummarizer.instance()
    summary = shared_summarizer.summarize(df)
    return summary