From b0d5f025408f3925e3459fc6942df3c1bafea423 Mon Sep 17 00:00:00 2001 From: Alexander Druz Date: Mon, 4 Dec 2023 14:43:45 +0100 Subject: [PATCH] Add embed argument to `spotlight.show` and CLI --- renumics/spotlight/app.py | 22 ++++++++++++++++++++-- renumics/spotlight/app_config.py | 5 ++++- renumics/spotlight/cli.py | 25 ++++++++++++++++++++----- renumics/spotlight/viewer.py | 10 +++++++++- 4 files changed, 53 insertions(+), 9 deletions(-) diff --git a/renumics/spotlight/app.py b/renumics/spotlight/app.py index d0081ba1..147f0254 100644 --- a/renumics/spotlight/app.py +++ b/renumics/spotlight/app.py @@ -123,7 +123,10 @@ class SpotlightApp(FastAPI): # data issues issues: Optional[List[DataIssue]] = [] _custom_issues: List[DataIssue] = [] - analyze_columns: Union[List[str], bool] = False + analyze_columns: Union[List[str], bool] + + # embedding + embed_columns: Union[List[str], bool] def __init__(self) -> None: super().__init__() @@ -138,6 +141,7 @@ def __init__(self) -> None: self.analyze_columns = False self.issues = None self._custom_issues = [] + self.embed_columns = False self._dataset = None self._user_dtypes = {} @@ -328,6 +332,8 @@ def update(self, config: AppConfig) -> None: self.analyze_columns = config.analyze if config.custom_issues is not None: self.custom_issues = config.custom_issues + if config.embed is not None: + self.embed_columns = config.embed if config.dataset is not None: self._dataset = config.dataset self._data_source = create_datasource(self._dataset) @@ -464,12 +470,24 @@ def _update_embeddings(self) -> None: """ Update embeddings, update them in the data store and notify client about. """ + if not self.embed_columns: + return + if self._data_store is None: return logger.info("Embedding started.") - embedders = create_embedders(self._data_store, self._data_store.column_names) + if self.embed_columns is True: + embed_columns = self._data_store.column_names + else: + embed_columns = [ + column + for column in self.embed_columns + if column in self._data_store.column_names + ] + + embedders = create_embedders(self._data_store, embed_columns) self._data_store.embeddings = {column: None for column in embedders} diff --git a/renumics/spotlight/app_config.py b/renumics/spotlight/app_config.py index 15bcf217..e1364fa3 100644 --- a/renumics/spotlight/app_config.py +++ b/renumics/spotlight/app_config.py @@ -24,9 +24,12 @@ class AppConfig: project_root: Optional[Path] = None # data analysis - analyze: Optional[Union[bool, List[str]]] = None + analyze: Optional[Union[List[str], bool]] = None custom_issues: Optional[List[DataIssue]] = None + # embedding + embed: Optional[Union[List[str], bool]] = None + # frontend layout: Optional[Layout] = None filebrowsing_allowed: Optional[bool] = None diff --git a/renumics/spotlight/cli.py b/renumics/spotlight/cli.py index 7854a8c7..6736fb9b 100644 --- a/renumics/spotlight/cli.py +++ b/renumics/spotlight/cli.py @@ -6,7 +6,7 @@ import platform import signal import sys -from typing import Dict, Optional, Tuple, Union, List +from typing import Dict, Optional, Tuple, Union import click @@ -94,9 +94,21 @@ def cli_dtype_callback( ) @click.option( "--analyze", - default=[], + default=(), multiple=True, - help="Automatically analyze issues for all columns.", + help="Columns to analyze (if no --analyze-all).", +) +@click.option( + "--embed-all", + is_flag=True, + default=False, + help="Automatically embed all columns.", +) +@click.option( + "--embed", + default=(), + multiple=True, + help="Columns to embed (if no --analyze-all).", ) @click.option("-v", "--verbose", is_flag=True) @click.version_option(spotlight.__version__) @@ -109,8 +121,10 @@ def main( dtype: Optional[Dict[str, str]], no_browser: bool, filebrowsing: bool, - analyze: List[str], + analyze: Tuple[str], analyze_all: bool, + embed: Tuple[str], + embed_all: bool, verbose: bool, ) -> None: """ @@ -135,5 +149,6 @@ def main( no_browser=no_browser, allow_filebrowsing=filebrowsing, wait="forever", - analyze=True if analyze_all else analyze, + analyze=True if analyze_all else list(analyze), + embed=True if embed_all else list(embed), ) diff --git a/renumics/spotlight/viewer.py b/renumics/spotlight/viewer.py index 40a07683..94c979b9 100644 --- a/renumics/spotlight/viewer.py +++ b/renumics/spotlight/viewer.py @@ -150,8 +150,9 @@ def show( allow_filebrowsing: Union[bool, Literal["auto"]] = "auto", wait: Union[bool, Literal["auto", "forever"]] = "auto", dtype: Optional[Dict[str, Any]] = None, - analyze: Optional[Union[bool, List[str]]] = None, + analyze: Optional[Union[List[str], bool]] = None, issues: Optional[Collection[DataIssue]] = None, + embed: Optional[Union[List[str], bool]] = None, ) -> None: """ Show a dataset or folder in this spotlight viewer. @@ -173,6 +174,8 @@ def show( column types allowed by Spotlight (for dataframes only). analyze: Automatically analyze common dataset issues (disabled by default). issues: Custom dataset issues displayed in the viewer. + embed: Automatically embed all or given columns with default + embedders (disabled by default). """ if is_pathtype(dataset): @@ -206,6 +209,7 @@ def show( project_root=project_root, analyze=analyze, custom_issues=list(issues) if issues else None, + embed=embed, layout=parsed_layout, filebrowsing_allowed=filebrowsing_allowed, ) @@ -373,6 +377,7 @@ def show( dtype: Optional[Dict[str, Any]] = None, analyze: Optional[Union[bool, List[str]]] = None, issues: Optional[Collection[DataIssue]] = None, + embed: Optional[Union[List[str], bool]] = None, ) -> Viewer: """ Start a new Spotlight viewer. @@ -397,6 +402,8 @@ def show( column types allowed by Spotlight (for dataframes only). analyze: Automatically analyze common dataset issues (disabled by default). issues: Custom dataset issues displayed in the viewer. + embed: Automatically embed all or given columns with default + embedders (disabled by default). """ viewer = None @@ -419,6 +426,7 @@ def show( dtype=dtype, analyze=analyze, issues=issues, + embed=embed, ) return viewer