Skip to content

Commit

Permalink
Add embed argument to spotlight.show and CLI
Browse files Browse the repository at this point in the history
  • Loading branch information
druzsan committed Dec 4, 2023
1 parent 5b241de commit b0d5f02
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 9 deletions.
22 changes: 20 additions & 2 deletions renumics/spotlight/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,10 @@ class SpotlightApp(FastAPI):
# data issues
issues: Optional[List[DataIssue]] = []
_custom_issues: List[DataIssue] = []
analyze_columns: Union[List[str], bool] = False
analyze_columns: Union[List[str], bool]

# embedding
embed_columns: Union[List[str], bool]

def __init__(self) -> None:
super().__init__()
Expand All @@ -138,6 +141,7 @@ def __init__(self) -> None:
self.analyze_columns = False
self.issues = None
self._custom_issues = []
self.embed_columns = False

self._dataset = None
self._user_dtypes = {}
Expand Down Expand Up @@ -328,6 +332,8 @@ def update(self, config: AppConfig) -> None:
self.analyze_columns = config.analyze
if config.custom_issues is not None:
self.custom_issues = config.custom_issues
if config.embed is not None:
self.embed_columns = config.embed
if config.dataset is not None:
self._dataset = config.dataset
self._data_source = create_datasource(self._dataset)
Expand Down Expand Up @@ -464,12 +470,24 @@ def _update_embeddings(self) -> None:
"""
Update embeddings, store them in the data store and notify the client about the change.
"""
if not self.embed_columns:
return

if self._data_store is None:
return

logger.info("Embedding started.")

embedders = create_embedders(self._data_store, self._data_store.column_names)
if self.embed_columns is True:
embed_columns = self._data_store.column_names
else:
embed_columns = [
column
for column in self.embed_columns
if column in self._data_store.column_names
]

embedders = create_embedders(self._data_store, embed_columns)

self._data_store.embeddings = {column: None for column in embedders}

Expand Down
5 changes: 4 additions & 1 deletion renumics/spotlight/app_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,12 @@ class AppConfig:
project_root: Optional[Path] = None

# data analysis
analyze: Optional[Union[bool, List[str]]] = None
analyze: Optional[Union[List[str], bool]] = None
custom_issues: Optional[List[DataIssue]] = None

# embedding
embed: Optional[Union[List[str], bool]] = None

# frontend
layout: Optional[Layout] = None
filebrowsing_allowed: Optional[bool] = None
25 changes: 20 additions & 5 deletions renumics/spotlight/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import platform
import signal
import sys
from typing import Dict, Optional, Tuple, Union, List
from typing import Dict, Optional, Tuple, Union

import click

Expand Down Expand Up @@ -94,9 +94,21 @@ def cli_dtype_callback(
)
@click.option(
"--analyze",
default=[],
default=(),
multiple=True,
help="Automatically analyze issues for all columns.",
help="Columns to analyze (if no --analyze-all).",
)
@click.option(
"--embed-all",
is_flag=True,
default=False,
help="Automatically embed all columns.",
)
@click.option(
"--embed",
default=(),
multiple=True,
help="Columns to embed (if no --embed-all).",
)
@click.option("-v", "--verbose", is_flag=True)
@click.version_option(spotlight.__version__)
Expand All @@ -109,8 +121,10 @@ def main(
dtype: Optional[Dict[str, str]],
no_browser: bool,
filebrowsing: bool,
analyze: List[str],
analyze: Tuple[str],
analyze_all: bool,
embed: Tuple[str],
embed_all: bool,
verbose: bool,
) -> None:
"""
Expand All @@ -135,5 +149,6 @@ def main(
no_browser=no_browser,
allow_filebrowsing=filebrowsing,
wait="forever",
analyze=True if analyze_all else analyze,
analyze=True if analyze_all else list(analyze),
embed=True if embed_all else list(embed),
)
10 changes: 9 additions & 1 deletion renumics/spotlight/viewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,9 @@ def show(
allow_filebrowsing: Union[bool, Literal["auto"]] = "auto",
wait: Union[bool, Literal["auto", "forever"]] = "auto",
dtype: Optional[Dict[str, Any]] = None,
analyze: Optional[Union[bool, List[str]]] = None,
analyze: Optional[Union[List[str], bool]] = None,
issues: Optional[Collection[DataIssue]] = None,
embed: Optional[Union[List[str], bool]] = None,
) -> None:
"""
Show a dataset or folder in this spotlight viewer.
Expand All @@ -173,6 +174,8 @@ def show(
column types allowed by Spotlight (for dataframes only).
analyze: Automatically analyze common dataset issues (disabled by default).
issues: Custom dataset issues displayed in the viewer.
embed: Automatically embed all or given columns with default
embedders (disabled by default).
"""

if is_pathtype(dataset):
Expand Down Expand Up @@ -206,6 +209,7 @@ def show(
project_root=project_root,
analyze=analyze,
custom_issues=list(issues) if issues else None,
embed=embed,
layout=parsed_layout,
filebrowsing_allowed=filebrowsing_allowed,
)
Expand Down Expand Up @@ -373,6 +377,7 @@ def show(
dtype: Optional[Dict[str, Any]] = None,
analyze: Optional[Union[bool, List[str]]] = None,
issues: Optional[Collection[DataIssue]] = None,
embed: Optional[Union[List[str], bool]] = None,
) -> Viewer:
"""
Start a new Spotlight viewer.
Expand All @@ -397,6 +402,8 @@ def show(
column types allowed by Spotlight (for dataframes only).
analyze: Automatically analyze common dataset issues (disabled by default).
issues: Custom dataset issues displayed in the viewer.
embed: Automatically embed all or given columns with default
embedders (disabled by default).
"""

viewer = None
Expand All @@ -419,6 +426,7 @@ def show(
dtype=dtype,
analyze=analyze,
issues=issues,
embed=embed,
)
return viewer

Expand Down

0 comments on commit b0d5f02

Please sign in to comment.