Skip to content

Commit

Permalink
Add preliminary 'get_supported_features' RPC for data explorer backend (#2643)
Browse files Browse the repository at this point in the history

Addresses #2201. This is just a starting point and will be refined to add more granular feature flagging, but I wanted to break the seal on this.

Summary
    Adds feature flags for search_schema, set_row_filters, and get_column_profiles
    For filtering and profiles, indicates which row filter types / profile types are supported
    Returns basic feature flags for pandas objects (each type of data structure can return different feature flags, so support is not necessarily global to the runtime but rather local to the particular data type)
    Extracts row filter type and profile type into shared enums
Also addresses #2599 (renaming search_filter_params.type to search_type)
  • Loading branch information
wesm authored Apr 3, 2024
1 parent a09e6ba commit 8c7167c
Show file tree
Hide file tree
Showing 8 changed files with 446 additions and 98 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,29 +28,34 @@
ColumnHistogram,
ColumnSummaryStats,
CompareFilterParamsOp,
ColumnProfileRequestType,
ColumnProfileType,
ColumnProfileResult,
ColumnSchema,
ColumnDisplayType,
ColumnSortKey,
DataExplorerBackendMessageContent,
DataExplorerFrontendEvent,
FilterResult,
GetColumnProfilesFeatures,
GetColumnProfilesRequest,
GetDataValuesRequest,
GetSchemaRequest,
GetStateRequest,
GetSupportedFeaturesRequest,
RowFilter,
RowFilterFilterType,
RowFilterType,
SchemaUpdateParams,
SearchFilterParamsType,
SearchFilterType,
SearchSchemaFeatures,
SearchSchemaRequest,
SearchSchemaResult,
SetRowFiltersFeatures,
SetRowFiltersRequest,
SetSortColumnsRequest,
SummaryStatsBoolean,
SummaryStatsNumber,
SummaryStatsString,
SupportedFeatures,
TableData,
TableSchema,
TableShape,
Expand Down Expand Up @@ -140,27 +145,30 @@ def get_column_profiles(self, request: GetColumnProfilesRequest):
results = []

for req in request.params.profiles:
if req.type == ColumnProfileRequestType.NullCount:
if req.profile_type == ColumnProfileType.NullCount:
count = self._prof_null_count(req.column_index)
result = ColumnProfileResult(null_count=count)
elif req.type == ColumnProfileRequestType.SummaryStats:
elif req.profile_type == ColumnProfileType.SummaryStats:
stats = self._prof_summary_stats(req.column_index)
result = ColumnProfileResult(summary_stats=stats)
elif req.type == ColumnProfileRequestType.FrequencyTable:
elif req.profile_type == ColumnProfileType.FrequencyTable:
freq_table = self._prof_freq_table(req.column_index)
result = ColumnProfileResult(frequency_table=freq_table)
elif req.type == ColumnProfileRequestType.Histogram:
elif req.profile_type == ColumnProfileType.Histogram:
histogram = self._prof_histogram(req.column_index)
result = ColumnProfileResult(histogram=histogram)
else:
raise NotImplementedError(req.type)
raise NotImplementedError(req.profile_type)
results.append(result.dict())

return results

def get_state(self, request: GetStateRequest):
    """Handle a ``get_state`` request.

    The ``request`` argument is not read here. The result is whatever
    ``self._get_state()`` produces, converted through its ``.dict()``
    method.
    """
    return self._get_state().dict()

def get_supported_features(self, request: GetSupportedFeaturesRequest):
    """Handle a ``get_supported_features`` request.

    The ``request`` argument is not read here. The result is whatever
    ``self._get_supported_features()`` produces, converted through its
    ``.dict()`` method.
    """
    return self._get_supported_features().dict()

@abc.abstractmethod
def invalidate_computations(self):
    """Abstract hook with no default behavior; implementations supply it.

    NOTE(review): presumably discards cached computation results --
    confirm against the concrete implementations.
    """
    pass
Expand Down Expand Up @@ -220,6 +228,10 @@ def _prof_histogram(self, column_index: int) -> ColumnHistogram:
def _get_state(self) -> TableState:
pass

@abc.abstractmethod
def _get_supported_features(self) -> SupportedFeatures:
    """Return the ``SupportedFeatures`` for this implementation.

    Abstract: there is no default behavior here. The public
    ``get_supported_features`` handler calls this and serializes the
    result with ``.dict()``.
    """
    pass


def _pandas_format_values(col):
import pandas.io.formats.format as fmt
Expand Down Expand Up @@ -498,19 +510,19 @@ def _eval_filter(self, filt: RowFilter):
col = self.table.iloc[:, filt.column_index]
mask = None
if filt.filter_type in (
RowFilterFilterType.Between,
RowFilterFilterType.NotBetween,
RowFilterType.Between,
RowFilterType.NotBetween,
):
params = filt.between_params
assert params is not None
left_value = _coerce_value_param(params.left_value, col.dtype)
right_value = _coerce_value_param(params.right_value, col.dtype)
if filt.filter_type == RowFilterFilterType.Between:
if filt.filter_type == RowFilterType.Between:
mask = (col >= left_value) & (col <= right_value)
else:
# NotBetween
mask = (col < left_value) | (col > right_value)
elif filt.filter_type == RowFilterFilterType.Compare:
elif filt.filter_type == RowFilterType.Compare:
params = filt.compare_params
assert params is not None

Expand All @@ -519,11 +531,11 @@ def _eval_filter(self, filt: RowFilter):
op = COMPARE_OPS[params.op]
# pandas comparison filters return False for null values
mask = op(col, _coerce_value_param(params.value, col.dtype))
elif filt.filter_type == RowFilterFilterType.IsNull:
elif filt.filter_type == RowFilterType.IsNull:
mask = col.isnull()
elif filt.filter_type == RowFilterFilterType.NotNull:
elif filt.filter_type == RowFilterType.NotNull:
mask = col.notnull()
elif filt.filter_type == RowFilterFilterType.SetMembership:
elif filt.filter_type == RowFilterType.SetMembership:
params = filt.set_membership_params
assert params is not None
boxed_values = pd_.Series(params.values).astype(col.dtype)
Expand All @@ -532,7 +544,7 @@ def _eval_filter(self, filt: RowFilter):
if not params.inclusive:
# NOT-IN
mask = ~mask
elif filt.filter_type == RowFilterFilterType.Search:
elif filt.filter_type == RowFilterType.Search:
params = filt.search_params
assert params is not None

Expand All @@ -543,17 +555,17 @@ def _eval_filter(self, filt: RowFilter):

term = params.term

if params.type == SearchFilterParamsType.RegexMatch:
if params.search_type == SearchFilterType.RegexMatch:
mask = col.str.match(term, case=params.case_sensitive)
else:
if not params.case_sensitive:
col = col.str.lower()
term = term.lower()
if params.type == SearchFilterParamsType.Contains:
if params.search_type == SearchFilterType.Contains:
mask = col.str.contains(term)
elif params.type == SearchFilterParamsType.StartsWith:
elif params.search_type == SearchFilterType.StartsWith:
mask = col.str.startswith(term)
elif params.type == SearchFilterParamsType.EndsWith:
elif params.search_type == SearchFilterType.EndsWith:
mask = col.str.endswith(term)

assert mask is not None
Expand Down Expand Up @@ -682,6 +694,35 @@ def _get_state(self) -> TableState:
sort_keys=self.sort_keys,
)

def _get_supported_features(self) -> SupportedFeatures:
    """Build the feature flags advertised for this table view.

    Returns a ``SupportedFeatures`` value stating that:

    * schema search is supported;
    * row filters are supported, without conditions
      (``supports_conditions=False``), for the filter types listed below;
    * column profiles are supported for null counts and summary stats
      only.
    """
    row_filter_features = SetRowFiltersFeatures(
        supported=True,
        supports_conditions=False,
        supported_types=[
            RowFilterType.Between,
            RowFilterType.Compare,
            RowFilterType.IsNull,
            RowFilterType.NotNull,
            RowFilterType.NotBetween,
            RowFilterType.Search,
            RowFilterType.SetMembership,
        ],
    )

    # Only these two profile types are advertised here.
    column_profile_features = GetColumnProfilesFeatures(
        supported=True,
        supported_types=[
            ColumnProfileType.NullCount,
            ColumnProfileType.SummaryStats,
        ],
    )

    return SupportedFeatures(
        search_schema=SearchSchemaFeatures(supported=True),
        set_row_filters=row_filter_features,
        get_column_profiles=column_profile_features,
    )


COMPARE_OPS = {
CompareFilterParamsOp.Gt: operator.gt,
Expand Down
Loading

0 comments on commit 8c7167c

Please sign in to comment.