Skip to content

Commit

Permalink
subset based on several criteria
Browse files Browse the repository at this point in the history
  • Loading branch information
kasunamare committed Dec 22, 2023
1 parent 0e98dc4 commit b8822ab
Showing 1 changed file with 52 additions and 2 deletions.
54 changes: 52 additions & 2 deletions src/triage/component/architect/feature_group_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,35 @@ def interval_subsetter(config_item, table, features):
search_str = f"_{config_item}_"
return [feature for feature in features if search_str in feature]

def combination_subsetter(config_item, table, features):
    """Return the features that satisfy every condition in ``config_item``.

    ``config_item`` maps a filter kind (``'prefix'``, ``'metrics'`` or
    ``'intervals'``) to a list of values. Within one kind a feature is kept
    when it matches ANY of the listed values; the kinds are applied one after
    another, so a feature has to pass ALL of them to be returned.

    Args:
        config_item: dict of filter kind -> list of values for that kind.
        table: name of the table the features belong to; forwarded to the
            individual subsetters and used in the log message.
        features: feature names to filter.

    Returns:
        The feature names that passed every recognised filter, each listed
        once. Unrecognised keys are logged with a warning and skipped, so
        they do not narrow the result.
    """
    # One single-criterion subsetter per supported key, so the combination
    # logic stays consistent with the stand-alone subsetters.
    filters = {
        'prefix': prefix_subsetter,
        'metrics': metric_subsetter,
        'intervals': interval_subsetter,
    }

    # we start with the full feature set
    feature_set = features

    for key, values in config_item.items():
        subsetter = filters.get(key)
        if subsetter is None:
            logger.warning(
                f'key has to be one of prefix, metrics, or intervals; got {key!r}'
            )
            continue

        # Union of the matches for each value. A feature matching several
        # values (e.g. overlapping prefixes) must still appear only once.
        matched = []
        seen = set()
        for value in values:
            for feature in subsetter(value, table, feature_set):
                if feature not in seen:
                    seen.add(feature)
                    matched.append(feature)

        # Later filter kinds only see what survived this one.
        feature_set = matched

    logger.info(f'Filtered features -- {table}: {", ".join(feature_set)}')

    return feature_set


def all_subsetter(config_item, table, features):
    """Keep every feature: hand back ``features`` unchanged.

    ``config_item`` and ``table`` are not used; they are accepted so the
    signature lines up with the other subsetter functions.
    """
    return features
Expand All @@ -61,8 +90,9 @@ class FeatureGroupCreator:
# Registry of subsetter functions keyed by grouping-method name.
# presumably the key is the method name used in the feature-group
# definition and selects which function filters a table's features — verify
# against the lookup in `subsets`.
# NOTE(review): both singular ("metric", "interval") and plural ("metrics",
# "intervals") spellings are listed here; confirm whether the singular
# entries are meant to remain alongside the plural ones.
subsetters = {
"tables": table_subsetter,
"prefix": prefix_subsetter,
"metric": metric_subsetter,
"interval": interval_subsetter,
"metrics": metric_subsetter,
"intervals": interval_subsetter,
"combinations": combination_subsetter,
"all": all_subsetter
}

Expand Down Expand Up @@ -111,6 +141,7 @@ def subsets(self, feature_dictionary):
f"Creating feature groups, using: {self.definition}, Master feature dictionary: {feature_dictionary}",
)
subsets = []
logger.info(self.definition)
for name, config in sorted(self.definition.items()):
logger.spam(f"Parsing config grouping method {name}, items {config}")
for config_item in config:
Expand Down Expand Up @@ -138,3 +169,22 @@ def subsets(self, feature_dictionary):
)
logger.verbose(f"Found {len(subsets)} total feature subsets")
return subsets

# def subsets_new(self, feature_dictionary):

# subsets = []
# for table, features in feature_dictionary.items():

# feature_set = features
# # looping through each filter
# for name, config in sorted(self.definition.items()):
# # looping through each item in the filter
# for config_item in config:
# subset = FeatureGroup(name=f"{name}: {config_item}")
# matching_features = self.subsetters[name](
# config_item, table, feature_set
# )
# if len(matching_features) > 0:
# subset[table] = FeatureNameList(matching_features)


0 comments on commit b8822ab

Please sign in to comment.