From acd254fb506ed769edd9dc2e7ea0664b7e921d19 Mon Sep 17 00:00:00 2001
From: Yotam Perlitz
Date: Tue, 21 Jan 2025 07:27:52 -0500
Subject: [PATCH] change loggers

Signed-off-by: Yotam Perlitz
---
 src/unitxt/db_utils.py |  13 +--
 src/unitxt/metrics.py  | 209 ++++++++++++++++++++---------------
 2 files changed, 108 insertions(+), 114 deletions(-)

diff --git a/src/unitxt/db_utils.py b/src/unitxt/db_utils.py
index 63bc988334..07a1f58f90 100644
--- a/src/unitxt/db_utils.py
+++ b/src/unitxt/db_utils.py
@@ -5,17 +5,14 @@
 from abc import ABC, abstractmethod
 from typing import Any, List, Optional
 
-import evaluate
 import requests
 from huggingface_hub import snapshot_download
 from requests.exceptions import ConnectionError, ReadTimeout
 
+from .logging_utils import get_logger
 from .types import SQLDatabase
 
-# Path to the user's databases cache directory.
-# Logger instance.
-
-logger = evaluate.logging.get_logger(__name__)
+logger = get_logger()
 
 
 class DatabaseConnector(ABC):
@@ -193,9 +190,9 @@ class RemoteDatabaseConnector(DatabaseConnector):
 
     def __init__(self, db_config: SQLDatabase):
         super().__init__(db_config)
-        assert db_config[
-            "db_id"
-        ], "db_id must be in db_config for RemoteDatabaseConnector"
+        assert db_config["db_id"], (
+            "db_id must be in db_config for RemoteDatabaseConnector"
+        )
         self.api_url, self.database_id = (
             db_config["db_id"].split(",")[0],
             db_config["db_id"].split("db_id=")[-1].split(",")[0],
diff --git a/src/unitxt/metrics.py b/src/unitxt/metrics.py
index 2fee2518e7..1efd9a8f33 100644
--- a/src/unitxt/metrics.py
+++ b/src/unitxt/metrics.py
@@ -53,8 +53,6 @@
 from .type_utils import Type, isoftype, parse_type_string, to_type_string
 from .utils import deep_copy, recursive_copy
 
-logger = evaluate.logging.get_logger(__name__)
-
 logger = get_logger()
 settings = get_settings()
 
@@ -1121,9 +1119,9 @@ def process(self, stream: Stream, stream_name: Optional[str] = None) -> Generato
             )
 
         for reduction, fields in self.reduction_map.items():
-            assert (
-                reduction in self.implemented_reductions
-            ), f"Reduction {reduction} is not implemented, use one of {self.implemented_reductions}"
+            assert reduction in self.implemented_reductions, (
+                f"Reduction {reduction} is not implemented, use one of {self.implemented_reductions}"
+            )
 
             if reduction == "mean":
                 for field_name in fields:
@@ -1392,12 +1390,12 @@ class InstanceMetric(StreamOperator, MetricWithConfidenceInterval):
     def _validate_group_mean_task_data(self, instance):
         # instances need to all have task_data field with field group_id
         assert "task_data" in instance, "each instance must have an task_data field"
-        assert isinstance(
-            instance["task_data"], dict
-        ), "each instance must have an task_data field that is a dict"
-        assert (
-            "group_id" in instance["task_data"]
-        ), "each instance task_data dict must have a key group_id"
+        assert isinstance(instance["task_data"], dict), (
+            "each instance must have an task_data field that is a dict"
+        )
+        assert "group_id" in instance["task_data"], (
+            "each instance task_data dict must have a key group_id"
+        )
 
     def _validate_group_mean_reduction(self):
         """Ensure that group_mean reduction_map is properly formatted.
@@ -1450,30 +1448,30 @@ def accuracy_diff(subgroup_scores_dict, expected_subgroup_types=['original', 'pa
             2  'Why are ants eating my food?'     'original'
'original' """ # validate the reduction_map - assert ( - "group_mean" in self.reduction_map - ), "reduction_map must have a 'group_mean' key" + assert "group_mean" in self.reduction_map, ( + "reduction_map must have a 'group_mean' key" + ) fields = self.reduction_map["group_mean"] # for group_mean, expects a dict assert isinstance(fields, dict) - assert ( - "agg_func" in fields - ), "fields should have a key 'agg_func' whose value is a 3-element list of a function name, function definition, and a boolean indicator" - assert isinstance( - fields["agg_func"], list - ), "fields['agg_func'] should be a list" - assert ( - len(fields["agg_func"]) == 3 - ), "fields['agg_func'] should be a 3-element list" - assert isinstance( - fields["agg_func"][0], str - ), "first item in fields['agg_func'] should be a string name of a function" - assert callable( - fields["agg_func"][1] - ), "second item in fields['agg_func'] should be a callable function" - assert isinstance( - fields["agg_func"][2], bool - ), "third item in fields['agg_func'] should be a boolean value" + assert "agg_func" in fields, ( + "fields should have a key 'agg_func' whose value is a 3-element list of a function name, function definition, and a boolean indicator" + ) + assert isinstance(fields["agg_func"], list), ( + "fields['agg_func'] should be a list" + ) + assert len(fields["agg_func"]) == 3, ( + "fields['agg_func'] should be a 3-element list" + ) + assert isinstance(fields["agg_func"][0], str), ( + "first item in fields['agg_func'] should be a string name of a function" + ) + assert callable(fields["agg_func"][1]), ( + "second item in fields['agg_func'] should be a callable function" + ) + assert isinstance(fields["agg_func"][2], bool), ( + "third item in fields['agg_func'] should be a boolean value" + ) if "score_fields" in fields: assert isinstance(fields["score_fields"], list) @@ -1481,9 +1479,9 @@ def process(self, stream: Stream, stream_name: Optional[str] = None) -> Generato instance_scores = self.compute_instance_scores(stream) global_score = {"num_of_instances": len(instance_scores)} for reduction_type, reduction_params in self.reduction_map.items(): - assert ( - reduction_type in self.implemented_reductions - ), f"Reduction {reduction_type} is not implemented, use one of {self.implemented_reductions}" + assert reduction_type in self.implemented_reductions, ( + f"Reduction {reduction_type} is not implemented, use one of {self.implemented_reductions}" + ) field_name_full_prefix = "" # used for passing to the bootstrapping, depends on whether the groups are fixed or not @@ -1581,7 +1579,9 @@ def compute_instance_scores( assert ( "task_data" in instance and self.subgroup_column in instance["task_data"] - ), f"each instance task_data dict must have a key {self.subgroup_column}" + ), ( + f"each instance task_data dict must have a key {self.subgroup_column}" + ) task_data = instance["task_data"] if "task_data" in instance else {} @@ -2183,15 +2183,15 @@ def disable_confidence_interval_calculation(self): def verify(self): super().verify() - assert ( - self.metric is not None - ), f"'metric' is not set in {self.get_metric_name()}" - assert ( - self.main_score is not None - ), f"'main_score' is not set in {self.get_metric_name()}" - assert isinstance( - self.metric, Metric - ), f"'metric' is not set to a Metric class in {self.get_metric_name()} (type{self.metric})" + assert self.metric is not None, ( + f"'metric' is not set in {self.get_metric_name()}" + ) + assert self.main_score is not None, ( + f"'main_score' is not set in 
{self.get_metric_name()}" + ) + assert isinstance(self.metric, Metric), ( + f"'metric' is not set to a Metric class in {self.get_metric_name()} (type{self.metric})" + ) if self.postpreprocess_steps is not None: depr_message = "Field 'postpreprocess_steps' is deprecated. Please use 'postprocess_steps' for the same purpose." warnings.warn(depr_message, DeprecationWarning, stacklevel=2) @@ -2212,9 +2212,9 @@ def prepare(self): and isinstance(self.postprocess_steps, list) and len(self.postprocess_steps) > 0 ) - assert not ( - has_postpreprocess and has_postprocess - ), "Must define at most one of postpreprocess_steps (which is deprecated) and postprocess_steps (to be used from now on)" + assert not (has_postpreprocess and has_postprocess), ( + "Must define at most one of postpreprocess_steps (which is deprecated) and postprocess_steps (to be used from now on)" + ) if has_postpreprocess: self.postprocess_steps = self.postpreprocess_steps self.prepare_score = SequentialOperator( @@ -2289,20 +2289,21 @@ def verify(self): Documentation.HUGGINGFACE_METRICS, ) - assert ( - self.hf_additional_input_fields is None - or isoftype(self.hf_additional_input_fields, List[str]) - ), f"Argument hf_additional_input_fields should be either None or List[str]. It is now: {self.hf_additional_input_fields}." - assert ( - self.hf_additional_input_fields_pass_one_value is None - or isoftype(self.hf_additional_input_fields_pass_one_value, List[str]) - ), f"Argument hf_additional_input_fields_pass_one_value should be either None or List[str]. It is now: {self.hf_additional_input_fields_pass_one_value}." + assert self.hf_additional_input_fields is None or isoftype( + self.hf_additional_input_fields, List[str] + ), ( + f"Argument hf_additional_input_fields should be either None or List[str]. It is now: {self.hf_additional_input_fields}." + ) + assert self.hf_additional_input_fields_pass_one_value is None or isoftype( + self.hf_additional_input_fields_pass_one_value, List[str] + ), ( + f"Argument hf_additional_input_fields_pass_one_value should be either None or List[str]. It is now: {self.hf_additional_input_fields_pass_one_value}." 
+        )
 
         return super().verify()
 
     def prepare(self):
         super().prepare()
-        import evaluate
 
         self.metric = evaluate.load(
             self.hf_metric_name, experiment_id=str(uuid.uuid4())
         )
@@ -2316,25 +2317,25 @@ def compute(
     ) -> dict:
         passed_task_data = {}
         for additional_input_field in self.hf_additional_input_fields:
-            assert (
-                additional_input_field in task_data[0]
-            ), f"'{additional_input_field}' field required by {__class__.__name__} is not in passed in task_data: {task_data[0]}"
+            assert additional_input_field in task_data[0], (
+                f"'{additional_input_field}' field required by {__class__.__name__} is not in passed in task_data: {task_data[0]}"
+            )
             passed_task_data[additional_input_field] = [
                 additional_input[additional_input_field]
                 for additional_input in task_data
             ]
         for additional_input_field in self.hf_additional_input_fields_pass_one_value:
-            assert (
-                additional_input_field in task_data[0]
-            ), f"'{additional_input_field}' field required by {__class__.__name__} is not in passed in task_data: {task_data[0]}"
+            assert additional_input_field in task_data[0], (
+                f"'{additional_input_field}' field required by {__class__.__name__} is not in passed in task_data: {task_data[0]}"
+            )
             values = {
                 additional_input[additional_input_field]
                 for additional_input in task_data
             }
-            assert (
-                len(values) == 1
-            ), f"Values of '{additional_input_field}' field required by {__class__.__name__} should all be the same, but have multiple values {values}"
+            assert len(values) == 1, (
+                f"Values of '{additional_input_field}' field required by {__class__.__name__} should all be the same, but have multiple values {values}"
+            )
 
             passed_task_data[additional_input_field] = next(iter(values))
 
@@ -2349,22 +2350,22 @@ def compute(
             result[self.main_score] = float(result[self.hf_main_score])
             del result[self.hf_main_score]
         if self.scale != 1.0:
-            assert (
-                self.scaled_fields is not None
-            ), f"Scaling factor was set to {self.scale}, but no fields specified"
+            assert self.scaled_fields is not None, (
+                f"Scaling factor was set to {self.scale}, but no fields specified"
+            )
             for key in self.scaled_fields:
-                assert (
-                    key in result
-                ), f"Trying to scale field '{key}' which is not in results of metrics: {result}"
+                assert key in result, (
+                    f"Trying to scale field '{key}' which is not in results of metrics: {result}"
+                )
                 if isinstance(result[key], list):
-                    assert all(
-                        isinstance(v, float) for v in result[key]
-                    ), "Not all scaled field '{key}' values are floats: {result[key]}"
+                    assert all(isinstance(v, float) for v in result[key]), (
+                        "Not all scaled field '{key}' values are floats: {result[key]}"
+                    )
                     result[key] = [v / self.scale for v in result[key]]
                 else:
-                    assert isinstance(
-                        result[key], float
-                    ), "Scaled field '{key}' is not float: {result[key]}"
+                    assert isinstance(result[key], float), (
+                        "Scaled field '{key}' is not float: {result[key]}"
+                    )
                     result[key] /= self.scale
         if self.main_score in result:
             result[self.main_score] = float(result[self.main_score])
@@ -2380,7 +2381,6 @@ class HuggingfaceBulkMetric(BulkInstanceMetric):
 
     def prepare(self):
         super().prepare()
-        import evaluate
 
         self.metric = evaluate.load(
             self.hf_metric_name, experiment_id=str(uuid.uuid4())
@@ -2394,9 +2394,9 @@ def compute(
     ) -> List[Dict[str, Any]]:
         passed_task_data = {}
         for additional_input_field in self.hf_additional_input_fields:
-            assert (
-                additional_input_field in task_data[0]
-            ), f"'{additional_input_field}' field required by {__class__.__name__} is not in passed in task_data: {task_data[0]}"
+            assert additional_input_field in task_data[0], (
f"'{additional_input_field}' field required by {__class__.__name__} is not in passed in task_data: {task_data[0]}" + ) passed_task_data[additional_input_field] = [ additional_input[additional_input_field] for additional_input in task_data @@ -2428,7 +2428,6 @@ class HuggingfaceInstanceMetric(InstanceMetric): def prepare(self): super().prepare() - import evaluate self.metric = evaluate.load( self.hf_metric_name, experiment_id=str(uuid.uuid4()) @@ -2533,7 +2532,6 @@ class F1(GlobalMetric): def prepare(self): super().prepare() - import evaluate self._metric = evaluate.load(self.metric, experiment_id=str(uuid.uuid4())) @@ -2735,9 +2733,9 @@ def download_finqa_eval_script_file(url, local_path, hash_of_script): response = requests.get(url) response.raise_for_status() content = response.content - assert ( - hashlib.md5(content).hexdigest() == hash_of_script - ), f'URL ("{url}") is different than expected. Make sure you added the right one.' + assert hashlib.md5(content).hexdigest() == hash_of_script, ( + f'URL ("{url}") is different than expected. Make sure you added the right one.' + ) with open(local_path, "wb") as file: file.write(content) @@ -2811,7 +2809,6 @@ class F1MultiLabel(GlobalMetric, PackageRequirementsMixin): def prepare(self): super().prepare() - import evaluate self._metric = evaluate.load( self.metric, "multilabel", experiment_id=str(uuid.uuid4()) @@ -2872,9 +2869,9 @@ def compute( labels=labels_param, ) if isinstance(result[self.metric], numpy.ndarray): - assert ( - len(result[self.metric]) == len(labels) - ), f"F1 result ({result[self.metric]}) has more entries than labels ({labels})" + assert len(result[self.metric]) == len(labels), ( + f"F1 result ({result[self.metric]}) has more entries than labels ({labels})" + ) final_result = {self.main_score: nan_mean(result[self.metric])} for i, label in enumerate(labels): final_result[self.metric + "_" + label] = result[self.metric][i] @@ -4657,12 +4654,12 @@ def validate_subgroup_types( for subgroup_name, score_list in subgroup_scores_dict.items() } ) - assert isinstance( - control_subgroup_types, list - ), "control_subgroup_types must be a list" - assert isinstance( - comparison_subgroup_types, list - ), "comparison_subgroup_types must be a list" + assert isinstance(control_subgroup_types, list), ( + "control_subgroup_types must be a list" + ) + assert isinstance(comparison_subgroup_types, list), ( + "comparison_subgroup_types must be a list" + ) # make sure each list is unique, so that labels aren't double-counted control_subgroup_types = list(set(control_subgroup_types)) comparison_subgroup_types = list(set(comparison_subgroup_types)) @@ -4817,9 +4814,9 @@ def normalized_cohens_h( # requires scores to be in [0,1] for subgroup_name, score_list in subgroup_scores_dict.items(): - assert all( - 0 <= score <= 1 for score in score_list - ), f"all {subgroup_name} scores must be in [0,1]" + assert all(0 <= score <= 1 for score in score_list), ( + f"all {subgroup_name} scores must be in [0,1]" + ) # combine all scores from each label (if there are more than 1 in each group) into a list group_scores_list = [ @@ -5623,9 +5620,9 @@ def prepare(self): def create_ensemble_scores(self, instance): score = self.ensemble(instance) - instance[ - "prediction" - ] = score # We use here the prediction field to pass the score to the compute method. + instance["prediction"] = ( + score # We use here the prediction field to pass the score to the compute method. 
+        )
         return instance
 
     def ensemble(self, instance):
@@ -5805,9 +5802,9 @@ def load_weights(json_file):
             return json.load(file)
 
     def ensemble(self, instance):
-        assert (
-            self.weights is not None
-        ), "RandomForestMetricsEnsemble must set self.weights before it can be used"
+        assert self.weights is not None, (
+            "RandomForestMetricsEnsemble must set self.weights before it can be used"
+        )
         ensemble_model = self.decode_forest(self.weights)
 
         prediction_lst = []
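
Note: the only behavioral change in this patch is where the module-level
loggers come from; the assert rewrites are pure reformatting, and dropping the
local "import evaluate" statements presumes evaluate is already imported at
module scope in metrics.py, where evaluate.load() is still called. A minimal
sketch of the logger swap, assuming unitxt's logging_utils.get_logger()
returns the library's shared standard-library logger (the log message below
is illustrative, not from the patch):

    # Before: each module pulled a logger from Hugging Face evaluate
    #   import evaluate
    #   logger = evaluate.logging.get_logger(__name__)

    # After: modules share unitxt's own logger
    from unitxt.logging_utils import get_logger

    logger = get_logger()
    logger.info("db_utils and metrics now log through unitxt's logger")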