From acd254fb506ed769edd9dc2e7ea0664b7e921d19 Mon Sep 17 00:00:00 2001
From: Yotam Perlitz
Date: Tue, 21 Jan 2025 07:27:52 -0500
Subject: [PATCH] change loggers

Signed-off-by: Yotam Perlitz
---
 src/unitxt/db_utils.py |  13 +--
 src/unitxt/metrics.py  | 209 ++++++++++++++++++++---------------
 2 files changed, 108 insertions(+), 114 deletions(-)

diff --git a/src/unitxt/db_utils.py b/src/unitxt/db_utils.py
index 63bc988334..07a1f58f90 100644
--- a/src/unitxt/db_utils.py
+++ b/src/unitxt/db_utils.py
@@ -5,17 +5,14 @@
 from abc import ABC, abstractmethod
 from typing import Any, List, Optional
 
-import evaluate
 import requests
 from huggingface_hub import snapshot_download
 from requests.exceptions import ConnectionError, ReadTimeout
 
+from .logging_utils import get_logger
 from .types import SQLDatabase
 
-# Path to the user's databases cache directory.
-# Logger instance.
-
-logger = evaluate.logging.get_logger(__name__)
+logger = get_logger()
 
 
 class DatabaseConnector(ABC):
@@ -193,9 +190,9 @@ class RemoteDatabaseConnector(DatabaseConnector):
 
     def __init__(self, db_config: SQLDatabase):
         super().__init__(db_config)
-        assert db_config[
-            "db_id"
-        ], "db_id must be in db_config for RemoteDatabaseConnector"
+        assert db_config["db_id"], (
+            "db_id must be in db_config for RemoteDatabaseConnector"
+        )
         self.api_url, self.database_id = (
             db_config["db_id"].split(",")[0],
             db_config["db_id"].split("db_id=")[-1].split(",")[0],
diff --git a/src/unitxt/metrics.py b/src/unitxt/metrics.py
index 2fee2518e7..1efd9a8f33 100644
--- a/src/unitxt/metrics.py
+++ b/src/unitxt/metrics.py
@@ -53,8 +53,6 @@
 from .type_utils import Type, isoftype, parse_type_string, to_type_string
 from .utils import deep_copy, recursive_copy
 
-logger = evaluate.logging.get_logger(__name__)
-
 logger = get_logger()
 settings = get_settings()
 
@@ -1121,9 +1119,9 @@ def process(self, stream: Stream, stream_name: Optional[str] = None) -> Generato
             )
 
         for reduction, fields in self.reduction_map.items():
-            assert (
-                reduction in self.implemented_reductions
-            ), f"Reduction {reduction} is not implemented, use one of {self.implemented_reductions}"
+            assert reduction in self.implemented_reductions, (
+                f"Reduction {reduction} is not implemented, use one of {self.implemented_reductions}"
+            )
 
             if reduction == "mean":
                 for field_name in fields:
@@ -1392,12 +1390,12 @@ class InstanceMetric(StreamOperator, MetricWithConfidenceInterval):
     def _validate_group_mean_task_data(self, instance):
         # instances need to all have task_data field with field group_id
         assert "task_data" in instance, "each instance must have an task_data field"
-        assert isinstance(
-            instance["task_data"], dict
-        ), "each instance must have an task_data field that is a dict"
-        assert (
-            "group_id" in instance["task_data"]
-        ), "each instance task_data dict must have a key group_id"
+        assert isinstance(instance["task_data"], dict), (
+            "each instance must have an task_data field that is a dict"
+        )
+        assert "group_id" in instance["task_data"], (
+            "each instance task_data dict must have a key group_id"
+        )
 
     def _validate_group_mean_reduction(self):
         """Ensure that group_mean reduction_map is properly formatted.
@@ -1450,30 +1448,30 @@ def accuracy_diff(subgroup_scores_dict, expected_subgroup_types=['original', 'pa
             2  'Why are ants eating my food?'     'original'
'original' """ # validate the reduction_map - assert ( - "group_mean" in self.reduction_map - ), "reduction_map must have a 'group_mean' key" + assert "group_mean" in self.reduction_map, ( + "reduction_map must have a 'group_mean' key" + ) fields = self.reduction_map["group_mean"] # for group_mean, expects a dict assert isinstance(fields, dict) - assert ( - "agg_func" in fields - ), "fields should have a key 'agg_func' whose value is a 3-element list of a function name, function definition, and a boolean indicator" - assert isinstance( - fields["agg_func"], list - ), "fields['agg_func'] should be a list" - assert ( - len(fields["agg_func"]) == 3 - ), "fields['agg_func'] should be a 3-element list" - assert isinstance( - fields["agg_func"][0], str - ), "first item in fields['agg_func'] should be a string name of a function" - assert callable( - fields["agg_func"][1] - ), "second item in fields['agg_func'] should be a callable function" - assert isinstance( - fields["agg_func"][2], bool - ), "third item in fields['agg_func'] should be a boolean value" + assert "agg_func" in fields, ( + "fields should have a key 'agg_func' whose value is a 3-element list of a function name, function definition, and a boolean indicator" + ) + assert isinstance(fields["agg_func"], list), ( + "fields['agg_func'] should be a list" + ) + assert len(fields["agg_func"]) == 3, ( + "fields['agg_func'] should be a 3-element list" + ) + assert isinstance(fields["agg_func"][0], str), ( + "first item in fields['agg_func'] should be a string name of a function" + ) + assert callable(fields["agg_func"][1]), ( + "second item in fields['agg_func'] should be a callable function" + ) + assert isinstance(fields["agg_func"][2], bool), ( + "third item in fields['agg_func'] should be a boolean value" + ) if "score_fields" in fields: assert isinstance(fields["score_fields"], list) @@ -1481,9 +1479,9 @@ def process(self, stream: Stream, stream_name: Optional[str] = None) -> Generato instance_scores = self.compute_instance_scores(stream) global_score = {"num_of_instances": len(instance_scores)} for reduction_type, reduction_params in self.reduction_map.items(): - assert ( - reduction_type in self.implemented_reductions - ), f"Reduction {reduction_type} is not implemented, use one of {self.implemented_reductions}" + assert reduction_type in self.implemented_reductions, ( + f"Reduction {reduction_type} is not implemented, use one of {self.implemented_reductions}" + ) field_name_full_prefix = "" # used for passing to the bootstrapping, depends on whether the groups are fixed or not @@ -1581,7 +1579,9 @@ def compute_instance_scores( assert ( "task_data" in instance and self.subgroup_column in instance["task_data"] - ), f"each instance task_data dict must have a key {self.subgroup_column}" + ), ( + f"each instance task_data dict must have a key {self.subgroup_column}" + ) task_data = instance["task_data"] if "task_data" in instance else {} @@ -2183,15 +2183,15 @@ def disable_confidence_interval_calculation(self): def verify(self): super().verify() - assert ( - self.metric is not None - ), f"'metric' is not set in {self.get_metric_name()}" - assert ( - self.main_score is not None - ), f"'main_score' is not set in {self.get_metric_name()}" - assert isinstance( - self.metric, Metric - ), f"'metric' is not set to a Metric class in {self.get_metric_name()} (type{self.metric})" + assert self.metric is not None, ( + f"'metric' is not set in {self.get_metric_name()}" + ) + assert self.main_score is not None, ( + f"'main_score' is not set in 
{self.get_metric_name()}" + ) + assert isinstance(self.metric, Metric), ( + f"'metric' is not set to a Metric class in {self.get_metric_name()} (type{self.metric})" + ) if self.postpreprocess_steps is not None: depr_message = "Field 'postpreprocess_steps' is deprecated. Please use 'postprocess_steps' for the same purpose." warnings.warn(depr_message, DeprecationWarning, stacklevel=2) @@ -2212,9 +2212,9 @@ def prepare(self): and isinstance(self.postprocess_steps, list) and len(self.postprocess_steps) > 0 ) - assert not ( - has_postpreprocess and has_postprocess - ), "Must define at most one of postpreprocess_steps (which is deprecated) and postprocess_steps (to be used from now on)" + assert not (has_postpreprocess and has_postprocess), ( + "Must define at most one of postpreprocess_steps (which is deprecated) and postprocess_steps (to be used from now on)" + ) if has_postpreprocess: self.postprocess_steps = self.postpreprocess_steps self.prepare_score = SequentialOperator( @@ -2289,20 +2289,21 @@ def verify(self): Documentation.HUGGINGFACE_METRICS, ) - assert ( - self.hf_additional_input_fields is None - or isoftype(self.hf_additional_input_fields, List[str]) - ), f"Argument hf_additional_input_fields should be either None or List[str]. It is now: {self.hf_additional_input_fields}." - assert ( - self.hf_additional_input_fields_pass_one_value is None - or isoftype(self.hf_additional_input_fields_pass_one_value, List[str]) - ), f"Argument hf_additional_input_fields_pass_one_value should be either None or List[str]. It is now: {self.hf_additional_input_fields_pass_one_value}." + assert self.hf_additional_input_fields is None or isoftype( + self.hf_additional_input_fields, List[str] + ), ( + f"Argument hf_additional_input_fields should be either None or List[str]. It is now: {self.hf_additional_input_fields}." + ) + assert self.hf_additional_input_fields_pass_one_value is None or isoftype( + self.hf_additional_input_fields_pass_one_value, List[str] + ), ( + f"Argument hf_additional_input_fields_pass_one_value should be either None or List[str]. It is now: {self.hf_additional_input_fields_pass_one_value}." 
+        )
 
         return super().verify()
 
     def prepare(self):
         super().prepare()
-        import evaluate
 
         self.metric = evaluate.load(
             self.hf_metric_name, experiment_id=str(uuid.uuid4())
         )
@@ -2316,25 +2317,25 @@ def compute(
     ) -> dict:
         passed_task_data = {}
         for additional_input_field in self.hf_additional_input_fields:
-            assert (
-                additional_input_field in task_data[0]
-            ), f"'{additional_input_field}' field required by {__class__.__name__} is not in passed in task_data: {task_data[0]}"
+            assert additional_input_field in task_data[0], (
+                f"'{additional_input_field}' field required by {__class__.__name__} is not in passed in task_data: {task_data[0]}"
+            )
             passed_task_data[additional_input_field] = [
                 additional_input[additional_input_field]
                 for additional_input in task_data
             ]
         for additional_input_field in self.hf_additional_input_fields_pass_one_value:
-            assert (
-                additional_input_field in task_data[0]
-            ), f"'{additional_input_field}' field required by {__class__.__name__} is not in passed in task_data: {task_data[0]}"
+            assert additional_input_field in task_data[0], (
+                f"'{additional_input_field}' field required by {__class__.__name__} is not in passed in task_data: {task_data[0]}"
+            )
             values = {
                 additional_input[additional_input_field]
                 for additional_input in task_data
             }
-            assert (
-                len(values) == 1
-            ), f"Values of '{additional_input_field}' field required by {__class__.__name__} should all be the same, but have multiple values {values}"
+            assert len(values) == 1, (
+                f"Values of '{additional_input_field}' field required by {__class__.__name__} should all be the same, but have multiple values {values}"
+            )
 
             passed_task_data[additional_input_field] = next(iter(values))
 
@@ -2349,22 +2350,22 @@ def compute(
             result[self.main_score] = float(result[self.hf_main_score])
             del result[self.hf_main_score]
         if self.scale != 1.0:
-            assert (
-                self.scaled_fields is not None
-            ), f"Scaling factor was set to {self.scale}, but no fields specified"
+            assert self.scaled_fields is not None, (
+                f"Scaling factor was set to {self.scale}, but no fields specified"
+            )
             for key in self.scaled_fields:
-                assert (
-                    key in result
-                ), f"Trying to scale field '{key}' which is not in results of metrics: {result}"
+                assert key in result, (
+                    f"Trying to scale field '{key}' which is not in results of metrics: {result}"
+                )
                 if isinstance(result[key], list):
-                    assert all(
-                        isinstance(v, float) for v in result[key]
-                    ), "Not all scaled field '{key}' values are floats: {result[key]}"
+                    assert all(isinstance(v, float) for v in result[key]), (
+                        "Not all scaled field '{key}' values are floats: {result[key]}"
+                    )
                     result[key] = [v / self.scale for v in result[key]]
                 else:
-                    assert isinstance(
-                        result[key], float
-                    ), "Scaled field '{key}' is not float: {result[key]}"
+                    assert isinstance(result[key], float), (
+                        "Scaled field '{key}' is not float: {result[key]}"
+                    )
                     result[key] /= self.scale
         if self.main_score in result:
             result[self.main_score] = float(result[self.main_score])
@@ -2380,7 +2381,6 @@ class HuggingfaceBulkMetric(BulkInstanceMetric):
 
     def prepare(self):
         super().prepare()
-        import evaluate
 
         self.metric = evaluate.load(
             self.hf_metric_name, experiment_id=str(uuid.uuid4())
@@ -2394,9 +2394,9 @@ def compute(
     ) -> List[Dict[str, Any]]:
         passed_task_data = {}
         for additional_input_field in self.hf_additional_input_fields:
-            assert (
-                additional_input_field in task_data[0]
-            ), f"'{additional_input_field}' field required by {__class__.__name__} is not in passed in task_data: {task_data[0]}"
+            assert additional_input_field in task_data[0], (
f"'{additional_input_field}' field required by {__class__.__name__} is not in passed in task_data: {task_data[0]}" + ) passed_task_data[additional_input_field] = [ additional_input[additional_input_field] for additional_input in task_data @@ -2428,7 +2428,6 @@ class HuggingfaceInstanceMetric(InstanceMetric): def prepare(self): super().prepare() - import evaluate self.metric = evaluate.load( self.hf_metric_name, experiment_id=str(uuid.uuid4()) @@ -2533,7 +2532,6 @@ class F1(GlobalMetric): def prepare(self): super().prepare() - import evaluate self._metric = evaluate.load(self.metric, experiment_id=str(uuid.uuid4())) @@ -2735,9 +2733,9 @@ def download_finqa_eval_script_file(url, local_path, hash_of_script): response = requests.get(url) response.raise_for_status() content = response.content - assert ( - hashlib.md5(content).hexdigest() == hash_of_script - ), f'URL ("{url}") is different than expected. Make sure you added the right one.' + assert hashlib.md5(content).hexdigest() == hash_of_script, ( + f'URL ("{url}") is different than expected. Make sure you added the right one.' + ) with open(local_path, "wb") as file: file.write(content) @@ -2811,7 +2809,6 @@ class F1MultiLabel(GlobalMetric, PackageRequirementsMixin): def prepare(self): super().prepare() - import evaluate self._metric = evaluate.load( self.metric, "multilabel", experiment_id=str(uuid.uuid4()) @@ -2872,9 +2869,9 @@ def compute( labels=labels_param, ) if isinstance(result[self.metric], numpy.ndarray): - assert ( - len(result[self.metric]) == len(labels) - ), f"F1 result ({result[self.metric]}) has more entries than labels ({labels})" + assert len(result[self.metric]) == len(labels), ( + f"F1 result ({result[self.metric]}) has more entries than labels ({labels})" + ) final_result = {self.main_score: nan_mean(result[self.metric])} for i, label in enumerate(labels): final_result[self.metric + "_" + label] = result[self.metric][i] @@ -4657,12 +4654,12 @@ def validate_subgroup_types( for subgroup_name, score_list in subgroup_scores_dict.items() } ) - assert isinstance( - control_subgroup_types, list - ), "control_subgroup_types must be a list" - assert isinstance( - comparison_subgroup_types, list - ), "comparison_subgroup_types must be a list" + assert isinstance(control_subgroup_types, list), ( + "control_subgroup_types must be a list" + ) + assert isinstance(comparison_subgroup_types, list), ( + "comparison_subgroup_types must be a list" + ) # make sure each list is unique, so that labels aren't double-counted control_subgroup_types = list(set(control_subgroup_types)) comparison_subgroup_types = list(set(comparison_subgroup_types)) @@ -4817,9 +4814,9 @@ def normalized_cohens_h( # requires scores to be in [0,1] for subgroup_name, score_list in subgroup_scores_dict.items(): - assert all( - 0 <= score <= 1 for score in score_list - ), f"all {subgroup_name} scores must be in [0,1]" + assert all(0 <= score <= 1 for score in score_list), ( + f"all {subgroup_name} scores must be in [0,1]" + ) # combine all scores from each label (if there are more than 1 in each group) into a list group_scores_list = [ @@ -5623,9 +5620,9 @@ def prepare(self): def create_ensemble_scores(self, instance): score = self.ensemble(instance) - instance[ - "prediction" - ] = score # We use here the prediction field to pass the score to the compute method. + instance["prediction"] = ( + score # We use here the prediction field to pass the score to the compute method. 
+        )
         return instance
 
     def ensemble(self, instance):
@@ -5805,9 +5802,9 @@ def load_weights(json_file):
             return json.load(file)
 
     def ensemble(self, instance):
-        assert (
-            self.weights is not None
-        ), "RandomForestMetricsEnsemble must set self.weights before it can be used"
+        assert self.weights is not None, (
+            "RandomForestMetricsEnsemble must set self.weights before it can be used"
+        )
         ensemble_model = self.decode_forest(self.weights)
 
         prediction_lst = []
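
Note: the only behavioral change in this patch is where the module-level
loggers come from; the assert rewrites are pure reformatting, and dropping the
local "import evaluate" statements presumes evaluate is already imported at
module scope in metrics.py, where evaluate.load() is still called. A minimal
sketch of the logger swap, assuming unitxt's logging_utils.get_logger()
returns the library's shared standard-library logger (the log message below
is illustrative, not from the patch):

    # Before: each module pulled a logger from Hugging Face evaluate
    #   import evaluate
    #   logger = evaluate.logging.get_logger(__name__)

    # After: modules share unitxt's own logger
    from unitxt.logging_utils import get_logger

    logger = get_logger()
    logger.info("db_utils and metrics now log through unitxt's logger")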