diff --git a/hiclass/metrics.py b/hiclass/metrics.py
index d8926700..5e9fe441 100644
--- a/hiclass/metrics.py
+++ b/hiclass/metrics.py
@@ -9,20 +9,14 @@ def _validate_input(y_true, y_pred):
     assert len(y_true) == len(y_pred)
     y_pred = make_leveled(y_pred)
     y_true = make_leveled(y_true)
-    y_true = check_array(y_true, dtype=None)
-    y_pred = check_array(y_pred, dtype=None)
+    y_true = check_array(y_true, dtype=None, ensure_2d=False, allow_nd=True)
+    y_pred = check_array(y_pred, dtype=None, ensure_2d=False, allow_nd=True)
     return y_true, y_pred
 
 
-def precision(y_true: np.ndarray, y_pred: np.ndarray):
+def precision(y_true: np.ndarray, y_pred: np.ndarray, average: str = "micro"):
     r"""
-    Compute precision score for hierarchical classification.
-
-    :math:`hP = \displaystyle{\frac{\sum_{i}| \alpha_i \cap \beta_i |}{\sum_{i}| \alpha_i |}}`,
-    where :math:`\alpha_i` is the set consisting of the most specific classes predicted
-    for test example :math:`i` and all their ancestor classes, while :math:`\beta_i` is the
-    set containing the true most specific classes of test example :math:`i` and all
-    their ancestors, with summations computed over all test examples.
+    Compute hierarchical precision score.
 
     Parameters
     ----------
@@ -30,38 +24,89 @@ def precision(y_true: np.ndarray, y_pred: np.ndarray):
         Ground truth (correct) labels.
     y_pred : np.array of shape (n_samples, n_levels)
         Predicted labels, as returned by a classifier.
+    average: {"micro", "macro"}, str, default="micro"
+        This parameter determines the type of averaging performed during the computation:
+
+        - `micro`: The precision is computed by summing over all individual instances, :math:`\displaystyle{hP = \frac{\sum_{i=1}^{n}| \alpha_i \cap \beta_i |}{\sum_{i=1}^{n}| \alpha_i |}}`, where :math:`\alpha_i` is the set consisting of the most specific classes predicted for test example :math:`i` and all their ancestor classes, while :math:`\beta_i` is the set containing the true most specific classes of test example :math:`i` and all their ancestors, with summations computed over all test examples.
+        - `macro`: The precision is computed for each instance and then averaged, :math:`\displaystyle{hP = \frac{\sum_{i=1}^{n}hP_{i}}{n}}`, where :math:`\alpha_i` is the set consisting of the most specific classes predicted for test example :math:`i` and all their ancestor classes, while :math:`\beta_i` is the set containing the true most specific classes of test example :math:`i` and all their ancestors.
+
     Returns
     -------
     precision : float
         What proportion of positive identifications was actually correct?
""" y_true, y_pred = _validate_input(y_true, y_pred) + functions = { + "micro": _precision_micro, + "macro": _precision_macro, + } + return functions[average](y_true, y_pred) + + +def _precision_micro(y_true: np.ndarray, y_pred: np.ndarray): + precision_micro = { + 1: _precision_micro_1d, + 2: _precision_micro_2d, + 3: _precision_micro_3d, + } + return precision_micro[y_true.ndim](y_true, y_pred) + + +def _precision_micro_1d(y_true: np.ndarray, y_pred: np.ndarray): + sum_intersection = 0 + sum_prediction_and_ancestors = 0 + for ground_truth, prediction in zip(y_true, y_pred): + ground_truth_set = set([ground_truth]) + ground_truth_set.discard("") + predicted_set = set([prediction]) + predicted_set.discard("") + sum_intersection = sum_intersection + len( + ground_truth_set.intersection(predicted_set) + ) + sum_prediction_and_ancestors = sum_prediction_and_ancestors + len(predicted_set) + return sum_intersection / sum_prediction_and_ancestors + + +def _precision_micro_2d(y_true: np.ndarray, y_pred: np.ndarray): sum_intersection = 0 sum_prediction_and_ancestors = 0 for ground_truth, prediction in zip(y_true, y_pred): ground_truth_set = set(ground_truth) ground_truth_set.discard("") - prediction_set = set(prediction) - prediction_set.discard("") + predicted_set = set(prediction) + predicted_set.discard("") sum_intersection = sum_intersection + len( - ground_truth_set.intersection(prediction_set) + ground_truth_set.intersection(predicted_set) ) - sum_prediction_and_ancestors = sum_prediction_and_ancestors + len( - prediction_set + sum_prediction_and_ancestors = sum_prediction_and_ancestors + len(predicted_set) + return sum_intersection / sum_prediction_and_ancestors + + +def _precision_micro_3d(y_true: np.ndarray, y_pred: np.ndarray): + sum_intersection = 0 + sum_prediction_and_ancestors = 0 + for row_ground_truth, row_prediction in zip(y_true, y_pred): + ground_truth_set = set() + predicted_set = set() + for ground_truth, prediction in zip(row_ground_truth, row_prediction): + ground_truth_set.update(ground_truth) + predicted_set.update(prediction) + ground_truth_set.discard("") + predicted_set.discard("") + sum_intersection = sum_intersection + len( + ground_truth_set.intersection(predicted_set) ) - precision = sum_intersection / sum_prediction_and_ancestors - return precision + sum_prediction_and_ancestors = sum_prediction_and_ancestors + len(predicted_set) + return sum_intersection / sum_prediction_and_ancestors -def recall(y_true: np.ndarray, y_pred: np.ndarray): - r""" - Compute recall score for hierarchical classification. +def _precision_macro(y_true: np.ndarray, y_pred: np.ndarray): + return _compute_macro(y_true, y_pred, _precision_micro) + - :math:`\displaystyle{hR = \frac{\sum_i|\alpha_i \cap \beta_i|}{\sum_i|\beta_i|}}`, - where :math:`\alpha_i` is the set consisting of the most specific classes predicted - for test example :math:`i` and all their ancestor classes, while :math:`\beta_i` is the - set containing the true most specific classes of test example :math:`i` and all - their ancestors, with summations computed over all test examples. +def recall(y_true: np.ndarray, y_pred: np.ndarray, average: str = "micro"): + r""" + Compute hierarchical recall score. Parameters ---------- @@ -69,21 +114,62 @@ def recall(y_true: np.ndarray, y_pred: np.ndarray): Ground truth (correct) labels. y_pred : np.array of shape (n_samples, n_levels) Predicted labels, as returned by a classifier. 
+ average: {"micro", "macro"}, str, default="micro" + This parameter determines the type of averaging performed during the computation: + + - `micro`: The recall is computed by summing over all individual instances, :math:`\displaystyle{hR = \frac{\sum_{i=1}^{n}|\alpha_i \cap \beta_i|}{\sum_{i=1}^{n}|\beta_i|}}`, where :math:`\alpha_i` is the set consisting of the most specific classes predicted for test example :math:`i` and all their ancestor classes, while :math:`\beta_i` is the set containing the true most specific classes of test example :math:`i` and all their ancestors, with summations computed over all test examples. + - `macro`: The recall is computed for each instance and then averaged, :math:`\displaystyle{hR = \frac{\sum_{i=1}^{n}hR_{i}}{n}}`, where :math:`\alpha_i` is the set consisting of the most specific classes predicted for test example :math:`i` and all their ancestor classes, while :math:`\beta_i` is the set containing the true most specific classes of test example :math:`i` and all their ancestors. + Returns ------- recall : float What proportion of actual positives was identified correctly? """ y_true, y_pred = _validate_input(y_true, y_pred) + functions = { + "micro": _recall_micro, + "macro": _recall_macro, + } + return functions[average](y_true, y_pred) + + +def _recall_micro(y_true: np.ndarray, y_pred: np.ndarray): + recall_micro = { + 1: _recall_micro_1d, + 2: _recall_micro_2d, + 3: _recall_micro_3d, + } + return recall_micro[y_true.ndim](y_true, y_pred) + + +def _recall_micro_1d(y_true: np.ndarray, y_pred: np.ndarray): + sum_intersection = 0 + sum_prediction_and_ancestors = 0 + for ground_truth, prediction in zip(y_true, y_pred): + ground_truth_set = set([ground_truth]) + ground_truth_set.discard("") + predicted_set = set([prediction]) + predicted_set.discard("") + sum_intersection = sum_intersection + len( + ground_truth_set.intersection(predicted_set) + ) + sum_prediction_and_ancestors = sum_prediction_and_ancestors + len( + ground_truth_set + ) + recall = sum_intersection / sum_prediction_and_ancestors + return recall + + +def _recall_micro_2d(y_true: np.ndarray, y_pred: np.ndarray): sum_intersection = 0 sum_prediction_and_ancestors = 0 for ground_truth, prediction in zip(y_true, y_pred): ground_truth_set = set(ground_truth) ground_truth_set.discard("") - prediction_set = set(prediction) - prediction_set.discard("") + predicted_set = set(prediction) + predicted_set.discard("") sum_intersection = sum_intersection + len( - ground_truth_set.intersection(prediction_set) + ground_truth_set.intersection(predicted_set) ) sum_prediction_and_ancestors = sum_prediction_and_ancestors + len( ground_truth_set @@ -92,12 +178,34 @@ def recall(y_true: np.ndarray, y_pred: np.ndarray): return recall -def f1(y_true: np.ndarray, y_pred: np.ndarray): - r""" - Compute f1 score for hierarchical classification. 
+def _recall_micro_3d(y_true: np.ndarray, y_pred: np.ndarray):
+    sum_intersection = 0
+    sum_prediction_and_ancestors = 0
+    for row_ground_truth, row_prediction in zip(y_true, y_pred):
+        ground_truth_set = set()
+        predicted_set = set()
+        for ground_truth, prediction in zip(row_ground_truth, row_prediction):
+            ground_truth_set.update(ground_truth)
+            predicted_set.update(prediction)
+        ground_truth_set.discard("")
+        predicted_set.discard("")
+        sum_intersection = sum_intersection + len(
+            ground_truth_set.intersection(predicted_set)
+        )
+        sum_prediction_and_ancestors = sum_prediction_and_ancestors + len(
+            ground_truth_set
+        )
+    recall = sum_intersection / sum_prediction_and_ancestors
+    return recall
 
-    :math:`\displaystyle{hF = \frac{2 \times hP \times hR}{hP + hR}}`,
-    where :math:`hP` is the hierarchical precision and :math:`hR` is the hierarchical recall.
+
+def _recall_macro(y_true: np.ndarray, y_pred: np.ndarray):
+    return _compute_macro(y_true, y_pred, _recall_micro)
+
+
+def f1(y_true: np.ndarray, y_pred: np.ndarray, average: str = "micro"):
+    r"""
+    Compute hierarchical f-score.
 
     Parameters
     ----------
@@ -105,13 +213,37 @@ def f1(y_true: np.ndarray, y_pred: np.ndarray):
         Ground truth (correct) labels.
     y_pred : np.array of shape (n_samples, n_levels)
         Predicted labels, as returned by a classifier.
+    average: {"micro", "macro"}, str, default="micro"
+        This parameter determines the type of averaging performed during the computation:
+
+        - `micro`: The f-score is computed by summing over all individual instances, :math:`\displaystyle{hF = \frac{2 \times hP \times hR}{hP + hR}}`, where :math:`hP` is the hierarchical precision and :math:`hR` is the hierarchical recall.
+        - `macro`: The f-score is computed for each instance and then averaged, :math:`\displaystyle{hF = \frac{\sum_{i=1}^{n}hF_{i}}{n}}`, where :math:`hF_{i}` is the hierarchical f-score computed from the hierarchical precision and recall of test example :math:`i` alone.
+
     Returns
     -------
     f1 : float
         Weighted average of the precision and recall
     """
     y_true, y_pred = _validate_input(y_true, y_pred)
+    functions = {
+        "micro": _f_score_micro,
+        "macro": _f_score_macro,
+    }
+    return functions[average](y_true, y_pred)
+
+
+def _f_score_micro(y_true: np.ndarray, y_pred: np.ndarray):
     prec = precision(y_true, y_pred)
     rec = recall(y_true, y_pred)
-    f1 = 2 * prec * rec / (prec + rec)
-    return f1
+    return 2 * prec * rec / (prec + rec)
+
+
+def _f_score_macro(y_true: np.ndarray, y_pred: np.ndarray):
+    return _compute_macro(y_true, y_pred, _f_score_micro)
+
+
+def _compute_macro(y_true: np.ndarray, y_pred: np.ndarray, _micro_function):
+    overall_sum = 0
+    for ground_truth, prediction in zip(y_true, y_pred):
+        sample_score = _micro_function(np.array([ground_truth]), np.array([prediction]))
+        overall_sum = overall_sum + sample_score
+    return overall_sum / len(y_true)
diff --git a/tests/test_metrics.py b/tests/test_metrics.py
index e2c325d1..dceaf440 100644
--- a/tests/test_metrics.py
+++ b/tests/test_metrics.py
@@ -1,44 +1,336 @@
 import numpy as np
 import pytest
-from hiclass import metrics
+from pytest import approx
+from hiclass.metrics import precision, recall, f1
 
-def test_unmatched_lengths():
-    y_true = np.array([[1, 2, 3], [1, 2, 4], [1, 5, 6], [1, 5, 8]], dtype=np.int32)
-    y_pred = np.array([[1, 2, 3], [1, 2, 4]], dtype=np.int32)
+
+# TODO: add tests for 3D dataframe (not sure if it's possible to have 3D dataframes)
+
+
+def test_unmatched_lengths_1d_list():
+    y_true = [1, 2, 3]
+    y_pred = [1, 2]
+    with pytest.raises(AssertionError):
+        precision(y_true, y_pred)
+
+
+def test_unmatched_lengths_2d_list():
+    y_true = [[1, 2, 3], [1, 2, 4], [1, 5, 6], [1, 5, 8]]
+    y_pred = [[1, 2, 3], [1, 2, 4]]
     with pytest.raises(AssertionError):
-        metrics.precision(y_true, y_pred)
+        precision(y_true, y_pred)
+
+
+def test_unmatched_lengths_3d_list():
+    y_true = [
+        [["human", "mermaid"], ["fish", "mermaid"]],
+        [["human", "minotaur"], ["bull", "minotaur"]],
+    ]
+    y_pred = [
+        [["human", "mermaid"], ["fish", "mermaid"]],
+    ]
+    with pytest.raises(AssertionError):
+        precision(y_true, y_pred)
+
+
+def test_unmatched_lengths_1d_np_array():
+    y_true = np.array([1, 2, 3])
+    y_pred = np.array([1, 2])
+    with pytest.raises(AssertionError):
+        precision(y_true, y_pred)
+
+
+def test_unmatched_lengths_2d_np_array():
+    y_true = np.array([[1, 2, 3], [1, 2, 4], [1, 5, 6], [1, 5, 8]])
+    y_pred = np.array([[1, 2, 3], [1, 2, 4]])
+    with pytest.raises(AssertionError):
+        precision(y_true, y_pred)
+
+
+def test_unmatched_lengths_3d_np_array():
+    y_true = np.array(
+        [
+            [["human", "mermaid"], ["fish", "mermaid"]],
+            [["human", "minotaur"], ["bull", "minotaur"]],
+        ]
+    )
+    y_pred = np.array(
+        [
+            [["human", "mermaid"], ["fish", "mermaid"]],
+        ]
+    )
+    with pytest.raises(AssertionError):
+        precision(y_true, y_pred)
+
+
+def test_precision_micro_1d_list():
+    y_true = [1, 2, 3, 4]
+    y_pred = [1, 2, 5, 6]
+    assert 0.5 == precision(y_true, y_pred, "micro")
+    assert 1 == precision(y_true, y_true, "micro")
 
-def test_precision():
+
+def test_precision_micro_2d_list():
+    y_true = [[1, 2, 3, 4], [1, 2, 5, 6]]
+    y_pred = [[1, 2, 5, 6], [1, 2, 3, 4]]
+    assert 0.5 == precision(y_true, y_pred, "micro")
+    assert 1 == precision(y_true, y_true, "micro")
+
+
+def test_precision_micro_1d_np_array():
+    y_true = np.array([1, 2, 3, 4])
+    y_pred = np.array([1, 2, 5, 6])
+    assert 0.5 == precision(y_true, y_pred, "micro")
+    assert 1 == precision(y_true, y_true, "micro")
+
+
+def test_precision_micro_2d_np_array():
     y_true = np.array([[1, 2, 3, 4], [1, 2, 5, 6]])
     y_pred = np.array([[1, 2, 5, 6], [1, 2, 3, 4]])
-    assert metrics.precision(y_true, y_pred) == 0.5
-    assert metrics.precision(y_true, y_true) == 1
+    assert 0.5 == precision(y_true, y_pred, "micro")
+    assert 1 == precision(y_true, y_true, "micro")
+
+
+def test_precision_micro_3d_np_array():
+    y_true = np.array(
+        [
+            [["human", "mermaid"], ["", ""]],
+            [["human", "minotaur"], ["bull", "minotaur"]],
+        ]
+    )
+    y_pred = np.array(
+        [
+            [["human", "mermaid"], ["fish", "mermaid"]],
+            [["human", "minotaur"], ["bull", "minotaur"]],
+        ]
+    )
+    assert 0.8333 == approx(precision(y_true, y_pred, "micro"), rel=1e-3)
+    assert 1 == precision(y_true, y_true, "micro")
+
+
+def test_precision_macro_1d_list():
+    y_true = [1, 2, 3, 4]
+    y_pred = [1, 5, 6, 7]
+    assert 0.25 == precision(y_true, y_pred, "macro")
+    assert 1 == precision(y_true, y_true, "macro")
+
+
+def test_precision_macro_2d_list():
+    y_true = [[1, 2, 3, 4], [1, 2, 5, 6]]
+    y_pred = [[1, 5, 6, 7], [1, 2, 3, 4]]
+    assert 0.375 == precision(y_true, y_pred, "macro")
+    assert 1 == precision(y_true, y_true, "macro")
+
+
+def test_precision_macro_1d_np_array():
+    y_true = np.array([1, 2, 3, 4])
+    y_pred = np.array([1, 5, 6, 7])
+    assert 0.25 == precision(y_true, y_pred, "macro")
+    assert 1 == precision(y_true, y_true, "macro")
+
+
+def test_precision_macro_2d_np_array():
+    y_true = np.array([[1, 2, 3, 4], [1, 2, 5, 6]])
+    y_pred = np.array([[1, 5, 6, 7], [1, 2, 3, 4]])
+    assert 0.375 == precision(y_true, y_pred, "macro")
+    assert 1 == precision(y_true, y_true, "macro")
+
+
+def test_precision_macro_3d_np_array():
+    y_true = np.array(
+        [
+            [["human", "mermaid"], ["", ""]],
+            [["human", "minotaur"], ["bull", "minotaur"]],
+        ]
+    )
+    y_pred = np.array(
+        [
+            [["human", "mermaid"], ["fish", "mermaid"]],
+            [["human", "minotaur"], ["bull", "minotaur"]],
+        ]
+    )
+    assert 0.8333 == approx(precision(y_true, y_pred, "macro"), rel=1e-3)
+    assert 1 == precision(y_true, y_true, "macro")
+
+
+def test_recall_micro_1d_list():
+    y_true = [1, 2, 3, 4]
+    y_pred = [1, 5, 6, 7]
+    assert 0.25 == recall(y_true, y_pred, "micro")
 
-def test_recall():
+
+def test_recall_micro_2d_list():
+    y_true = [[1, 2], [1, 2]]
+    y_pred = [[1, 2, 5, 6], [1, 2, 3, 4]]
+    assert 1 == recall(y_true, y_pred, "micro")
+    assert 0.5 == recall(y_pred, y_true, "micro")
+
+
+def test_recall_micro_1d_np_array():
+    y_true = np.array([1, 2, 3, 4])
+    y_pred = np.array([1, 5, 6, 7])
+    assert 0.25 == recall(y_true, y_pred, "micro")
+
+
+def test_recall_micro_2d_np_array():
     y_true = np.array([[1, 2], [1, 2]])
     y_pred = np.array([[1, 2, 5, 6], [1, 2, 3, 4]])
-    assert metrics.recall(y_true, y_pred) == 1
-    assert metrics.recall(y_pred, y_true) == 0.5
+    assert 1 == recall(y_true, y_pred, "micro")
+    assert 0.5 == recall(y_pred, y_true, "micro")
+
+
+def test_recall_micro_3d_np_array():
+    y_true = np.array(
+        [
+            [["human", "mermaid"], ["fish", "mermaid"]],
+            [["human", "minotaur"], ["bull", "minotaur"]],
+        ]
+    )
+    y_pred = np.array(
+        [
+            [["human", "mermaid"], ["", ""]],
+            [["human", "minotaur"], ["bull", "minotaur"]],
+        ]
+    )
+    assert 0.8333 == approx(recall(y_true, y_pred, "micro"), rel=1e-3)
+    assert 1 == recall(y_true, y_true, "micro")
+
+
+def test_recall_macro_1d_list():
+    y_true = [1, 2, 3, 5]
+    y_pred = [1, 5, 6, 7]
+    assert 0.25 == recall(y_true, y_pred, "macro")
+
+
+def test_recall_macro_2d_list():
+    y_true = [[1, 2], [1, 2]]
+    y_pred = [[1, 5, 6, 7], [1, 2, 3, 4]]
+    assert 0.75 == recall(y_true, y_pred, "macro")
+    assert 0.375 == recall(y_pred, y_true, "macro")
+
+
+def test_recall_macro_1d_np_array():
+    y_true = np.array([1, 2, 3, 5])
+    y_pred = np.array([1, 5, 6, 7])
+    assert 0.25 == recall(y_true, y_pred, "macro")
+
+
+def test_recall_macro_2d_np_array():
+    y_true = np.array([[1, 2], [1, 2]])
+    y_pred = np.array([[1, 5, 6, 7], [1, 2, 3, 4]])
+    assert 0.75 == recall(y_true, y_pred, "macro")
+    assert 0.375 == recall(y_pred, y_true, "macro")
+
+
+def test_recall_macro_3d_np_array():
+    y_true = np.array(
+        [
+            [["human", "mermaid"], ["fish", "mermaid"]],
+            [["human", "minotaur"], ["bull", "minotaur"]],
+        ]
+    )
+    y_pred = np.array(
+        [
+            [["human", "mermaid"], ["", ""]],
+            [["human", "minotaur"], ["bull", "minotaur"]],
+        ]
+    )
+    assert 0.8333 == approx(recall(y_true, y_pred, "macro"), rel=1e-3)
+    assert 1 == recall(y_true, y_true, "macro")
+
+
+def test_f1_micro_1d_list():
+    y_true = [1, 2, 3, 4]
+    y_pred = [1, 2, 5, 6]
+    assert 0.5 == f1(y_true, y_pred, "micro")
 
-def test_f1():
+
+def test_f1_micro_2d_list():
+    y_true = [[1, 2, 3, 4], [1, 2, 5, 6]]
+    y_pred = [[1, 2, 5, 6], [1, 2, 3, 4]]
+    assert 0.5 == f1(y_true, y_pred, "micro")
+
+
+def test_f1_micro_1d_np_array():
+    y_true = np.array([1, 2, 3, 4])
+    y_pred = np.array([1, 2, 5, 6])
+    assert 0.5 == f1(y_true, y_pred, "micro")
+
+
+def test_f1_micro_2d_np_array():
     y_true = np.array([[1, 2, 3, 4], [1, 2, 5, 6]])
     y_pred = np.array([[1, 2, 5, 6], [1, 2, 3, 4]])
-    assert metrics.f1(y_true, y_pred) == 0.5
+    assert 0.5 == f1(y_true, y_pred, "micro")
+
+
+def test_f1_micro_3d_np_array():
+    y_true = np.array(
+        [
+            [["human", "mermaid"], ["fish", "mermaid"]],
+            [["human", "minotaur"], ["bull", "minotaur"]],
+        ]
+    )
+    y_pred = np.array(
+        [
+            [["human", "mermaid"], ["", ""]],
+            [["human", "minotaur"], ["bull", "minotaur"]],
+        ]
+    )
+    assert 0.9090 == approx(f1(y_true, y_pred, "micro"), rel=1e-3)
+    assert 1 == f1(y_true, y_true, "micro")
+
+
+def test_f1_macro_1d_list():
+    y_true = [1, 2, 3, 4]
+    y_pred = [1, 2, 3, 4]
+    assert 1 == f1(y_true, y_pred, "macro")
+
+
+def test_f1_macro_2d_list():
+    y_true = [[1, 2, 3, 4], [1, 2, 5, 6]]
+    y_pred = [[1, 5, 6], [1, 2, 3]]
+    assert 0.4285714 == approx(f1(y_true, y_pred, "macro"))
+
+
+def test_f1_macro_1d_np_array():
+    y_true = np.array([1, 2, 3, 4])
+    y_pred = np.array([1, 2, 3, 4])
+    assert 1 == f1(y_true, y_pred, "macro")
+
+
+def test_f1_macro_2d_np_array():
+    y_true = np.array([[1, 2, 3, 4], [1, 2, 5, 6]])
+    y_pred = np.array([[1, 5, 6], [1, 2, 3]])
+    assert 0.4285714 == approx(f1(y_true, y_pred, "macro"))
+
+
+def test_f1_macro_3d_np_array():
+    y_true = np.array(
+        [
+            [["human", "mermaid"], ["fish", "mermaid"]],
+            [["human", "minotaur"], ["bull", "minotaur"]],
+        ]
+    )
+    y_pred = np.array(
+        [
+            [["human", "mermaid"], ["", ""]],
+            [["human", "minotaur"], ["bull", "minotaur"]],
+        ]
+    )
+    assert 0.9 == approx(f1(y_true, y_pred, "macro"), rel=1e-3)
+    assert 1 == f1(y_true, y_true, "macro")
 
 
-def test_empty_levels_1():
-    y_true = np.array([["2", "3"], ["1"], ["4", "5", "6"]], dtype=object)
-    y_pred = np.array([["1", "", ""], ["2", "3", ""], ["4", "5", "6"]], dtype=object)
-    assert metrics.f1(y_true, y_pred) == 0.5
-    assert metrics.f1(y_true, y_true) == 1
+def test_empty_levels_2d_list_1():
+    y_true = [["2", "3"], ["1"], ["4", "5", "6"]]
+    y_pred = [["1"], ["2", "3"], ["4", "5", "6"]]
+    assert 0.5 == f1(y_true, y_pred)
+    assert 1 == f1(y_true, y_true)
 
 
-def test_empty_levels_2():
-    y_true = np.array([["1"], ["2", "3"], ["4", "5", "6"]], dtype=object)
-    y_pred = np.array([["1", "", ""], ["2", "3", ""], ["4", "5", "6"]], dtype=object)
-    assert metrics.f1(y_true, y_pred) == 1
-    assert metrics.f1(y_true, y_true) == 1
+def test_empty_levels_2d_list_2():
+    y_true = [["1"], ["2", "3"], ["4", "5", "6"]]
+    y_pred = [["1"], ["2", "3"], ["4", "5", "6"]]
+    assert 1 == f1(y_true, y_pred)
+    assert 1 == f1(y_true, y_true)
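A quick way to try the new `average` parameter end to end is the minimal sketch below. It assumes this branch of hiclass is installed; the animal labels are made-up data laid out as (n_samples, n_levels) rows padded with "" as described in the docstrings.

    import numpy as np
    from hiclass.metrics import precision, recall, f1

    # Each row is one sample's predicted path in the hierarchy, padded with "".
    y_true = np.array([["animal", "bird", "eagle"], ["animal", "fish", ""]])
    y_pred = np.array([["animal", "bird", "hawk"], ["animal", "reptile", ""]])

    # micro: pool the label sets over all samples before dividing.
    # Sample 1 gets 2 of 3 predicted labels right, sample 2 gets 1 of 2,
    # so hP = (2 + 1) / (3 + 2) = 0.6.
    print(precision(y_true, y_pred, average="micro"))  # 0.6

    # macro: score each sample separately, then average: (2/3 + 1/2) / 2 ~= 0.583.
    print(precision(y_true, y_pred, average="macro"))

    # recall and f1 accept the same argument.
    print(recall(y_true, y_pred, average="micro"), f1(y_true, y_pred, average="macro"))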
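The new 3-dimensional branch (`_precision_micro_3d`, `_recall_micro_3d`) is for samples that carry several label paths at once. The sketch below simply replays the mermaid/minotaur fixture from `test_precision_micro_3d_np_array` and works out its expected 0.8333 by hand.

    import numpy as np
    from hiclass.metrics import precision

    # Two samples, each with up to two hierarchical paths, padded with "".
    y_true = np.array(
        [
            [["human", "mermaid"], ["", ""]],
            [["human", "minotaur"], ["bull", "minotaur"]],
        ]
    )
    y_pred = np.array(
        [
            [["human", "mermaid"], ["fish", "mermaid"]],
            [["human", "minotaur"], ["bull", "minotaur"]],
        ]
    )

    # All paths of a sample are unioned into one label set before comparing:
    # sample 1: true {human, mermaid}, predicted {human, mermaid, fish} -> 2 of 3
    # sample 2: both sets are {human, minotaur, bull}                   -> 3 of 3
    # micro precision = (2 + 3) / (3 + 3) = 5 / 6 ~= 0.8333
    print(precision(y_true, y_pred, "micro"))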
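As `_compute_macro` shows, the macro variants are just per-sample micro scores averaged, so the 0.4285714 expected by `test_f1_macro_2d_np_array` can be cross-checked row by row (same installation assumption as above).

    import numpy as np
    from hiclass.metrics import f1

    y_true = np.array([[1, 2, 3, 4], [1, 2, 5, 6]])
    y_pred = np.array([[1, 5, 6], [1, 2, 3]])

    # Per-sample hierarchical f-scores are 2/7 and 4/7, whose mean is 3/7 ~= 0.4285714.
    per_sample = [f1(np.array([t]), np.array([p]), "micro") for t, p in zip(y_true, y_pred)]
    print(np.mean(per_sample))          # ~0.4285714
    print(f1(y_true, y_pred, "macro"))  # same value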