From a4a006ecf654064c53495e0aede1eb61d7567baa Mon Sep 17 00:00:00 2001 From: Fabio Date: Mon, 2 Dec 2024 10:33:00 -0300 Subject: [PATCH 1/9] Fix unleveled bug with bert --- hiclass/HierarchicalClassifier.py | 2 +- tests/test_LocalClassifiers.py | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/hiclass/HierarchicalClassifier.py b/hiclass/HierarchicalClassifier.py index e23915b1..5ba6154e 100644 --- a/hiclass/HierarchicalClassifier.py +++ b/hiclass/HierarchicalClassifier.py @@ -161,7 +161,7 @@ def _pre_fit(self, X, y, sample_weight): ) else: self.X_ = np.array(X) - self.y_ = np.array(y) + self.y_ = np.array(make_leveled(y)) if sample_weight is not None: self.sample_weight_ = _check_sample_weight(sample_weight, X) diff --git a/tests/test_LocalClassifiers.py b/tests/test_LocalClassifiers.py index abd7bddf..2b3fec65 100644 --- a/tests/test_LocalClassifiers.py +++ b/tests/test_LocalClassifiers.py @@ -143,3 +143,14 @@ def test_tmp_dir(classifier): assert expected_name == name check_is_fitted(classifier) clf.fit(x, y) + + +@pytest.mark.parametrize("classifier", classifiers) +def test_bert_unleveled(classifier): + clf = classifier( + local_classifier=LogisticRegression(), + bert=True, + ) + x = [[0, 1], [2, 3]] + y = [["a"], ["b", "c"]] + clf.fit(x, y) From a92b6a83b8f5138f9510bf417068474625354b0b Mon Sep 17 00:00:00 2001 From: Fabio Date: Mon, 2 Dec 2024 11:23:25 -0300 Subject: [PATCH 2/9] Fix unleveled bug with bert --- hiclass/HierarchicalClassifier.py | 4 +++- tests/test_LocalClassifiers.py | 27 ++++++++++++++++++--------- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/hiclass/HierarchicalClassifier.py b/hiclass/HierarchicalClassifier.py index 5ba6154e..1351fa0b 100644 --- a/hiclass/HierarchicalClassifier.py +++ b/hiclass/HierarchicalClassifier.py @@ -161,7 +161,9 @@ def _pre_fit(self, X, y, sample_weight): ) else: self.X_ = np.array(X) - self.y_ = np.array(make_leveled(y)) + self.y_ = check_array( + make_leveled(y), dtype=None, 
ensure_2d=False, allow_nd=True + ) if sample_weight is not None: self.sample_weight_ = _check_sample_weight(sample_weight, X) diff --git a/tests/test_LocalClassifiers.py b/tests/test_LocalClassifiers.py index 2b3fec65..ec96bb13 100644 --- a/tests/test_LocalClassifiers.py +++ b/tests/test_LocalClassifiers.py @@ -3,6 +3,7 @@ import numpy as np import pytest +from bert_sklearn import BertClassifier from numpy.testing import assert_array_equal from pyfakefs.fake_filesystem_unittest import Patcher from sklearn.linear_model import LogisticRegression @@ -10,8 +11,8 @@ from sklearn.utils.validation import check_is_fitted from hiclass import ( - LocalClassifierPerNode, LocalClassifierPerLevel, + LocalClassifierPerNode, LocalClassifierPerParentNode, ) from hiclass.ConstantClassifier import ConstantClassifier @@ -77,16 +78,20 @@ def test_empty_levels(empty_levels, classifier): @pytest.mark.parametrize("classifier", classifiers) def test_fit_bert(classifier): - bert = ConstantClassifier() + bert = BertClassifier() clf = classifier( local_classifier=bert, bert=True, ) - X = ["Text 1", "Text 2"] - y = ["a", "a"] - clf.fit(X, y) + x = ["Batman", "Joker", "Rorschach"] + y = [ + ["Action", "The Dark Night"], + ["Action", "The Dark Night"], + ["Action", "Watchmen"], + ] + clf.fit(x, y) check_is_fitted(clf) - predictions = clf.predict(X) + predictions = clf.predict(x) assert_array_equal(y, predictions) @@ -148,9 +153,13 @@ def test_tmp_dir(classifier): @pytest.mark.parametrize("classifier", classifiers) def test_bert_unleveled(classifier): clf = classifier( - local_classifier=LogisticRegression(), + local_classifier=BertClassifier(), bert=True, ) - x = [[0, 1], [2, 3]] - y = [["a"], ["b", "c"]] + x = ["Batman", "Joker"] + y = [["Action", "The Dark Night"], ["Action"]] + ground_truth = [["Action", "The Dark Night"], ["Action", "The Dark Night"]] clf.fit(x, y) + check_is_fitted(clf) + predictions = clf.predict(x) + assert_array_equal(ground_truth, predictions) From 
65958e67e01abb16795387c91e26310f4b28c7c4 Mon Sep 17 00:00:00 2001 From: Fabio Date: Mon, 2 Dec 2024 11:27:48 -0300 Subject: [PATCH 3/9] Add bert-sklearn to dev dependencies --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 1c362c1e..fbdc78bb 100644 --- a/setup.py +++ b/setup.py @@ -45,6 +45,7 @@ "ray", "shap==0.44.1", "xarray==2023.1.0", + "bert-sklearn @ git+ssh://git@github.com/charles9n/bert-sklearn.git#egg=bert-sklearn", ], } From 69c9e0c8ff788a5f0dc3a1c63f35061391df5293 Mon Sep 17 00:00:00 2001 From: Fabio Date: Mon, 2 Dec 2024 11:36:46 -0300 Subject: [PATCH 4/9] Add ssh key --- .github/workflows/test-pr.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test-pr.yml b/.github/workflows/test-pr.yml index 7cdb3c3b..bcc3e1f7 100644 --- a/.github/workflows/test-pr.yml +++ b/.github/workflows/test-pr.yml @@ -23,6 +23,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | + ssh-keygen -q -t ed25519 -N '' -f ~/.ssh/id_rsa <</dev/null 2>&1 python -m pip install --upgrade pip python -m pip install -e ".[dev]" - name: Test with pytest From e5764a19de856006a13c1d412c4c63e6e9088fbc Mon Sep 17 00:00:00 2001 From: Fabio Date: Mon, 2 Dec 2024 11:38:14 -0300 Subject: [PATCH 5/9] Add ssh key --- .github/workflows/test-pr.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-pr.yml b/.github/workflows/test-pr.yml index bcc3e1f7..2bd4744e 100644 --- a/.github/workflows/test-pr.yml +++ b/.github/workflows/test-pr.yml @@ -23,7 +23,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - ssh-keygen -q -t ed25519 -N '' -f ~/.ssh/id_rsa <</dev/null 2>&1 + ssh-keygen -q -t ed25519 -N '' -f ~/.ssh/id_ed25519 <</dev/null 2>&1 python -m pip install --upgrade pip python -m pip install -e ".[dev]" - name: Test with pytest From 69e3dc0304ce95236bc77983c19ccbc530866870 Mon Sep 17 00:00:00 2001 From: Fabio Date: Mon, 2 Dec 
2024 11:42:01 -0300 Subject: [PATCH 6/9] Add bert-sklearn --- .github/workflows/test-pr.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-pr.yml b/.github/workflows/test-pr.yml index 2bd4744e..73cd796c 100644 --- a/.github/workflows/test-pr.yml +++ b/.github/workflows/test-pr.yml @@ -23,7 +23,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - ssh-keygen -q -t ed25519 -N '' -f ~/.ssh/id_ed25519 <</dev/null 2>&1 + pip install git+https://github.com/charles9n/bert-sklearn.git@master python -m pip install --upgrade pip python -m pip install -e ".[dev]" - name: Test with pytest From fbee1f174c1cac1efb5a53bab41ee2489dd1825c Mon Sep 17 00:00:00 2001 From: Fabio Date: Mon, 2 Dec 2024 11:45:49 -0300 Subject: [PATCH 7/9] Add bert-sklearn --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index fbdc78bb..4d80b4d8 100644 --- a/setup.py +++ b/setup.py @@ -45,7 +45,7 @@ "ray", "shap==0.44.1", "xarray==2023.1.0", - "bert-sklearn @ git+ssh://git@github.com/charles9n/bert-sklearn.git#egg=bert-sklearn", + "bert-sklearn @ git+https://github.com/charles9n/bert-sklearn.git#egg=bert-sklearn", ], } From 9b5ffea8bc02a5d63ab908c3a3ec1314aac369a2 Mon Sep 17 00:00:00 2001 From: Fabio Date: Mon, 2 Dec 2024 11:48:14 -0300 Subject: [PATCH 8/9] Add bert-sklearn --- .github/workflows/test-pr.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/test-pr.yml b/.github/workflows/test-pr.yml index 73cd796c..7cdb3c3b 100644 --- a/.github/workflows/test-pr.yml +++ b/.github/workflows/test-pr.yml @@ -23,7 +23,6 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - pip install git+https://github.com/charles9n/bert-sklearn.git@master python -m pip install --upgrade pip python -m pip install -e ".[dev]" - name: Test with pytest From 909e8d4c22bf83986b7575ea2272734ddf1df0e8 Mon Sep 17 00:00:00 2001 From: Fabio Date: Thu, 
5 Dec 2024 14:06:25 -0300 Subject: [PATCH 9/9] Fix tests bert --- tests/test_LocalClassifierPerParentNode.py | 38 +++++++++++++++++++++- tests/test_LocalClassifiers.py | 35 -------------------- 2 files changed, 37 insertions(+), 36 deletions(-) diff --git a/tests/test_LocalClassifierPerParentNode.py b/tests/test_LocalClassifierPerParentNode.py index ad1a5469..268fc990 100644 --- a/tests/test_LocalClassifierPerParentNode.py +++ b/tests/test_LocalClassifierPerParentNode.py @@ -4,12 +4,14 @@ import networkx as nx import numpy as np import pytest -from numpy.testing import assert_array_equal, assert_array_almost_equal +from bert_sklearn import BertClassifier +from numpy.testing import assert_array_almost_equal, assert_array_equal from scipy.sparse import csr_matrix from sklearn.exceptions import NotFittedError from sklearn.linear_model import LogisticRegression from sklearn.utils.estimator_checks import parametrize_with_checks from sklearn.utils.validation import check_is_fitted + from hiclass import LocalClassifierPerParentNode from hiclass._calibration.Calibrator import _Calibrator from hiclass.HierarchicalClassifier import make_leveled @@ -393,3 +395,37 @@ def test_fit_calibrate_predict_predict_proba_bert(): classifier.calibrate(x, y) classifier.predict(x) classifier.predict_proba(x) + + +# Note: bert only works with the local classifier per parent node. +# It does not have the attribute classes_, which is necessary +# for the local classifiers per level and per node. +def test_fit_bert(): + bert = BertClassifier() + clf = LocalClassifierPerParentNode( + local_classifier=bert, + bert=True, + ) + x = ["Batman", "rorschach"] + y = [ + ["Action", "The Dark Night"], + ["Action", "Watchmen"], + ] + clf.fit(x, y) + check_is_fitted(clf) + predictions = clf.predict(x) + assert_array_equal(y, predictions) + + +def test_bert_unleveled(): + clf = LocalClassifierPerParentNode( + local_classifier=BertClassifier(), + bert=True, + ) + x = ["Batman", "Jaws"] + y = [["Action", "The 
Dark Night"], ["Thriller"]] + ground_truth = [["Action", "The Dark Night"], ["Action", "The Dark Night"]] + clf.fit(x, y) + check_is_fitted(clf) + predictions = clf.predict(x) + assert_array_equal(ground_truth, predictions) diff --git a/tests/test_LocalClassifiers.py b/tests/test_LocalClassifiers.py index ec96bb13..065a1486 100644 --- a/tests/test_LocalClassifiers.py +++ b/tests/test_LocalClassifiers.py @@ -3,7 +3,6 @@ import numpy as np import pytest -from bert_sklearn import BertClassifier from numpy.testing import assert_array_equal from pyfakefs.fake_filesystem_unittest import Patcher from sklearn.linear_model import LogisticRegression @@ -76,25 +75,6 @@ def test_empty_levels(empty_levels, classifier): assert_array_equal(ground_truth, predictions) -@pytest.mark.parametrize("classifier", classifiers) -def test_fit_bert(classifier): - bert = BertClassifier() - clf = classifier( - local_classifier=bert, - bert=True, - ) - x = ["Batman", "Joker", "Rorschach"] - y = [ - ["Action", "The Dark Night"], - ["Action", "The Dark Night"], - ["Action", "Watchmen"], - ] - clf.fit(x, y) - check_is_fitted(clf) - predictions = clf.predict(x) - assert_array_equal(y, predictions) - - @pytest.mark.parametrize("classifier", classifiers) def test_knn(classifier): knn = KNeighborsClassifier( @@ -148,18 +128,3 @@ def test_tmp_dir(classifier): assert expected_name == name check_is_fitted(classifier) clf.fit(x, y) - - -@pytest.mark.parametrize("classifier", classifiers) -def test_bert_unleveled(classifier): - clf = classifier( - local_classifier=BertClassifier(), - bert=True, - ) - x = ["Batman", "Joker"] - y = [["Action", "The Dark Night"], ["Action"]] - ground_truth = [["Action", "The Dark Night"], ["Action", "The Dark Night"]] - clf.fit(x, y) - check_is_fitted(clf) - predictions = clf.predict(x) - assert_array_equal(ground_truth, predictions)