Beef up test coverage (#23)

* Beef up test coverage * Use GTIL as reference * Test multi-target XGBoost * Slim down tests on Windows * Beef up LightGBM tests * Beef up scikit-learn tests * Gracefully handle missing optional deps
dmlc · Feb 24, 2024 · fc2a5a1 · fc2a5a1
1 parent 69a8169
commit fc2a5a1
Show file tree

Hide file tree

Showing 8 changed files with 816 additions and 305 deletions.
diff --git a/ops/win-python-coverage.bat b/ops/win-python-coverage.bat
@@ -15,5 +15,5 @@ call micromamba activate dev
 if %errorlevel% neq 0 exit /b %errorlevel%
 set "PYTHONPATH=./python"
 set "PYTEST_TMPDIR=%WORKING_DIR%\temp"
-python -m pytest --basetemp="%WORKING_DIR%\temp" --cov=tl2cgen --cov-report xml -v -rxXs --fulltrace --durations=0 tests\python
+python -m pytest --basetemp="%WORKING_DIR%\temp" --cov=tl2cgen --cov-report xml -v -rxXs --fulltrace --durations=0 tests\python\test_basic.py
 if %errorlevel% neq 0 exit /b %errorlevel%
diff --git a/python/.pylintrc b/python/.pylintrc
@@ -25,4 +25,4 @@ generated-members=np.float32,np.uintc,np.uintp,np.uint32
 
 [MESSAGES CONTROL]
 # globally disable pylint checks (comma separated)
-disable=fixme,too-few-public-methods,too-many-instance-attributes,missing-raises-doc
+disable=fixme,too-few-public-methods,too-many-instance-attributes,missing-raises-doc,import-outside-toplevel,duplicate-code
diff --git a/tests/python/conftest.py b/tests/python/conftest.py
@@ -4,7 +4,6 @@
 import tempfile
 
 import pytest
-from sklearn.datasets import load_svmlight_file
 
 import tl2cgen
 
@@ -14,6 +13,11 @@
 @pytest.fixture(scope="session")
 def annotation():
     """Pre-computed branch annotation information for example datasets"""
+    try:
+        from sklearn.datasets import load_svmlight_file
+    except ImportError:
+        pytest.skip(reason="scikit-learn is required")
+
     with tempfile.TemporaryDirectory(dir=".") as tmpdir:
 
         def compute_annotation(dataset):

diff --git a/tests/python/hypothesis_util.py b/tests/python/hypothesis_util.py
@@ -1,13 +1,18 @@
 """Utility functions for hypothesis-based testing"""
 
-# pylint: disable=differing-param-doc,missing-type-doc
-
+from math import ceil
 from sys import platform as _platform
 
 import numpy as np
 from hypothesis import assume
 from hypothesis.strategies import composite, integers, just, none
-from sklearn.datasets import make_classification, make_regression
+from sklearn.datasets import (
+    make_classification,
+    make_multilabel_classification,
+    make_regression,
+)
+
+# pylint: disable=differing-param-doc,missing-type-doc
 
 
 def _get_limits(strategy):
@@ -265,11 +270,69 @@ def standard_regression_datasets(
     return X.astype(np.float32), y.astype(np.float32)
 
 
+@composite
+def standard_multi_target_binary_classification_datasets(
+    draw,
+    n_samples=integers(min_value=100, max_value=200),
+    n_features=integers(min_value=100, max_value=200),
+    n_targets=just(5),
+    *,
+    n_pos_labels=None,
+    random_state=None,
+):
+    """
+    Returns a strategy to generate datasets for multi-target binary classification,
+    where each target is associated with a binary class label.
+    Note: this uses sklearn.datasets.make_multilabel_classification to generate the
+    multi-label classification problem from the provided search strategies.
+
+    Parameters
+    ----------
+    draw:
+        Callback function, to be used internally by Hypothesis
+    n_samples: SearchStrategy[int]
+        A search strategy for the number of samples in the X array
+    n_features: SearchStrategy[int]
+        A search strategy for the total number of features in the X array
+    n_targets: SearchStrategy[int], default=just(5)
+        A search strategy for the number of targets in the y array.
+    n_pos_labels: SearchStrategy[int], default=None
+        A search strategy for the expected number of positive class labels per sample.
+        If None, n_pos_labels will be set to ceil(0.4 * n_targets).
+    random_state: int, RandomState instance or None, default=None
+        Pass a random state or integer to determine the random number
+        generation for data set generation.
+
+    Returns
+    -------
+    (X, y):  SearchStrategy[array], SearchStrategy[array]
+        A tuple of search strategies for arrays subject to the constraints of
+        the provided parameters.
+    """
+    n_targets_ = draw(n_targets)
+    if n_pos_labels is None:
+        n_pos_labels = just(int(ceil(0.4 * n_targets_)))
+    ret = make_multilabel_classification(
+        n_samples=draw(n_samples),
+        n_features=draw(n_features),
+        n_classes=n_targets_,  # reuse the single draw so n_labels stays consistent
+        n_labels=draw(n_pos_labels),
+        length=50,
+        allow_unlabeled=True,
+        sparse=False,
+        return_indicator="dense",
+        return_distributions=False,
+        random_state=random_state,
+    )
+    X, y = ret[0], ret[1]
+    return X.astype(np.float32), y.astype(np.float32)
+
+
 def standard_settings():
     """Default hypothesis settings. Set a smaller max_examples on Windows"""
     kwargs = {
         "deadline": None,
-        "max_examples": 20,
+        "max_examples": 40,
         "print_blob": True,
     }
     if _platform == "win32":

diff --git a/tests/python/test_lightgbm_integration.py b/tests/python/test_lightgbm_integration.py
@@ -9,34 +9,44 @@
 import pytest
 import scipy.sparse
 import treelite
-from hypothesis import given, settings
-from hypothesis.strategies import sampled_from
-from sklearn.datasets import load_iris, load_svmlight_file
-from sklearn.model_selection import train_test_split
 
 import tl2cgen
 from tl2cgen.contrib.util import _libext
 
-from .hypothesis_util import standard_regression_datasets, standard_settings
-from .metadata import (
-    example_model_db,
-    format_libpath_for_example_model,
-    load_example_model,
+try:
+    import lightgbm
+except ImportError:
+    pytest.skip("LightGBM not installed; skipping", allow_module_level=True)
+
+try:
+    from hypothesis import given, settings
+    from hypothesis.strategies import data as hypothesis_callback
+    from hypothesis.strategies import integers, just, sampled_from
+except ImportError:
+    pytest.skip("hypothesis not installed; skipping", allow_module_level=True)
+
+
+try:
+    from sklearn.model_selection import train_test_split
+except ImportError:
+    pytest.skip("scikit-learn not installed; skipping", allow_module_level=True)
+
+
+from .hypothesis_util import (
+    standard_classification_datasets,
+    standard_regression_datasets,
+    standard_settings,
 )
+from .metadata import format_libpath_for_example_model, load_example_model
 from .util import (
     TemporaryDirectory,
     check_predictor,
     has_pandas,
     os_compatible_toolchains,
     os_platform,
+    to_categorical,
 )
 
-try:
-    import lightgbm
-except ImportError:
-    # skip this test suite if LightGBM is not installed
-    pytest.skip("LightGBM not installed; skipping", allow_module_level=True)
-
 
 def _compile_lightgbm_model(  # pylint: disable=too-many-arguments
     *,
@@ -109,28 +119,46 @@ def test_lightgbm_regression(toolchain, objective, reg_sqrt, dataset):
         np.testing.assert_almost_equal(out_pred, expected_pred, decimal=4)
 
 
-@pytest.mark.parametrize("toolchain", os_compatible_toolchains())
-@pytest.mark.parametrize("boosting_type", ["gbdt", "rf"])
-@pytest.mark.parametrize("objective", ["multiclass", "multiclassova"])
+@given(
+    dataset=standard_classification_datasets(
+        n_classes=integers(min_value=3, max_value=5), n_informative=just(5)
+    ),
+    objective=sampled_from(["multiclass", "multiclassova"]),
+    boosting_type=sampled_from(["gbdt", "rf"]),
+    num_boost_round=integers(min_value=5, max_value=50),
+    use_categorical=sampled_from([True, False]),
+    toolchain=sampled_from(os_compatible_toolchains()),
+    callback=hypothesis_callback(),
+)
+@settings(**standard_settings())
 def test_lightgbm_multiclass_classification(
-    tmpdir, objective, boosting_type, toolchain
+    dataset,
+    objective,
+    boosting_type,
+    num_boost_round,
+    use_categorical,
+    toolchain,
+    callback,
 ):
-    # pylint: disable=too-many-locals
+    # pylint: disable=too-many-locals,too-many-arguments
     """Test a multi-class classifier"""
-    model_path = pathlib.Path(tmpdir) / "iris_lightgbm.txt"
-
-    X, y = load_iris(return_X_y=True)
-    X_train, X_test, y_train, y_test = train_test_split(
-        X, y, test_size=0.2, shuffle=False
-    )
-    dtrain = lightgbm.Dataset(X_train, y_train, free_raw_data=False)
-    dtest = lightgbm.Dataset(X_test, y_test, reference=dtrain, free_raw_data=False)
+    X, y = dataset
+    num_class = np.max(y) + 1
+    if use_categorical:
+        n_categorical = callback.draw(integers(min_value=1, max_value=X.shape[1]))
+        df, X_pred = to_categorical(X, n_categorical=n_categorical, invalid_frac=0.1)
+        dtrain = lightgbm.Dataset(
+            df, label=y, free_raw_data=False, categorical_feature="auto"
+        )
+    else:
+        dtrain = lightgbm.Dataset(X, label=y, free_raw_data=False)
+        X_pred = X.copy()
     param = {
         "task": "train",
         "boosting": boosting_type,
         "objective": objective,
         "metric": "multi_logloss",
-        "num_class": 3,
+        "num_class": num_class,
         "num_leaves": 31,
         "learning_rate": 0.05,
     }
@@ -139,75 +167,97 @@ def test_lightgbm_multiclass_classification(
     bst = lightgbm.train(
         param,
         dtrain,
-        num_boost_round=10,
-        valid_sets=[dtrain, dtest],
-        valid_names=["train", "test"],
-    )
-    bst.save_model(model_path)
-
-    predictor = _compile_lightgbm_model(
-        model_path=model_path,
-        libname=f"iris_{objective}",
-        prefix=tmpdir,
-        toolchain=toolchain,
-        params={"quantize": 1},
-        verbose=True,
+        num_boost_round=num_boost_round,
+        valid_sets=[dtrain],
+        valid_names=["train"],
     )
+    tl_model = treelite.frontend.from_lightgbm(bst)
+    expected_prob = treelite.gtil.predict(tl_model, X_pred)
+    expected_margin = treelite.gtil.predict(tl_model, X_pred, pred_margin=True)
 
-    dmat = tl2cgen.DMatrix(X_test, dtype="float64")
-    out_pred = predictor.predict(dmat)
-    expected_pred = bst.predict(X_test).reshape((X_test.shape[0], 1, -1))
-    np.testing.assert_almost_equal(out_pred, expected_pred, decimal=5)
+    with TemporaryDirectory() as tmpdir:
+        libpath = pathlib.Path(tmpdir) / (f"lgb_multiclass_{objective}" + _libext())
+        tl2cgen.export_lib(
+            tl_model,
+            toolchain=toolchain,
+            libpath=libpath,
+            params={"parallel_comp": tl_model.num_tree, "quantize": 1},
+        )
+        predictor = tl2cgen.Predictor(libpath)
+        dmat = tl2cgen.DMatrix(X_pred, dtype="float64")
+        out_prob = predictor.predict(dmat)
+        out_margin = predictor.predict(dmat, pred_margin=True)
+        np.testing.assert_almost_equal(out_prob, expected_prob, decimal=5)
+        np.testing.assert_almost_equal(out_margin, expected_margin, decimal=5)
 
 
-@pytest.mark.parametrize("toolchain", os_compatible_toolchains())
-@pytest.mark.parametrize("objective", ["binary", "xentlambda", "xentropy"])
-def test_lightgbm_binary_classification(tmpdir, objective, toolchain):
-    # pylint: disable=too-many-locals
+@given(
+    dataset=standard_classification_datasets(n_classes=just(2)),
+    objective=sampled_from(["binary", "xentlambda", "xentropy"]),
+    num_boost_round=integers(min_value=5, max_value=10),
+    use_categorical=sampled_from([True, False]),
+    toolchain=sampled_from(os_compatible_toolchains()),
+    callback=hypothesis_callback(),
+)
+@settings(**standard_settings())
+def test_lightgbm_binary_classification(
+    dataset,
+    objective,
+    num_boost_round,
+    use_categorical,
+    toolchain,
+    callback,
+):
+    # pylint: disable=too-many-locals,too-many-arguments
     """Test a binary classifier"""
-    dataset = "mushroom"
-    model_path = pathlib.Path(tmpdir) / "mushroom_lightgbm.txt"
-    dtest_path = example_model_db[dataset].dtest
+    X, y = dataset
+    if use_categorical:
+        n_categorical = callback.draw(integers(min_value=1, max_value=X.shape[1]))
+        df, X_pred = to_categorical(X, n_categorical=n_categorical, invalid_frac=0.1)
+        dtrain = lightgbm.Dataset(
+            df, label=y, free_raw_data=False, categorical_feature="auto"
+        )
+    else:
+        dtrain = lightgbm.Dataset(X, label=y, free_raw_data=False)
+        X_pred = X.copy()
 
-    dtrain = lightgbm.Dataset(example_model_db[dataset].dtrain)
-    dtest = lightgbm.Dataset(dtest_path, reference=dtrain)
-    param = {
+    params = {
         "task": "train",
         "boosting_type": "gbdt",
         "objective": objective,
         "metric": "auc",
         "num_leaves": 7,
         "learning_rate": 0.1,
     }
+
     bst = lightgbm.train(
-        param,
+        params,
         dtrain,
-        num_boost_round=10,
-        valid_sets=[dtrain, dtest],
-        valid_names=["train", "test"],
-    )
-    bst.save_model(model_path)
-
-    shape = (dtest.num_data(), 1, -1)
-    expected_prob = bst.predict(dtest_path).reshape(shape)
-    expected_margin = bst.predict(dtest_path, raw_score=True).reshape(shape)
-
-    predictor = _compile_lightgbm_model(
-        model_path=model_path,
-        libname=f"agaricus_{objective}",
-        prefix=tmpdir,
-        toolchain=toolchain,
-        params={},
-        verbose=True,
-    )
-    dmat = tl2cgen.DMatrix(
-        load_svmlight_file(dtest_path, zero_based=True)[0], dtype="float64"
+        num_boost_round=num_boost_round,
+        valid_sets=[dtrain],
+        valid_names=["train"],
     )
-
-    out_prob = predictor.predict(dmat)
-    np.testing.assert_almost_equal(out_prob, expected_prob, decimal=5)
-    out_margin = predictor.predict(dmat, pred_margin=True)
-    np.testing.assert_almost_equal(out_margin, expected_margin, decimal=5)
+    with TemporaryDirectory() as tmpdir:
+        model_path = pathlib.Path(tmpdir) / f"lgb_binary_{objective}.txt"
+        bst.save_model(model_path)
+        tl_model = treelite.frontend.load_lightgbm_model(model_path)
+        expected_prob = treelite.gtil.predict(tl_model, X_pred)
+        expected_margin = treelite.gtil.predict(tl_model, X_pred, pred_margin=True)
+
+        libpath = pathlib.Path(tmpdir) / (f"lgb_binary_{objective}" + _libext())
+        tl2cgen.export_lib(
+            tl_model,
+            toolchain=toolchain,
+            libpath=libpath,
+            params={"parallel_comp": tl_model.num_tree},
+            verbose=True,
+        )
+        predictor = tl2cgen.Predictor(libpath)
+        dtest = tl2cgen.DMatrix(X_pred)
+        out_prob = predictor.predict(dtest)
+        out_margin = predictor.predict(dtest, pred_margin=True)
+        np.testing.assert_almost_equal(out_prob, expected_prob, decimal=5)
+        np.testing.assert_almost_equal(out_margin, expected_margin, decimal=5)
 
 
 @pytest.mark.parametrize("toolchain", os_compatible_toolchains())