Merge pull request #1079 from alan-turing-institute/integration

Add integration tests
JuliaAI · Jan 12, 2024 · 3a294de · 3a294de
2 parents 4b6303d + 5d1f56c
commit 3a294de
Show file tree

Hide file tree

Showing 4 changed files with 266 additions and 3 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -18,13 +18,18 @@ jobs:
       fail-fast: false
       matrix:
         version:
-          - '1.6'
           - '1' # automatically expands to the latest stable 1.x release of Julia.
         os:
           - ubuntu-latest
         arch:
           - x64
     steps:
+      - name: Set integration test flag
+        # Run only for pull requests whose head branch is `dev` and which come from this
+        # repository (not a fork). The condition is written as one bare expression:
+        # embedding `${{ }}` fragments inside a larger string produces a non-empty
+        # string, which is always truthy.
+        if: github.head_ref == 'dev' && github.repository == github.event.pull_request.head.repo.full_name
+        # Append to $GITHUB_ENV so the variable is visible to later steps; assigning to
+        # `ENV` inside a one-off `julia -e` process is lost when that process exits.
+        run: echo "MLJ_TEST_INTEGRATION=true" >> "$GITHUB_ENV"
       - uses: actions/checkout@v2
       - uses: julia-actions/setup-julia@v1
         with:

diff --git a/Project.toml b/Project.toml
@@ -8,6 +8,7 @@ CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
 ComputationalResources = "ed09eef8-17a6-5b46-8889-db040fac31e3"
 Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+EvoLinear = "ab853011-1780-437f-b4b5-5de6f4777246"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 MLJBalancing = "45f359ea-796d-4f51-95a5-deb1a414c586"
 MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
@@ -37,21 +38,54 @@ MLJEnsembles = "0.4"
 MLJFlow = "0.3"
 MLJIteration = "0.6"
 MLJModels = "0.16"
+MLJTestIntegration = "0.5.0"
 MLJTuning = "0.8"
 OpenML = "0.2,0.3"
+Pkg = "<0.0.1, 1"
 ProgressMeter = "1.1"
+Random = "<0.0.1, 1"
 Reexport = "1.2"
 ScientificTypes = "3"
 StatisticalMeasures = "0.1"
-Statistics = "1"
+Statistics = "<0.0.1, 1"
 StatsBase = "0.32,0.33, 0.34"
 Tables = "0.2,1.0"
 julia = "1.6"
 
 [extras]
+BetaML = "024491cd-cc6b-443e-8034-08ea7eb7db2b"
+CatBoost = "e2e10f9a-a85d-4fa9-b6b2-639a32100a12"
+EvoTrees = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5"
+Imbalance = "c709b415-507b-45b7-9a3d-1767c89fde68"
+InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
+LightGBM = "7acf609c-83a4-11e9-1ffb-b912bcd3b04a"
+MLJClusteringInterface = "d354fa79-ed1c-40d4-88ef-b8c7bd1568af"
+MLJDecisionTreeInterface = "c6f25543-311c-4c74-83dc-3ea6d1015661"
+MLJFlux = "094fc8d1-fd35-5302-93ea-dabda2abf845"
+MLJGLMInterface = "caf8df21-4939-456d-ac9c-5fefbfb04c0c"
+MLJLIBSVMInterface = "61c7150f-6c77-4bb1-949c-13197eac2a52"
+MLJLinearModels = "6ee0df7b-362f-4a72-a706-9e79364fb692"
+MLJMultivariateStatsInterface = "1b6a4a23-ba22-4f51-9698-8599985d3728"
+MLJNaiveBayesInterface = "33e4bacb-b9e2-458e-9a13-5d9a90b235fa"
+MLJScikitLearnInterface = "5ae90465-5518-4432-b9d2-8a1def2f0cab"
+MLJTSVDInterface = "7fa162e1-0e29-41ca-a6fa-c000ca4e7e7e"
+MLJTestInterface = "72560011-54dd-4dc2-94f3-c5de45b75ecd"
+MLJTestIntegration = "697918b4-fdc1-4f9e-8ff9-929724cee270"
+MLJText = "5e27fcf9-6bac-46ba-8580-b5712f3d6387"
+MLJXGBoostInterface = "54119dfa-1dab-4055-a167-80440f4f7a91"
+Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
 NearestNeighborModels = "636a865e-7cf4-491e-846c-de09b730eb36"
+OneRule = "90484964-6d6a-4979-af09-8657dbed84ff"
+OutlierDetectionNeighbors = "51249a0a-cb36-4849-8e04-30c7f8d311bb"
+OutlierDetectionPython = "2449c660-d36c-460e-a68b-92ab3c865b3e"
+ParallelKMeans = "42b8e9d4-006b-409a-8472-7f34b3fb58af"
+PartialLeastSquaresRegressor = "f4b1acfe-f311-436c-bb79-8483f53c17d5"
+SelfOrganizingMaps = "ba4b7379-301a-4be0-bee6-171e4e152787"
+SIRUS = "cdeec39e-fb35-4959-aadb-a1dd5dede958"
+SymbolicRegression = "8254be44-1295-4e6a-a16d-46603ac705cb"
 StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["NearestNeighborModels", "StableRNGs", "Test"]
+test = ["BetaML", "CatBoost", "EvoTrees", "Imbalance", "InteractiveUtils", "LightGBM", "MLJClusteringInterface", "MLJDecisionTreeInterface", "MLJFlux", "MLJGLMInterface", "MLJLIBSVMInterface", "MLJLinearModels", "MLJMultivariateStatsInterface", "MLJNaiveBayesInterface", "MLJScikitLearnInterface", "MLJTSVDInterface", "MLJTestInterface", "MLJTestIntegration", "MLJText", "MLJXGBoostInterface", "Markdown", "NearestNeighborModels", "OneRule", "OutlierDetectionNeighbors", "OutlierDetectionPython", "ParallelKMeans", "PartialLeastSquaresRegressor", "SelfOrganizingMaps", "SIRUS", "SymbolicRegression", "StableRNGs", "Test"] 
+
diff --git a/test/integration.jl b/test/integration.jl
@@ -0,0 +1,214 @@
+using MLJTestIntegration, MLJModels, MLJ, Test, Markdown
+import MLJTestIntegration as MTI
+import Pkg.TOML as TOML
+
+# Values passed as the `level` option of `MLJTestIntegration.test` in the two test loops
+# at the end of this file: the first for pure-Julia models, the second for all others.
+const JULIA_TEST_LEVEL = 1
+const OTHER_TEST_LEVEL = 1
+
+
+# # RECORD OF OUTSTANDING ISSUES
+
+# Maps a description of each outstanding issue (usually its URL) to a predicate on model
+# metadata entries. A model for which any predicate returns `true` ends up in
+# `EXCLUDED_BY_ISSUE` and is skipped by the test loops at the end of this file.
+FILTER_GIVEN_ISSUE = Dict(
+    "https://github.com/JuliaAI/CatBoost.jl/pull/28 (waiting for 0.3.3 release)" =>
+        model -> model.name == "CatBoostRegressor",
+    "LOCIDetector too slow to train!" =>
+        model -> model.name == "LOCIDetector",
+    "https://github.com/JuliaML/LIBSVM.jl/issues/98" =>
+        model -> model.name == "LinearSVC" &&
+        model.package_name == "LIBSVM",
+    "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl/issues/4" =>
+        model -> model.name == "CDDetector" &&
+        model.package_name == "OutlierDetectionPython",
+    "https://github.com/JuliaAI/CatBoost.jl/issues/22" =>
+        model -> model.name == "CatBoostClassifier",
+    "https://github.com/sylvaticus/BetaML.jl/issues/65" =>
+        model -> model.name in ["KMeans", "KMedoids"] &&
+        model.package_name == "BetaML",
+    "https://github.com/JuliaAI/MLJTSVDInterface.jl/pull/17" =>
+        model -> model.name == "TSVDTransformer",
+    "https://github.com/alan-turing-institute/MLJ.jl/issues/1074" =>
+        model -> model.name == "AutoEncoderMLJ",
+    "https://github.com/sylvaticus/BetaML.jl/issues/64" =>
+        model -> model.name =="GaussianMixtureClusterer" && model.package_name=="BetaML",
+     "https://github.com/rikhuijzer/SIRUS.jl/issues/78" =>
+        model -> model.package_name == "SIRUS",
+    "https://github.com/lalvim/PartialLeastSquaresRegressor.jl/issues/29 "*
+        "(still need release > 2.2.0)" =>
+        model -> model.package_name == "PartialLeastSquaresRegressor",
+    "MLJScikitLearnInterface - multiple issues, hangs tests, WIP" =>
+        model -> model.package_name == "MLJScikitLearnInterface",
+)
+
+# # LOG OUTSTANDING ISSUES TO STDOUT
+
+# Everything returned by `models()`, split by the `is_pure_julia` trait:
+const MODELS = models();
+const JULIA_MODELS = filter(MODELS) do entry
+    entry.is_pure_julia
+end;
+const OTHER_MODELS = setdiff(MODELS, JULIA_MODELS);
+
+# Models matched by at least one predicate in `FILTER_GIVEN_ISSUE`:
+const EXCLUDED_BY_ISSUE = filter(MODELS) do entry
+    any(matches(entry) for matches in values(FILTER_GIVEN_ISSUE))
+end;
+
+# Build a Markdown report listing every outstanding issue and every package providing an
+# excluded model, then print it to stdout when at least one model is excluded.
+affected_packages = unique([m.package_name for m in EXCLUDED_BY_ISSUE])
+n_excluded = length(EXCLUDED_BY_ISSUE)
+report = """
+
+# Integration Tests
+
+Currently, $n_excluded models are excluded from integration tests because of outstanding
+issues. When fixed, update `FILTER_GIVEN_ISSUE` in /test/integration.jl.
+
+If an issue is related to model traits (aka metadata), then the MLJ Model Registry may
+need to be updated to resolve the integration test failures. See the `MLJModels.@update`
+document string for how to do that.
+
+## Outstanding issues
+
+""" *
+    # one bullet per issue description:
+    join("\n- $issue\n" for issue in keys(FILTER_GIVEN_ISSUE)) *
+    "\n## Affected packages\n" *
+    # one bullet per package with an excluded model:
+    join("\n- $pkg" for pkg in affected_packages);
+report_md = Markdown.parse(report);
+
+if n_excluded > 0
+    show(stdout, MIME("text/plain"), report_md)
+    println()
+    println()
+    sleep(1)  # brief pause after showing the report
+end
+
+
+# # FLAG MODELS THAT DON'T HAVE COMPATIBLE DATASETS FOR TESTING
+
+# We use the version of `MLJTestIntegration.test` that infers appropriate datasets. The
+# datasets provided by MLJTestIntegration.jl are not yet comprehensive, so we exclude
+# models from testing when no compatible dataset can be found.
+WITHOUT_DATASETS = filter(MODELS) do model
+    # specific (name, package) combinations needing other data:
+    other_data = [
+        ("BernoulliNBClassifier", "MLJScikitLearnInterface"),
+        ("MultinomialNBClassifier", "NaiveBayes"),
+        ("OneRuleClassifier", "OneRule"),
+        ("ComplementNBClassifier", "MLJScikitLearnInterface"),
+        ("MultinomialNBClassifier", "MLJScikitLearnInterface"),
+        ("SMOTEN", "Imbalance"),
+    ]
+
+    # multi-target datasets:
+    model.target_scitype <: Union{Table, AbstractMatrix} && return true
+    # https://github.com/JuliaAI/MLJTestInterface.jl/issues/19
+    model.package_name == "MLJText" && return true
+    # univariate transformers:
+    model.input_scitype <: AbstractVector && return true
+    # image data:
+    model.input_scitype <: AbstractVector{<:Image} && return true
+
+    return (model.name, model.package_name) in other_data
+end;
+
+# A warning below means the corresponding entry in `WITHOUT_DATASETS` (defined above) is
+# stale and should be removed:
+for model in WITHOUT_DATASETS
+    isempty(MLJTestIntegration.datasets(model)) && continue
+    @warn "The model `$(model.name)` from `$(model.package_name)` "*
+        "is currently excluded "*
+        "from integration tests even though a compatible dataset appears "*
+        "to be available now. "
+end
+
+# Additionally exclude some models for which the inferred datasets have a model-specific
+# pathology that prevents a valid test:
+
+PATHOLOGIES = filter(MODELS) do model
+    name, pkg = model.name, model.package_name
+
+    # in the subsampling occurring in stacking, we get a Cholesky
+    # factorization fail (`PosDefException`):
+    name == "GaussianNBClassifier" && pkg == "NaiveBayes" && return true
+
+    # https://github.com/JuliaStats/MultivariateStats.jl/issues/224
+    name == "ICA" && pkg == "MultivariateStats" && return true
+
+    # in tuned_pipe_evaluation C library gives "Incorrect parameter: specified nu is
+    # infeasible":
+    return name in ["NuSVC", "ProbabilisticNuSVC"] && pkg == "LIBSVM"
+end
+
+WITHOUT_DATASETS = vcat(WITHOUT_DATASETS, PATHOLOGIES)
+
+
+# # CHECK PROJECT FILE INCLUDES ALL MODEL-PROVIDING PACKAGES
+
+# Helper: given the lines of a Project.toml file, return the names of all packages
+# listed under its "deps" and "extras" headings (whichever of the two are present).
+function pkgs(project_lines)
+    parsed = TOML.parse(join(project_lines, "\n"))
+    sections = Set(keys(parsed)) ∩ ["deps", "extras"]
+    names_by_section = [collect(keys(parsed[s])) for s in sections]
+    return vcat(names_by_section...)
+end
+
+# find packages named in the registry project but absent from this project:
+project_path = joinpath(@__DIR__, "..", "Project.toml")
+project_lines = readlines(project_path)
+pkgs_in_project = pkgs(project_lines)
+registry_project_lines = MLJModels.Registry.registry_project()
+pkgs_in_registry = pkgs(registry_project_lines)
+missing_pkgs = setdiff(pkgs_in_registry, pkgs_in_project)
+
+# abort if any are missing:
+if !isempty(missing_pkgs)
+    error(
+        "Integration tests cannot proceed because the following packages are "*
+        "missing from the [extras] section of the MLJ Project.toml file: "*
+        join(missing_pkgs, ", ")
+    )
+end
+
+# # LOAD ALL MODEL CODE
+
+# Load all the model providing packages with a broad level=1 test. `throw=true` is
+# passed, so presumably a failure here aborts the run before the per-model tests start
+# (TODO confirm against the MLJTestIntegration documentation):
+MLJTestIntegration.test(MODELS, (nothing, ), level=1, throw=true, verbosity=0);
+
+
+# # JULIA TESTS
+
+# Options forwarded to `MLJTestIntegration.test` for each pure-Julia model:
+options = (
+    level = JULIA_TEST_LEVEL,
+    verbosity = 0, # bump to 2 to debug
+    throw = true,
+)
+# The testset label interpolates the level actually in use, so it cannot drift from
+# `JULIA_TEST_LEVEL` the way a hard-coded number can:
+@testset "level $JULIA_TEST_LEVEL tests" begin
+    println()
+    for model in JULIA_MODELS
+
+        # exclusions:
+        model in WITHOUT_DATASETS && continue
+        model in EXCLUDED_BY_ISSUE && continue
+
+        # `\r` plus trailing padding overwrites the previous progress line:
+        print("\rTesting $(model.name) ($(model.package_name))                       ")
+        @test isempty(MLJTestIntegration.test(model; mod=@__MODULE__, options...))
+    end
+end
+
+
+# # NON-JULIA TESTS
+
+# Options forwarded to `MLJTestIntegration.test` for each model that is not pure Julia:
+options = (
+    level = OTHER_TEST_LEVEL,
+    verbosity = 0, # bump to 2 to debug
+    throw = true,
+)
+# The testset label interpolates the level actually in use, so it cannot drift from
+# `OTHER_TEST_LEVEL` the way a hard-coded number can:
+@testset "level $OTHER_TEST_LEVEL tests" begin
+    println()
+    for model in OTHER_MODELS
+
+        # exclusions:
+        model in WITHOUT_DATASETS && continue
+        model in EXCLUDED_BY_ISSUE && continue
+
+        # `\r` plus trailing padding overwrites the previous progress line:
+        print("\rTesting $(model.name) ($(model.package_name))                       ")
+        @test isempty(MLJTestIntegration.test(model; mod=@__MODULE__, options...))
+    end
+end
+
+# value of this file when it is `include`d:
+true
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -19,3 +19,13 @@ end
 @testset "scitypes" begin
     @test include("scitypes.jl")
 end
+
+# Integration tests are opt-in, controlled by the MLJ_TEST_INTEGRATION environment variable:
+if get(ENV, "MLJ_TEST_INTEGRATION", "false") != "true"
+    @info "Integration tests skipped. Set environment variable "*
+        "MLJ_TEST_INTEGRATION = \"true\" to include them.\n"*
+        "Integration tests take at least one hour. "
+else
+    @testset "integration" begin
+        @test include("integration.jl")
+    end
+end