diff --git a/Project.toml b/Project.toml index 96c00d723..f5752a0fa 100644 --- a/Project.toml +++ b/Project.toml @@ -1,25 +1,21 @@ name = "MLJ" uuid = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7" authors = ["Anthony D. Blaom "] -version = "0.8.0" +version = "0.9.0" [deps] CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" ComputationalResources = "ed09eef8-17a6-5b46-8889-db040fac31e3" -Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" -DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" -DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" -InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d" MLJModels = "d491faf4-2d78-11e9-2867-c94bc002c0b7" +MLJScientificTypes = "2e2323e0-db8b-457b-ae0d-bdfb3bc63afd" MLJTuning = "03970b2e-30c4-11ea-3135-d1576263f10f" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" @@ -27,13 +23,12 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" [compat] CategoricalArrays = "^0.7" ComputationalResources = "^0.3" -Distributions = "^0.21" -DocStringExtensions = "^0.8" -MLJBase = "^0.10" -MLJTuning = "^0.1.1" -MLJModels = "^0.7" +Distributions = "^0.21,^0.22" +MLJBase = "^0.11" +MLJModels = "^0.8" +MLJScientificTypes = "^0.1" +MLJTuning = "^0.1" ProgressMeter = "^1.1" -ScientificTypes = "^0.5.1" StatsBase = "^0.32" Tables = "^0.2" julia = "1" diff --git a/README.md b/README.md index ae149b98e..7e9aeeaf8 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,7 @@ The MLJ universe is made out of several repositories some of which can be used i * (⟂) [MLJBase.jl](https://github.com/alan-turing-institute/MLJBase.jl) offers essential tools to load and interpret data, describe ML models and use metrics; it is the repository you should interface with if you wish to make your package accessible via MLJ, * [MLJ.jl](https://github.com/alan-turing-institute/MLJ.jl) offers tools to compose, tune and evaluate models, -* [MLJModels.jl](https://github.com/alan-turing-institute/MLJModels.jl) contains interfaces to a number of important model-providing packages such as, [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl), [ScikitLearn.jl](https://github.com/bensadeghi/ScikitLearn.jl) or [XGBoost.jl](https://github.com/dmlc/XGBoost.jl) as well as a few built-in transformations (one hot encoding, standardisation, ...), it also hosts the *model registry* which keeps track of all models accessible via MLJ, +* [MLJModels.jl](https://github.com/alan-turing-institute/MLJModels.jl) contains interfaces to a number of important model-providing packages such as, [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl), [ScikitLearn.jl](https://github.com/cstjean/ScikitLearn.jl) or [XGBoost.jl](https://github.com/dmlc/XGBoost.jl) as well as a few built-in transformations (one hot encoding, standardisation, ...), it also hosts the *model registry* which keeps track of all models accessible via MLJ, * (⟂) [ScientificTypes.jl](https://github.com/alan-turing-institute/ScientificTypes.jl) a lightweight package to help MLJ articulate it's conventions about how 
different types of data (`2.71`, `"male"`, `CategoricalArray{Int}`, etc ) should be *interpreted* by models (`Continuous`, `Textual`, `AbstractArray{Multiclass}`, etc). * (⟂) [MLJLinearModels.jl](https://github.com/alan-turing-institute/MLJLinearModels.jl) an experimental package for a wide range of penalised linear models such as Lasso, Elastic-Net, Robust regression, LAD regression, etc. * [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl) an experimental package to use Flux within MLJ. diff --git a/docs/Project.toml b/docs/Project.toml index be643b4b1..5cdc6653a 100755 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -6,23 +6,24 @@ DecisionTree = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a" InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" -Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" LossFunctions = "30fc2ffe-d236-52d8-8643-a9d8f7c094a7" MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d" +MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea" +MLJModels = "d491faf4-2d78-11e9-2867-c94bc002c0b7" +MLJScientificTypes = "2e2323e0-db8b-457b-ae0d-bdfb3bc63afd" MLJTuning = "03970b2e-30c4-11ea-3135-d1576263f10f" Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411" NearestNeighbors = "b8a86587-4115-5ab1-83bc-aa920d37bbce" -Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" -PyPlot = "d330b81b-6aea-500a-939a-2ce795aea3ee" RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81" TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9" [compat] -DecisionTree = "0.8, 0.9.1" -Documenter = "^0.22,0.23" -MLJBase = "^0.10" -ScientificTypes = "^0.5" +Documenter = "^0.24" +MLJBase = "^0.11" +MLJModelInterface = "^0.1" +MLJModels = "^0.8" +MLJScientificTypes = "^0.1" +MLJTuning = "^0.1" julia = "1.2" diff --git a/docs/make.jl b/docs/make.jl index af240921e..85adf42f6 100755 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,40 +1,48 @@ if Base.HOME_PROJECT[] !== nothing Base.HOME_PROJECT[] = abspath(Base.HOME_PROJECT[]) end + using Pkg using Documenter using MLJ -using MLJBase -using MLJTuning -using MLJModels -using ScientificTypes +import MLJBase +import MLJTuning +import MLJModels +import MLJScientificTypes +import MLJModelInterface +using CategoricalArrays # avoid types like CategoricalArrays.Categorica +using LossFunctions + +const MMI = MLJModelInterface # using Literate # Literate.markdown("common_mlj_workflows.jl", ".", # codefence = "```@example workflows" => "```") -pages = Any["Getting Started"=>"index.md", - "Common MLJ Workflows" => "common_mlj_workflows.md", - "Model Search" => "model_search.md", - "Machines" => "machines.md", - "Evaluating Model Performance"=>"evaluating_model_performance.md", - "Performance Measures"=> "performance_measures.md", - "Tuning Models" => "tuning_models.md", - "Learning Curves" => "learning_curves.md", - "Built-in Transformers" => "built_in_transformers.md", - "Composing Models" => "composing_models.md", - "Homogeneous Ensembles" => "homogeneous_ensembles.md", - "Simple User Defined Models" => "simple_user_defined_models.md", - "Adding Models for General Use" => "adding_models_for_general_use.md", - "Benchmarking" => "benchmarking.md", - "Internals"=>"internals.md", - "Glossary"=>"glossary.md", - "API"=>"api.md", - "MLJ Cheatsheet" => "mlj_cheatsheet.md", - "MLJ News"=>"NEWS.md", - "FAQ" => 
"frequently_asked_questions.md", - "Julia BlogPost"=>"julia_blogpost.md", - "Acceleration and Parallelism"=>"acceleration_and_parallelism.md"] +pages = [ + "Getting Started" => "index.md", + "Common MLJ Workflows" => "common_mlj_workflows.md", + "Model Search" => "model_search.md", + "Machines" => "machines.md", + "Evaluating Model Performance" => "evaluating_model_performance.md", + "Performance Measures" => "performance_measures.md", + "Tuning Models" => "tuning_models.md", + "Learning Curves" => "learning_curves.md", + "Built-in Transformers" => "built_in_transformers.md", + "Composing Models" => "composing_models.md", + "Homogeneous Ensembles" => "homogeneous_ensembles.md", + "Simple User Defined Models" => "simple_user_defined_models.md", + "Adding Models for General Use" => "adding_models_for_general_use.md", + "Benchmarking" => "benchmarking.md", + "Internals" => "internals.md", + "Glossary" => "glossary.md", + # "API" => "api.md", # NOTE: commented as currently empty + "MLJ Cheatsheet" => "mlj_cheatsheet.md", + "MLJ News" => "NEWS.md", + "FAQ" => "frequently_asked_questions.md", + "Julia BlogPost" => "julia_blogpost.md", + "Acceleration and Parallelism" => "acceleration_and_parallelism.md" + ] for p in pages println(first(p)) @@ -42,9 +50,9 @@ end makedocs( sitename = "MLJ", - format = Documenter.HTML(), - modules = [MLJ, MLJBase, MLJTuning, MLJModels, ScientificTypes], - pages=pages) + format = Documenter.HTML(), + modules = [MLJ, MLJBase, MLJTuning, MLJModels, MLJScientificTypes, MLJModelInterface], + pages = pages) # By default Documenter does not deploy docs just for PR # this causes issues with how we're doing things and ends diff --git "a/docs/src/\nworkflows_learning_curves.png" "b/docs/src/_old/\nworkflows_learning_curves.png" similarity index 100% rename from "docs/src/\nworkflows_learning_curves.png" rename to "docs/src/_old/\nworkflows_learning_curves.png" diff --git a/docs/src/0.jpg b/docs/src/_old/0.jpg similarity index 100% rename from docs/src/0.jpg rename to docs/src/_old/0.jpg diff --git a/docs/src/0_small.jpg b/docs/src/_old/0_small.jpg similarity index 100% rename from docs/src/0_small.jpg rename to docs/src/_old/0_small.jpg diff --git a/docs/src/ATI_logo_black.png b/docs/src/_old/ATI_logo_black.png similarity index 100% rename from docs/src/ATI_logo_black.png rename to docs/src/_old/ATI_logo_black.png diff --git a/docs/src/common_mlj_workflows.ipynb b/docs/src/_old/common_mlj_workflows.ipynb similarity index 100% rename from docs/src/common_mlj_workflows.ipynb rename to docs/src/_old/common_mlj_workflows.ipynb diff --git a/docs/src/scitypes.xml b/docs/src/_old/scitypes.xml similarity index 100% rename from docs/src/scitypes.xml rename to docs/src/_old/scitypes.xml diff --git a/docs/src/scitypes_original.png b/docs/src/_old/scitypes_original.png similarity index 100% rename from docs/src/scitypes_original.png rename to docs/src/_old/scitypes_original.png diff --git a/docs/src/tiny_demo.ipynb b/docs/src/_old/tiny_demo.ipynb similarity index 100% rename from docs/src/tiny_demo.ipynb rename to docs/src/_old/tiny_demo.ipynb diff --git a/docs/src/tour.ipynb b/docs/src/_old/tour.ipynb similarity index 100% rename from docs/src/tour.ipynb rename to docs/src/_old/tour.ipynb diff --git a/docs/src/two_model_stack.dia b/docs/src/_old/two_model_stack.dia similarity index 100% rename from docs/src/two_model_stack.dia rename to docs/src/_old/two_model_stack.dia diff --git a/docs/src/two_parameter_tuning_plot.png b/docs/src/_old/two_parameter_tuning_plot.png similarity index 
100% rename from docs/src/two_parameter_tuning_plot.png rename to docs/src/_old/two_parameter_tuning_plot.png diff --git a/docs/src/workflows_learning_curves_large.png b/docs/src/_old/workflows_learning_curves_large.png similarity index 100% rename from docs/src/workflows_learning_curves_large.png rename to docs/src/_old/workflows_learning_curves_large.png diff --git a/docs/src/workflows_tuning_plot_large.png b/docs/src/_old/workflows_tuning_plot_large.png similarity index 100% rename from docs/src/workflows_tuning_plot_large.png rename to docs/src/_old/workflows_tuning_plot_large.png diff --git a/docs/src/wrapped_ridge.dia b/docs/src/_old/wrapped_ridge.dia similarity index 100% rename from docs/src/wrapped_ridge.dia rename to docs/src/_old/wrapped_ridge.dia diff --git a/docs/src/acceleration_and_parallelism.md b/docs/src/acceleration_and_parallelism.md index 874df0f06..8473b5114 100644 --- a/docs/src/acceleration_and_parallelism.md +++ b/docs/src/acceleration_and_parallelism.md @@ -8,7 +8,7 @@ subject to breaking changes during minor or major releases without warning. -### User-facing interface +## User-facing interface To enable composable, extensible acceleration of core MLJ methods, [ComputationalResources.jl](https://github.com/timholy/ComputationalResources.jl) diff --git a/docs/src/adding_models_for_general_use.md b/docs/src/adding_models_for_general_use.md index 615331b0d..b8ea5413f 100755 --- a/docs/src/adding_models_for_general_use.md +++ b/docs/src/adding_models_for_general_use.md @@ -1,24 +1,38 @@ # Adding Models for General Use -This guide outlines in detail the specification of the MLJ model interface and -provides guidelines for implementing the interface for models intended -for general use. For sample implementations, see +This guide outlines the specification of the MLJ model interface +and provides detailed guidelines for implementing the interface for +models intended for general use. See also the more condensed +[Step-by-Step Guide for Adding Models](@ref). + +For sample implementations, see [MLJModels/src](https://github.com/alan-turing-institute/MLJModels.jl/tree/master/src). The machine learning tools provided by MLJ can be applied to the models in any package that imports the package -[MLJBase](https://github.com/alan-turing-institute/MLJBase.jl) and +[MLJModelInterface](https://github.com/alan-turing-institute/MLJModelInterface.jl) and implements the API defined there, as outlined below. For a quick-and-dirty implementation of user-defined models see [Simple User Defined Models](simple_user_defined_models.md). To make new models available to all MLJ users, see [Where to place code implementing new models](@ref). +**Important.** +[MLJModelInterface](https://github.com/alan-turing-institute/MLJModelInterface.jl) +is a very light-weight interface allowing you to *define* your +interface, but does not provide the functionality required to use or +test your interface. So, while you only need to add +`MLJModelInterface` to your project's [deps], for testing purposes you +need to add +[MLJBase](https://github.com/alan-turing-institute/MLJBase.jl) to your project's +[extras] and [targets]. In testing, simply use `MLJBase` in place of +`MLJModelInterface`. + It is assumed the reader has read [Getting Started](index.md). 
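To make the [deps]/[extras]/[targets] arrangement described in the note above concrete, here is a minimal sketch; the package name `MyMLJInterface` and the file layout are hypothetical, and only the two imports matter:

```julia
# src/MyMLJInterface.jl -- the lightweight [deps] entry is enough to *define* the interface:
import MLJModelInterface
const MMI = MLJModelInterface

# test/runtests.jl -- MLJBase, listed under [extras] and [targets], supplies the
# functionality needed to actually fit and exercise the interface during tests:
using MLJBase
using Test
```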
To implement the API described here, some familiarity with the following packages is also helpful: -- [ScientificTypes.jl](https://github.com/alan-turing-institute/ScientificTypes.jl) +- [MLJScientificTypes.jl](https://github.com/alan-turing-institute/MLJScientificTypes.jl) (for specifying model requirements of data) - [Distributions.jl](https://github.com/JuliaStats/Distributions.jl) @@ -38,7 +52,7 @@ reading of this document, the reader may wish to refer to [MLJ Internals](internals.md) for context. -### Overview +## Overview A *model* is an object storing hyperparameters associated with some machine learning algorithm. In MLJ, hyperparameters include configuration @@ -54,7 +68,7 @@ ordinary multivariate regression, for example, this would be the coefficients and intercept. For a general supervised model, it is the (generally minimal) information needed to make new predictions. -The ultimate supertype of all models is `MLJBase.Model`, which +The ultimate supertype of all models is `MLJModelInterface.Model`, which has two abstract subtypes: ```julia @@ -83,31 +97,32 @@ a model instance and a fitresult (plus other data), are called *operations*. `Probabilistic` supervised models optionally implement a `predict_mode` operation (in the case of classifiers) or a `predict_mean` and/or `predict_median` operations (in the case of -regressors) although MLJBase also provides fallbacks that will suffice +regressors) although MLJModelInterface also provides fallbacks that will suffice in most cases. `Unsupervised` models may implement an `inverse_transform` operation. -### New model type declarations and optional clean! method +## New model type declarations and optional clean! method Here is an example of a concrete supervised model type declaration: ```julia -import MLJ +import MLJModelInterface +const MMI = MLJModelInterface -mutable struct RidgeRegressor <: MLJBase.Deterministic +mutable struct RidgeRegressor <: MMI.Deterministic lambda::Float64 end ``` Models (which are mutable) should not be given internal constructors. It is recommended that they be given an external lazy -keyword constructor of the same name. This constructor defines default values for -every field, and optionally corrects invalid field values by calling a `clean!` method -(whose fallback returns an empty message string): +keyword constructor of the same name. This constructor defines default values +for every field, and optionally corrects invalid field values by calling a +`clean!` method (whose fallback returns an empty message string): ```julia -function MLJ.clean!(model::RidgeRegressor) +function MMI.clean!(model::RidgeRegressor) warning = "" if model.lambda < 0 warning *= "Need lambda ≥ 0. Resetting lambda=0. " @@ -119,7 +134,7 @@ end # keyword constructor function RidgeRegressor(; lambda=0.0) model = RidgeRegressor(lambda) - message = MLJBase.clean!(model) + message = MMI.clean!(model) isempty(message) || @warn message return model end @@ -129,7 +144,7 @@ An alternative to declaring the model struct, clean! method and keyword constructor, is to use the `@mlj_model` macro, as in the following example: ```julia -@mlj_model mutable struct YourModel <: MLJBase.Deterministic +@mlj_model mutable struct YourModel <: MMI.Deterministic a::Float64 = 0.5::(_ > 0) b::String = "svd"::(_ in ("svd","qr")) end @@ -150,107 +165,107 @@ You cannot use the `@mlj_model` macro if your model struct has type parameters. 
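For orientation, here is how the keyword constructor and `clean!` fallback above behave in use (a sketch; it assumes the elided body of `clean!` resets `lambda` to zero, as its warning message indicates):

```julia
model = RidgeRegressor()           # keyword constructor fills in the default: lambda = 0.0
bad = RidgeRegressor(lambda=-1.0)  # constructor calls clean! and issues the warning
                                   # "Need lambda ≥ 0. Resetting lambda=0."
bad.lambda                         # 0.0
```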
-### Supervised models +## Supervised models The compulsory and optional methods to be implemented for each -concrete type `SomeSupervisedModel <: MLJBase.Supervised` are +concrete type `SomeSupervisedModel <: MMI.Supervised` are summarized below. An `=` indicates the return value for a fallback version of the method. -#### Summary of methods +### Summary of methods Compulsory: ```julia -MLJBase.fit(model::SomeSupervisedModel, verbosity::Integer, X, y) -> fitresult, cache, report -MLJBase.predict(model::SomeSupervisedModel, fitresult, Xnew) -> yhat +MMI.fit(model::SomeSupervisedModel, verbosity::Integer, X, y) -> fitresult, cache, report +MMI.predict(model::SomeSupervisedModel, fitresult, Xnew) -> yhat ``` Optional, to check and correct invalid hyperparameter values: ```julia -MLJBase.clean!(model::SomeSupervisedModel) = "" +MMI.clean!(model::SomeSupervisedModel) = "" ``` Optional, to return user-friendly form of fitted parameters: ```julia -MLJBase.fitted_params(model::SomeSupervisedModel, fitresult) = fitresult +MMI.fitted_params(model::SomeSupervisedModel, fitresult) = fitresult ``` Optional, to avoid redundant calculations when re-fitting machines associated with a model: ```julia -MLJBase.update(model::SomeSupervisedModel, verbosity, old_fitresult, old_cache, X, y) = - MLJBase.fit(model, verbosity, X, y) +MMI.update(model::SomeSupervisedModel, verbosity, old_fitresult, old_cache, X, y) = + MMI.fit(model, verbosity, X, y) ``` Optional, to specify default hyperparameter ranges (for use in tuning): ```julia -MLJBase.hyperparameter_ranges(T::Type) = Tuple(fill(nothing, length(fieldnames(T)))) +MMI.hyperparameter_ranges(T::Type) = Tuple(fill(nothing, length(fieldnames(T)))) ``` Optional, if `SomeSupervisedModel <: Probabilistic`: ```julia -MLJBase.predict_mode(model::SomeSupervisedModel, fitresult, Xnew) = +MMI.predict_mode(model::SomeSupervisedModel, fitresult, Xnew) = mode.(predict(model, fitresult, Xnew)) -MLJBase.predict_mean(model::SomeSupervisedModel, fitresult, Xnew) = +MMI.predict_mean(model::SomeSupervisedModel, fitresult, Xnew) = mean.(predict(model, fitresult, Xnew)) -MLJBase.predict_median(model::SomeSupervisedModel, fitresult, Xnew) = +MMI.predict_median(model::SomeSupervisedModel, fitresult, Xnew) = median.(predict(model, fitresult, Xnew)) ``` Required, if the model is to be registered (findable by general users): ```julia -MLJBase.load_path(::Type{<:SomeSupervisedModel}) = "" -MLJBase.package_name(::Type{<:SomeSupervisedModel}) = "Unknown" -MLJBase.package_uuid(::Type{<:SomeSupervisedModel}) = "Unknown" +MMI.load_path(::Type{<:SomeSupervisedModel}) = "" +MMI.package_name(::Type{<:SomeSupervisedModel}) = "Unknown" +MMI.package_uuid(::Type{<:SomeSupervisedModel}) = "Unknown" ``` ```julia -MLJBase.input_scitype(::Type{<:SomeSupervisedModel}) = Unknown +MMI.input_scitype(::Type{<:SomeSupervisedModel}) = Unknown ``` Strongly recommended, to constrain the form of target data passed to fit: ```julia -MLJBase.target_scitype(::Type{<:SomeSupervisedModel}) = Unknown +MMI.target_scitype(::Type{<:SomeSupervisedModel}) = Unknown ``` Optional but recommended: ```julia -MLJBase.package_url(::Type{<:SomeSupervisedModel}) = "unknown" -MLJBase.is_pure_julia(::Type{<:SomeSupervisedModel}) = false -MLJBase.package_license(::Type{<:SomeSupervisedModel}) = "unknown" +MMI.package_url(::Type{<:SomeSupervisedModel}) = "unknown" +MMI.is_pure_julia(::Type{<:SomeSupervisedModel}) = false +MMI.package_license(::Type{<:SomeSupervisedModel}) = "unknown" ``` If `SomeSupervisedModel` supports sample 
weights, then instead of the `fit` above, one implements ```julia -MLJBase.fit(model::SomeSupervisedModel, verbosity::Integer, X, y, w=nothing) -> fitresult, cache, report +MMI.fit(model::SomeSupervisedModel, verbosity::Integer, X, y, w=nothing) -> fitresult, cache, report ``` and, if appropriate ```julia -MLJBase.update(model::SomeSupervisedModel, verbosity, old_fitresult, old_cache, X, y, w=nothing) = - MLJBase.fit(model, verbosity, X, y, w) +MMI.update(model::SomeSupervisedModel, verbosity, old_fitresult, old_cache, X, y, w=nothing) = + MMI.fit(model, verbosity, X, y, w) ``` Additionally, if `SomeSupervisedModel` supports sample weights, one must declare ```julia -MLJBase.supports_weights(model::Type{<:SomeSupervisedModel}) = true +MMI.supports_weights(model::Type{<:SomeSupervisedModel}) = true ``` -#### The form of data for fitting and predicting +### The form of data for fitting and predicting The model implementer does not have absolute control over the types of data `X`, `y` and `Xnew` appearing in the `fit` and `predict` methods @@ -266,38 +281,38 @@ MLJ recommendation is to specify a `Table` scientific type for `X` matrix input can coerce their inputs appropriately; see below. -##### Additional type coercions +#### Additional type coercions If the core algorithm being wrapped requires data in a different or more specific form, then `fit` will need to coerce the table into the form desired (and the same coercions applied to `X` will have to be repeated for `Xnew` in `predict`). To assist with common cases, MLJ provides the convenience method -`MLJBase.matrix`. `MLJBase.matrix(Xtable)` has type `Matrix{T}` where +`MMI.matrix`. `MMI.matrix(Xtable)` has type `Matrix{T}` where `T` is the tightest common type of elements of `Xtable`, and `Xtable` -is any table. +is any table. -Other auxiliary methods provided by MLJBase for handling tabular data +Other auxiliary methods provided by MLJModelInterface for handling tabular data are: `selectrows`, `selectcols`, `select` and `schema` (for extracting the size, names and eltypes of a table's columns). See [Convenience methods](@ref) below for details. -##### Important convention +#### Important convention It is to be understood that the columns of the table `X` correspond to features and the rows to observations. So, for example, the predict method for a linear regression model might look like `predict(model, -w, Xnew) = MLJBase.matrix(Xnew)*w`, where `w` is the vector of learned +w, Xnew) = MMI.matrix(Xnew)*w`, where `w` is the vector of learned coefficients. -#### The fit method +### The fit method A compulsory `fit` method returns three objects: ```julia -MLJBase.fit(model::SomeSupervisedModel, verbosity::Int, X, y) -> fitresult, cache, report +MMI.fit(model::SomeSupervisedModel, verbosity::Int, X, y) -> fitresult, cache, report ``` *Note.* The `Int` typing of `verbosity` cannot be omitted. @@ -338,11 +353,11 @@ generally avoid doing any of its own logging. above `fit`: ```julia -MLJBase.fit(model::SomeSupervisedModel, verbosity::Int, X, y, w=nothing) -> fitresult, cache, report +MMI.fit(model::SomeSupervisedModel, verbosity::Int, X, y, w=nothing) -> fitresult, cache, report ``` -#### The fitted_params method +### The fitted_params method A `fitted_params` method may be optionally overloaded. It's purpose is to provide MLJ access to a user-friendly representation of the @@ -350,7 +365,7 @@ learned parameters of the model (as opposed to the hyperparameters). They must be extractable from `fitresult`. 
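Pulling these pieces together, here is a hedged end-to-end sketch for the `RidgeRegressor` struct declared earlier, covering `fit`, `predict` and `fitted_params`; it ignores any intercept term and is *not* the registered MLJModels implementation:

```julia
using LinearAlgebra  # for the identity operator I

function MMI.fit(model::RidgeRegressor, verbosity::Int, X, y)
    Xmat = MMI.matrix(X)                                     # table -> Matrix; columns are features
    coefficients = (Xmat'Xmat + model.lambda*I) \ (Xmat'y)   # ridge solution, no intercept
    fitresult = coefficients
    cache = nothing                                          # nothing worth passing to update
    report = NamedTuple()                                    # nothing worth reporting
    return fitresult, cache, report
end

MMI.predict(model::RidgeRegressor, fitresult, Xnew) = MMI.matrix(Xnew)*fitresult

MMI.fitted_params(model::RidgeRegressor, fitresult) = (coefficients=fitresult,)
```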
```julia -MLJBase.fitted_params(model::SomeSupervisedModel, fitresult) -> friendly_fitresult::NamedTuple +MMI.fitted_params(model::SomeSupervisedModel, fitresult) -> friendly_fitresult::NamedTuple ``` For a linear model, for example, one might declare something like @@ -359,16 +374,16 @@ For a linear model, for example, one might declare something like The fallback is to return `(fitresult=fitresult,)`. -#### The predict method +### The predict method A compulsory `predict` method has the form ```julia -MLJBase.predict(model::SomeSupervisedModel, fitresult, Xnew) -> yhat +MMI.predict(model::SomeSupervisedModel, fitresult, Xnew) -> yhat ``` Here `Xnew` will have the same form as the `X` passed to `fit`. -##### Prediction types for deterministic responses. +#### Prediction types for deterministic responses. In the case of `Deterministic` models, `yhat` should have the same scitype as the `y` passed to `fit` (see above). Any `CategoricalValue` @@ -381,7 +396,7 @@ MLJ.classes(y[j])` for all admissible `i` and `j`. (The method `classes` is described under [Convenience methods](@ref) below). Unfortunately, code not written with the preservation of categorical -levels in mind poses special problems. To help with this, MLJBase +levels in mind poses special problems. To help with this, MLJModelInterface provides three utility methods: `int` (for converting a `CategoricalValue` or `CategoricalString` into an integer, the ordering of these integers being consistent with that of the pool), @@ -399,10 +414,10 @@ nominal target `yint` of type `Vector{<:Integer}` then a `fit` method may look something like this: ```julia -function MLJBase.fit(model::SomeSupervisedModel, verbosity, X, y) - yint = MLJBase.int(y) +function MMI.fit(model::SomeSupervisedModel, verbosity, X, y) + yint = MMI.int(y) a_target_element = y[1] # a CategoricalValue/String - decode = MLJBase.decoder(a_target_element) # can be called on integers + decode = MMI.decoder(a_target_element) # can be called on integers core_fitresult = SomePackage.fit(X, yint, verbosity=verbosity) @@ -416,7 +431,7 @@ end while a corresponding deterministic `predict` operation might look like this: ```julia -function MLJBase.predict(model::SomeSupervisedModel, fitresult, Xnew) +function MMI.predict(model::SomeSupervisedModel, fitresult, Xnew) decode, core_fitresult = fitresult yhat = SomePackage.predict(core_fitresult, Xnew) return decode.(yhat) # or decode(yhat) also works @@ -430,18 +445,18 @@ for `SVMClassifier`. Of course, if you are coding a learning algorithm from scratch, rather than wrapping an existing one, these extra measures may be unnecessary. -##### Prediction types for probabilistic responses +#### Prediction types for probabilistic responses In the case of `Probabilistic` models with univariate targets, `yhat` must be an `AbstractVector` whose elements are distributions (one distribution per row of `Xnew`). Presently, a *distribution* is any object `d` for which -`MLJBase.isdistribution(::d) = true`, which is currently restricted to +`MMI.isdistribution(::d) = true`, which is currently restricted to objects subtyping `Distributions.Sampleable` from the package Distributions.jl. -Use the distribution `MLJBase.UnivariateFinite` for `Probabilistic` +Use the distribution `MMI.UnivariateFinite` for `Probabilistic` models predicting a target with `Finite` scitype (classifiers). 
In this case each element of the training target `y` is a `CategoricalValue` or `CategoricalString`, as in this contrived example: @@ -458,7 +473,7 @@ we need it); this is accessible using the convenience method ```julia julia> yes = y[1] -julia> levels = MLJBase.classes(yes) +julia> levels = MMI.classes(yes) 3-element Array{CategoricalValue{Symbol,UInt32},1}: :maybe :no @@ -471,7 +486,7 @@ y[1]` and `no = y[2]` are to be assigned respective probabilities of follows: ```julia -julia> d = MLJBase.UnivariateFinite([yes, no], [0.2, 0.8]) +julia> d = MMI.UnivariateFinite([yes, no], [0.2, 0.8]) UnivariateFinite(:yes=>0.2, :maybe=>0.0, :no=>0.8) julia> pdf(d, yes) @@ -489,21 +504,21 @@ for an example of a Probabilistic classifier implementation. ```@docs -MLJBase.UnivariateFinite +MMI.UnivariateFinite ``` -*Important note on binary classifiers.* ScientificTypes.jl has no -"Binary" scitype distinct from `Multiclass{2}` or `OrderedFactor{2}`; -`Binary` is just an alias for -`Union{Multiclass{2},OrderedFactor{2}}`. The `target_scitype` of a -binary classifier will generally be `AbstractVector{<:Binary}` and -according to the *mlj* scitype convention, elements of `y` have type -`CategoricalValue` or `CategoricalString`, and *not* `Bool`. See +*Important note on binary classifiers.* There is no "Binary" scitype +distinct from `Multiclass{2}` or `OrderedFactor{2}`; `Binary` is just +an alias for `Union{Multiclass{2},OrderedFactor{2}}`. The +`target_scitype` of a binary classifier will generally be +`AbstractVector{<:Binary}` and according to the *mlj* scitype +convention, elements of `y` have type `CategoricalValue` or +`CategoricalString`, and *not* `Bool`. See [BinaryClassifier](https://github.com/alan-turing-institute/MLJModels.jl/blob/master/src/GLM.jl) for an example. -#### Trait declarations +### Trait declarations Two trait functions allow the implementer to restrict the types of data `X`, `y` and `Xnew` discussed above. The MLJ task interface uses @@ -513,7 +528,7 @@ attempt to use your model with inappropriately typed data. The trait functions `input_scitype` and `target_scitype` take scientific data types as values. We assume here familiarity with -[ScientificTypes.jl](https://github.com/alan-turing-institute/ScientificTypes.jl) +[MLJScientificTypes.jl](https://github.com/alan-turing-institute/MLJScientificTypes.jl) (see [Getting Started](index.md) for the basics). 
For example, to ensure that the `X` presented to the @@ -521,14 +536,13 @@ For example, to ensure that the `X` presented to the (and hence `AbstractFloat` machine type), one declares ```julia -MLJBase.input_scitype(::Type{<:DecisionTreeClassifier}) = MLJBase.Table(MLJBase.Continuous) +MMI.input_scitype(::Type{<:DecisionTreeClassifier}) = MMI.Table(MMI.Continuous) ``` or, equivalently, ```julia -using ScientificTypes -MLJBase.input_scitype(::Type{<:DecisionTreeClassifier}) = Table(Continuous) +MMI.input_scitype(::Type{<:DecisionTreeClassifier}) = Table(Continuous) ``` If, instead, columns were allowed to have either: (i) a mixture of `Continuous` and `Missing` @@ -536,7 +550,7 @@ values, or (ii) `Count` (i.e., integer) values, then the declaration would be ```julia -MLJBase.input_scitype(::Type{<:DecisionTreeClassifier}) = Table(Union{Continuous,Missing},Count) +MMI.input_scitype(::Type{<:DecisionTreeClassifier}) = Table(Union{Continuous,Missing},Count) ``` Similarly, to ensure the target is an AbstractVector whose elements @@ -544,10 +558,10 @@ have `Finite` scitype (and hence `CategoricalValue` or `CategoricalString` machine type) we declare ```julia -MLJBase.target_scitype(::Type{<:DecisionTreeClassifier}) = AbstractVector{<:Finite} +MMI.target_scitype(::Type{<:DecisionTreeClassifier}) = AbstractVector{<:Finite} ``` -##### Multivariate targets +#### Multivariate targets The above remarks continue to hold unchanged for the case multivariate targets. For example, if we declare @@ -556,14 +570,14 @@ targets. For example, if we declare target_scitype(SomeSupervisedModel) = Table(Continuous) ``` -then this constrains the target to be any table whose columns have `Continous` element scitype (i.e., `AbstractFloat`), while +then this constrains the target to be any table whose columns have `Continous` element scitype (i.e., `AbstractFloat`), while ```julia target_scitype(SomeSupervisedModel) = Table(Continuous, Finite{2}) ``` restricts to tables with continuous or binary (ordered or unordered) -columns. +columns. 
For predicting variable length sequences of, say, binary values (`CategoricalValue`s or `CategoricalString`s with some common size-two @@ -623,7 +637,7 @@ end you might declare (order matters): ```julia -MLJBase.hyperparameter_ranges(::Type{<:MyModel}) = +MMI.hyperparameter_ranges(::Type{<:MyModel}) = (range(Float64, :alpha, lower=0, upper=1, scale=:log), range(Int, :beta, lower=1, upper=Inf, origin=100, unit=50, scale=:log), nothing) @@ -633,50 +647,50 @@ Here is the complete list of trait function declarations for `DecisionTreeClassi ([source](https://github.com/alan-turing-institute/MLJModels.jl/blob/master/src/DecisionTree.jl)): ```julia -MLJBase.input_scitype(::Type{<:DecisionTreeClassifier}) = MLJBase.Table(MLJBase.Continuous) -MLJBase.target_scitype(::Type{<:DecisionTreeClassifier}) = AbstractVector{<:MLJBase.Finite} -MLJBase.load_path(::Type{<:DecisionTreeClassifier}) = "MLJModels.DecisionTree_.DecisionTreeClassifier" -MLJBase.package_name(::Type{<:DecisionTreeClassifier}) = "DecisionTree" -MLJBase.package_uuid(::Type{<:DecisionTreeClassifier}) = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb" -MLJBase.package_url(::Type{<:DecisionTreeClassifier}) = "https://github.com/bensadeghi/DecisionTree.jl" -MLJBase.is_pure_julia(::Type{<:DecisionTreeClassifier}) = true +MMI.input_scitype(::Type{<:DecisionTreeClassifier}) = MMI.Table(MMI.Continuous) +MMI.target_scitype(::Type{<:DecisionTreeClassifier}) = AbstractVector{<:MMI.Finite} +MMI.load_path(::Type{<:DecisionTreeClassifier}) = "MLJModels.DecisionTree_.DecisionTreeClassifier" +MMI.package_name(::Type{<:DecisionTreeClassifier}) = "DecisionTree" +MMI.package_uuid(::Type{<:DecisionTreeClassifier}) = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb" +MMI.package_url(::Type{<:DecisionTreeClassifier}) = "https://github.com/bensadeghi/DecisionTree.jl" +MMI.is_pure_julia(::Type{<:DecisionTreeClassifier}) = true ``` -Alternatively these traits can also be declared using `MLJBase.metadata_pkg` and `MLJBase.metadata_model` helper functions as: +Alternatively these traits can also be declared using `MMI.metadata_pkg` and `MMI.metadata_model` helper functions as: ```julia -MLJBase.metadata_pkg(DecisionTreeClassifier,name="DecisionTree", +MMI.metadata_pkg(DecisionTreeClassifier,name="DecisionTree", uuid="7806a523-6efd-50cb-b5f6-3fa6f1930dbb", url="https://github.com/bensadeghi/DecisionTree.jl", julia=true) - -MLJBase.metadata_model(DecisionTreeClassifier, - input=MLJBase.Table(MLJBase.Continuous), - target=AbstractVector{<:MLJBase.Finite}, + +MMI.metadata_model(DecisionTreeClassifier, + input=MMI.Table(MMI.Continuous), + target=AbstractVector{<:MMI.Finite}, path="MLJModels.DecisionTree_.DecisionTreeClassifier") ``` ```@docs -MLJBase.metadata_pkg +MMI.metadata_pkg ``` ```@docs -MLJBase.metadata_model +MMI.metadata_model ``` You can test all your declarations of traits by calling `MLJBase.info_dict(SomeModel)`. -#### Iterative models and the update! method +### Iterative models and the update! method An `update` method may be optionally overloaded to enable a call by MLJ to retrain a model (on the same training data) to avoid repeating -computations unnecessarily. +computations unnecessarily. 
```julia -MLJBase.update(model::SomeSupervisedModel, verbosity, old_fitresult, old_cache, X, y) -> fit +MMI.update(model::SomeSupervisedModel, verbosity, old_fitresult, old_cache, X, y) -> fit result, cache, report -MLJBase.update(model::SomeSupervisedModel, verbosity, old_fitresult, old_cache, X, y, w=nothing) -> fit +MMI.update(model::SomeSupervisedModel, verbosity, old_fitresult, old_cache, X, y, w=nothing) -> fit result, cache, report ``` @@ -685,7 +699,7 @@ sample weights. If an MLJ `Machine` is being `fit!` and it is not the first time, then `update` is called instead of `fit`, unless the machine `fit!` has -been called with a new `rows` keyword argument. However, `MLJBase` +been called with a new `rows` keyword argument. However, `MLJModelInterface` defines a fallback for `update` which just calls `fit`. For context, see [MLJ Internals](internals.md). @@ -699,7 +713,7 @@ generally relevant use-case is iterative models, where calls to increase the number of iterations only restarts the iterative procedure if other hyperparameters have also changed. (A useful method for inspecting model changes in such cases is -`MLJBase.is_same_except`. ) For an example, see the MLJ [ensemble +`MLJModelInterface.is_same_except`. ) For an example, see the MLJ [ensemble code](https://github.com/alan-turing-institute/MLJ.jl/blob/master/src/ensembles.jl). A third use-case is to avoid repeating time-consuming preprocessing of @@ -713,7 +727,7 @@ of `X` and `y`), as this is also passed as an argument to the `update` method. -### Unsupervised models +## Unsupervised models TODO @@ -724,38 +738,38 @@ declares an `output_scitype` trait. Instead of implementing a optional `inverse_transform` operation. -### Convenience methods +## Convenience methods ```@docs -MLJBase.int +MLJModelInterface.int ``` ```@docs -MLJBase.classes +MLJModelInterface.classes ``` ```@docs -MLJBase.decoder +MLJModelInterface.decoder ``` ```@docs -MLJBase.matrix +MLJModelInterface.matrix ``` ```@docs -MLJBase.table +MLJModelInterface.table ``` ```@docs -MLJBase.select +MLJModelInterface.select ``` ```@docs -MLJBase.selectrows +MLJModelInterface.selectrows ``` ```@docs -MLJBase.selectcols +MLJModelInterface.selectcols ``` ```@docs @@ -771,23 +785,23 @@ MLJBase.complement ``` ```@docs -ScientificTypes.schema +MLJScientificTypes.schema ``` ```@docs -MLJBase.nrows +MLJModelInterface.nrows ``` ```@docs -ScientificTypes.scitype +MLJScientificTypes.scitype ``` ```@docs -ScientificTypes.scitype_union +MLJScientificTypes.scitype_union ``` ```@docs -ScientificTypes.elscitype +MLJScientificTypes.elscitype ``` @@ -836,10 +850,18 @@ registration. If changes are made, lodge a new issue at [MLJ](https://github.com/alan-turing-institute/MLJ) requesting your changes to be updated. -### How addd model to the MLJ model registry? +### How to add a model to the MLJ model registry? -The MLJ model registry is located in the [MLJModels.jl repository](https://github.com/alan-turing-institute/MLJModels.jl). To add a model, you need to follow these steps +The MLJ model registry is located in the [MLJModels.jl +repository](https://github.com/alan-turing-institute/MLJModels.jl). To +add a model, you need to follow these steps 1) Ensure your model conforms to the interface defined above -2) Raise an issue at https://github.com/alan-turing-institute/MLJModels.jl/issues and point out where the MLJ-interface implementation is, e.g. by providing a link to the code. 
-3) An administrator will then review your implementation and work with you to add the model to the registry + +2) Raise an issue at +https://github.com/alan-turing-institute/MLJModels.jl/issues and point +out where the MLJ-interface implementation is, e.g. by providing a +link to the code. + +3) An administrator will then review your implementation and work with +you to add the model to the registry diff --git a/docs/src/api.md b/docs/src/api.md index 8d8964b7a..b5e9504de 100755 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -1,12 +1,9 @@ # API -### Functions +## Functions - - - +TODO -### Index +## Index -```@index -``` +TODO diff --git a/docs/src/built_in_transformers.md b/docs/src/built_in_transformers.md index 481443f62..258445e33 100644 --- a/docs/src/built_in_transformers.md +++ b/docs/src/built_in_transformers.md @@ -1,6 +1,5 @@ # Built-in Transformers - ```@docs MLJModels.UnivariateStandardizer MLJModels.Standardizer diff --git a/docs/src/common_mlj_workflows.md b/docs/src/common_mlj_workflows.md index 400057bad..ab08cf678 100644 --- a/docs/src/common_mlj_workflows.md +++ b/docs/src/common_mlj_workflows.md @@ -327,7 +327,7 @@ Bound the wrapped model to data: tuned = machine(tuned_forest, X, y) ``` -Fitting the resultant machine optimizes the hyperaparameters specified +Fitting the resultant machine optimizes the hyperparameters specified in `range`, using the specified `tuning` and `resampling` strategies and performance `measure` (possibly a vector of measures), and retrains on all data bound to the machine: @@ -359,7 +359,7 @@ using Plots plot(tuned) ``` -![](workflows_tuning_plot.png) +![](img/workflows_tuning_plot.png) Predicting on new data using the optimized model: @@ -367,7 +367,7 @@ Predicting on new data using the optimized model: predict(tuned, Xnew) ``` -# Constructing a linear pipeline +## Constructing a linear pipeline *Reference:* [Composing Models](composing_models.md) @@ -403,7 +403,7 @@ pipe2 = @pipeline MyPipe2(X -> coerce(X, :age=>Continuous), inverse = z -> exp.(z)) ``` -# Creating a homogeneous ensemble of models +## Creating a homogeneous ensemble of models *Reference:* [Homogeneous Ensembles](homogeneous_ensembles.md) @@ -415,7 +415,7 @@ forest = machine(forest_model, X, y) evaluate!(forest, measure=cross_entropy) ``` -# Performance curves +## Performance curves Generate a plot of performance, as a function of some hyperparameter (building on the preceding example) @@ -432,12 +432,12 @@ curve = learning_curve(forest, verbosity=0) ``` - ```julia +```julia using Plots plot(curve.parameter_values, curve.measurements, xlab=curve.parameter_name, xscale=curve.parameter_scale) ``` -![](workflows_learning_curve.png) +![](img/workflows_learning_curve.png) Multiple curves: @@ -453,8 +453,8 @@ curve = learning_curve(forest, ``` ```julia -plot(curve.parameter_values, curve.measurements, +plot(curve.parameter_values, curve.measurements, xlab=curve.parameter_name, xscale=curve.parameter_scale) ``` -![](workflows_learning_curves.png) +![](img/workflows_learning_curves.png) diff --git a/docs/src/composing_models.md b/docs/src/composing_models.md index 378613774..c3abb4515 100644 --- a/docs/src/composing_models.md +++ b/docs/src/composing_models.md @@ -7,13 +7,12 @@ these learning networks can be applied directly to learning tasks, they are more commonly used to specify new re-usable, stand-alone, composite model types, that behave like any other model type. The main novelty of composite models is that they include other models as -hyper-parameters. +hyper-parameters. 
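For instance, in the ensemble wrapper used in the workflows above, the atomic model is itself a hyper-parameter of the composite (an illustrative sketch; the field names follow the `EnsembleModel` usage shown earlier):

```julia
tree = @load DecisionTreeClassifier       # an ordinary (atomic) model
forest = EnsembleModel(atom=tree, n=300)  # a composite model: `tree` is stored in the `atom` field
forest.atom.n_subfeatures = 3             # nested hyper-parameters are reachable in the usual way
```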
That said, MLJ also provides dedicated syntax for the most common composition use-cases, which are described first below. A description of the general framework begins at [Learning Networks](@ref). - ## Linear pipelines In MLJ a *pipeline* is a composite model in which models are chained @@ -25,7 +24,6 @@ To illustrate basic construction of a pipeline, consider the following toy data: ```@setup 7 -import Base.eval using MLJ MLJ.color_off() ``` @@ -164,7 +162,7 @@ extra flexibility is essential. ### Building a simple learning network -![](wrapped_ridge.png) +![](img/wrapped_ridge.png) The diagram above depicts a learning network which standardizes the input data `X`, learns an optimal Box-Cox transformation for the diff --git a/docs/src/evaluating_model_performance.md b/docs/src/evaluating_model_performance.md index d00d99a2c..bc89bb5a5 100644 --- a/docs/src/evaluating_model_performance.md +++ b/docs/src/evaluating_model_performance.md @@ -1,9 +1,9 @@ # Evaluating Model Performance MLJ allows quick evaluation of a supervised model's performance -against a battery of selected losses or scores. For more on available -performance measures, see [Performance -Measures](performance_measures.md). +against a battery of selected losses or scores. +For more on available performance measures, see +[Performance Measures](performance_measures.md). In addition to hold-out and cross-validation, the user can specify their own list of train/test pairs of row indices for resampling, or @@ -12,13 +12,11 @@ define their own re-usable resampling strategies. For simultaneously evaluating *multiple* models and/or data sets, see [Benchmarking](benchmarking.md). - -### Evaluating against a single measure +## Evaluating against a single measure ```@setup evaluation_of_supervised_models -import Base.eval using MLJ -MLJ.color_off() +MLJ.color_off() ``` ```@repl evaluation_of_supervised_models @@ -42,7 +40,7 @@ evaluate!(mach, resampling=cv, measure=l2, verbosity=0) (The latter call is a mutating call as the learned parameters stored in the machine potentially change. ) -### Multiple measures +## Multiple measures ```@repl evaluation_of_supervised_models evaluate!(mach, @@ -50,7 +48,7 @@ evaluate!(mach, measure=[l1, rms, rmslp1], verbosity=0) ``` -### Custom measures and weighted measures +## Custom measures and weighted measures ```@repl evaluation_of_supervised_models my_loss(yhat, y) = maximum((yhat - y).^2); @@ -71,7 +69,7 @@ evaluate!(mach, weights=weights, verbosity=0) ``` -### User-specified train/test sets +## User-specified train/test sets Users can either provide their own list of train/test pairs of row indices for resampling, as in this example: @@ -86,7 +84,7 @@ Or define their own re-usable `ResamplingStrategy` objects, - see [Custom resampling strategies](@ref) below. -### Built-in resampling strategies +## Built-in resampling strategies ```@docs @@ -102,7 +100,7 @@ MLJBase.StratifiedCV ``` -### Custom resampling strategies +## Custom resampling strategies To define your own resampling strategy, make relevant parameters of your strategy the fields of a new type `MyResamplingStrategy <: @@ -113,6 +111,7 @@ MLJ.train_test_pairs(my_strategy::MyResamplingStrategy, rows) MLJ.train_test_pairs(my_strategy::MyResamplingStrategy, rows, y) MLJ.train_test_pairs(my_strategy::MyResamplingStrategy, rows, X, y) ``` + Each method takes a vector of indices `rows` and return a vector `[(t1, e1), (t2, e2), ... (tk, ek)]` of train/test pairs of row indices selected from `rows`. 
Here `X`, `y` are the input and target @@ -154,7 +153,7 @@ function train_test_pairs(holdout::Holdout, rows) end ``` -### API +## API ```@docs MLJBase.evaluate! diff --git a/docs/src/frequently_asked_questions.md b/docs/src/frequently_asked_questions.md index a4fa5fefb..32e98f27d 100755 --- a/docs/src/frequently_asked_questions.md +++ b/docs/src/frequently_asked_questions.md @@ -1,6 +1,6 @@ # Frequently Asked Questions -### Julia already has a great machine learning toolbox, ScitkitLearn.jl. Why MLJ? +## Julia already has a great machine learning toolbox, ScitkitLearn.jl. Why MLJ? An alternative machine learning toolbox for Julia users is [ScikitLearn.jl](https://github.com/cstjean/ScikitLearn.jl). Initially @@ -27,14 +27,14 @@ term: hyperparameters, using automatic differentiation libraries such as Flux.jl; and (ii) GPU performance boosts without major code refactoring, using CuArrays.jl. - + - **Registry for model metadata.** In ScikitLearn.jl the list of available models, as well as model metadata (whether a model handles categorical inputs, whether is can make probabilistic predictions, etc) must be gleaned from documentation. In MLJ, this information is more structured and is accessible to MLJ via a searchable model registry (without the models needing to be loaded). - + - **Flexible API for model composition.** Pipelines in scikit-learn are more of an afterthought than an integral part of the original design. By contrast, MLJ's user-interaction API was predicated on the @@ -55,7 +55,7 @@ term: [skpro](https://github.com/alan-turing-institute/skpro) project, MLJ aims to improve support for Bayesian statistics and probabilistic graphical models. - + - **Universal adoption of categorical data types.** Python's scientific array library NumPy has no dedicated data type for representing categorical data (i.e., no type that tracks the pool of @@ -71,7 +71,6 @@ term: probabilistic prediction will nevertheless predict a distribution whose support includes the missing class, but which is appropriately weighted with probability zero. - + Finally, we note that a large number of ScikitLearn.jl models are now wrapped for use in MLJ. - diff --git a/docs/src/glossary.md b/docs/src/glossary.md index 032e31fba..9fd8160b5 100755 --- a/docs/src/glossary.md +++ b/docs/src/glossary.md @@ -2,16 +2,15 @@ Note: This glossary includes some detail intended mainly for MLJ developers. -### Basics +## Basics -#### task (object of type `Task`) +### task (object of type `Task`) Data plus a learning objective (e.g., "probabilistic prediction of Sales"). In MLJ a task does not include a description of how the completed task is to be evaluated. - -#### hyperparameters +### hyperparameters Parameters on which some learning algorithm depends, specified before the algorithm is applied, and where learning is interpreted in the @@ -20,16 +19,16 @@ broadest sense. For example, PCA feature reduction is a data, governed by a dimension hyperparameter. Hyperparameters in our sense may specify configuration (eg, number of parallel processes) even when this does not effect the end-product of learning. (But we -exlcude verbosity level.) +exclude verbosity level.) -#### model (object of abstract type `Model`) +### model (object of abstract type `Model`) Object collecting together hyperameters of a single algorithm. Most models are classified either as *supervised* or *unsupervised* models (generally, "transformers"). 
-#### fit-result (type generally defined outside of MLJ) +### fit-result (type generally defined outside of MLJ) Also known as "learned" or "fitted" parameters, these are "weights", "coefficients", or similar parameters learned by an algorithm, after @@ -38,7 +37,7 @@ of a random forest, the coefficients and intercept of a linear model, or the rotation and projection matrices of PCA reduction scheme. -#### operation +### operation Data-manipulating operations (methods) parameterized by some fit-result. For supervised learners, the `predict`, `predict_mean`, @@ -49,12 +48,11 @@ on a fit-result (e.g., a broadcasted logarithm) which is then called *static* operation for clarity. An operation that is not static is *dynamic*. - -#### machine (object of type `Machine`) +### machine (object of type `Machine`) An object consisting of: -(1) A model +(1) A model (2) A fit-result (undefined until training) @@ -73,19 +71,17 @@ Machines are trained by calls to a `fit` method which may be passed an optional argument specifying the rows of data to be used in training. - -### Learning Networks and Composite Models +## Learning Networks and Composite Models *Note:* Multiple nodal machines may share the same model, and multiple learning nodes may share the same nodal machine. -#### source node (object of type `Source`) +### source node (object of type `Source`) A container for training data and point of entry for new data in a learning network (see below). - -#### nodal machine (object of type `NodalMachine`) +### nodal machine (object of type `NodalMachine`) Like a machine with the following exceptions: @@ -93,40 +89,33 @@ Like a machine with the following exceptions: in the learning network, instead of data. (2) The object internally records dependencies on other nodal -machines, as implied by the training arguments, and so on. +machines, as implied by the training arguments, and so on. -#### node (object of type `Node`) +### node (object of type `Node`) Essentially a nodal machine wrapped in an associated operation (e.g., `predict` or `inverse_transform`). In detail, it consists of: -(1) An operation, static or dynamic. +1. An operation, static or dynamic. +1. A nodal machine, void if the operation is static. +1. Upstream connections to other learning or source nodes, specified by a list of *arguments* (one for each argument of the operation). +1. Metadata recording the dependencies of the object's machine, and the dependencies on other nodal machines implied by its arguments, and the training arguments of its nodal machine. -(2) A nodal machine, void if the operation is static. -(3) Upstream connections to other learning or source nodes, specified by a list - of *arguments* (one for each argument of the operation). - -(4) Metadata recording the dependencies of the object's machine, and -the dependecies on other nodal machines implied by its -arguments, and the training arguments of its nodel machine. - -#### learning network +### learning network An acyclic directed graph implicit in the connections of a collection of source(s) and nodes. Each connected component is ordinarily restricted to have a unique source. -#### wrapper +### wrapper Any model with one or more other models as hyperparameters. -#### composite model +### composite model Any wrapper, or any learning network, "exported" as a model (see [Composing Models](composing_models.md)). 
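To ground these terms, here is a hedged sketch of a small learning network fragment, following the pattern described in [Composing Models](composing_models.md); the exact `source` signature may differ between MLJ versions:

```julia
Xs = source(X)                 # source node wrapping the input data
ys = source(y)                 # source node wrapping the target
mach = machine(model, Xs, ys)  # a nodal machine: its training arguments are nodes, not data
yhat = predict(mach, Xs)       # a node: the predict operation wrapped with its machine and argument
fit!(yhat)                     # trains every nodal machine the node depends on
yhat()                         # calling the node evaluates it on the source data
```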
- diff --git a/docs/src/MLPackages.png b/docs/src/img/MLPackages.png similarity index 100% rename from docs/src/MLPackages.png rename to docs/src/img/MLPackages.png diff --git a/docs/src/heatmap.png b/docs/src/img/heatmap.png similarity index 100% rename from docs/src/heatmap.png rename to docs/src/img/heatmap.png diff --git a/docs/src/learning_curve42.png b/docs/src/img/learning_curve42.png similarity index 100% rename from docs/src/learning_curve42.png rename to docs/src/img/learning_curve42.png diff --git a/docs/src/learning_curve_n.png b/docs/src/img/learning_curve_n.png similarity index 100% rename from docs/src/learning_curve_n.png rename to docs/src/img/learning_curve_n.png diff --git a/docs/src/learningcurves.png b/docs/src/img/learningcurves.png similarity index 100% rename from docs/src/learningcurves.png rename to docs/src/img/learningcurves.png diff --git a/docs/src/scitypes.png b/docs/src/img/scitypes.png similarity index 100% rename from docs/src/scitypes.png rename to docs/src/img/scitypes.png diff --git a/docs/src/scitypes_small.png b/docs/src/img/scitypes_small.png similarity index 100% rename from docs/src/scitypes_small.png rename to docs/src/img/scitypes_small.png diff --git a/docs/src/tuning_plot.png b/docs/src/img/tuning_plot.png similarity index 100% rename from docs/src/tuning_plot.png rename to docs/src/img/tuning_plot.png diff --git a/docs/src/two_model_stack.png b/docs/src/img/two_model_stack.png similarity index 100% rename from docs/src/two_model_stack.png rename to docs/src/img/two_model_stack.png diff --git a/docs/src/workflows_learning_curve.png b/docs/src/img/workflows_learning_curve.png similarity index 100% rename from docs/src/workflows_learning_curve.png rename to docs/src/img/workflows_learning_curve.png diff --git a/docs/src/workflows_learning_curves.png b/docs/src/img/workflows_learning_curves.png similarity index 100% rename from docs/src/workflows_learning_curves.png rename to docs/src/img/workflows_learning_curves.png diff --git a/docs/src/workflows_tuning_plot.png b/docs/src/img/workflows_tuning_plot.png similarity index 100% rename from docs/src/workflows_tuning_plot.png rename to docs/src/img/workflows_tuning_plot.png diff --git a/docs/src/wrapped_ridge.png b/docs/src/img/wrapped_ridge.png similarity index 100% rename from docs/src/wrapped_ridge.png rename to docs/src/img/wrapped_ridge.png diff --git a/docs/src/index.md b/docs/src/index.md index dab909305..301ba05d9 100755 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -5,15 +5,14 @@ ```@setup doda -import Base.eval # hack b/s auto docs put's code in baremodule -import Random.seed! +import Random.seed! using MLJ using InteractiveUtils MLJ.color_off() -seed!(1234) +seed!(1234) ``` -### Choosing and evaluating a model +## Choosing and evaluating a model To load some demonstration data, add [RDatasets](https://github.com/JuliaStats/RDatasets.jl) to your load @@ -43,7 +42,7 @@ learning algorithm indicated by the struct name. Assuming the DecisionTree.jl package is in your load path, we can use `@load` to load the code defining the `DecisionTreeClassifier` model type. This macro also returns an instance, with default -hyperparameters. +hyperparameters. Drop the `verbosity=1` declaration for silent loading: @@ -59,7 +58,7 @@ how to add the package to your current environment. 
Once loaded, a model can be evaluated with the `evaluate` method: ```@repl doda -evaluate(tree_model, X, y, +evaluate(tree_model, X, y, resampling=CV(shuffle=true), measure=cross_entropy, verbosity=0) ``` @@ -67,7 +66,7 @@ Evaluating against multiple performance measures is also possible. See [Evaluating Model Performance](evaluating_model_performance.md) for details. -### A preview of data type specification in MLJ +## A preview of data type specification in MLJ The target `y` above is a categorical vector, which is appropriate because our model is a decision tree *classifier*: @@ -109,13 +108,13 @@ yint = Int.(y.refs); scitype(yint) ``` -and using `yint` in place of `y` in classification problems will fail. +and using `yint` in place of `y` in classification problems will fail. For more on scientific types, see [Data containers and scientific types](@ref) below. -### Fit and predict +## Fit and predict To illustrate MLJ's fit and predict interface, let's perform our performance evaluations by hand, but using a simple holdout set, @@ -193,8 +192,7 @@ evaluate!(tree, resampling=Holdout(fraction_train=0.7, shuffle=true), verbosity=0) ``` - -### Next steps +## Next steps To learn a little more about what MLJ can do, browse [Common MLJ Workflows](common_mlj_workflows.md) or MLJ's @@ -203,7 +201,7 @@ returning to the manual as needed. *Read at least the remainder of this page before considering serious use of MLJ.* -### Prerequisites +## Prerequisites MLJ assumes some familiarity with the `CategoricalValue` and `CategoricalString` types from @@ -214,14 +212,14 @@ predictors, a basic acquaintance with also assumed. -### Data containers and scientific types +## Data containers and scientific types The MLJ user should acquaint themselves with some basic assumptions about the form of data expected by MLJ, as outlined -below. +below. ``` -machine(model::Supervised, X, y) +machine(model::Supervised, X, y) machine(model::Unsupervised, X) ``` @@ -232,26 +230,28 @@ as `Array{Float32, 2}`). Similar remarks apply to the input `X` of an unsupervised model. Scientific types are julia types defined in the package -[ScientificTypes.jl](https://github.com/alan-turing-institute/ScientificTypes.jl), -which also defines the convention used here (and there called *mlj*) -for assigning a specific scientific type (interpretation) to each -julia object (see the `scitype` examples below). +[ScientificTypes.jl](https://github.com/alan-turing-institute/ScientificTypes.jl); +the package +[MLJScientificTypes](https://github.com/alan-turing-institute/MLJScientificTypes.jl) +implements the particular convention used in the MLJ universe for +assigning a specific scientific type (interpretation) to each julia +object (see the `scitype` examples below). The basic "scalar" scientific types are `Continuous`, `Multiclass{N}`, `OrderedFactor{N}` and `Count`. Be sure you read [Container element types](@ref) below to be guarantee your scalar data is interpreted correctly. Tools exist to coerce the data to have the appropriate scientfic type; see -[ScientificTypes.jl](https://github.com/alan-turing-institute/ScientificTypes.jl) +[MLJScientificTypes.jl](https://github.com/alan-turing-institute/MLJScientificTypes.jl) or run `?coerce` for details. - + Additionally, most data containers - such as tuples, vectors, matrices and tables - have a scientific type. -![](scitypes.png) +![](img/scitypes.png) -*Figure 1. Part of the scientific type heirarchy in* ScientificTypes.jl. +*Figure 1. 
Part of the scientific type hierarchy in* [ScientificTypes.jl](https://github.com/alan-turing-institute/ScientificTypes.jl). ```@repl doda scitype(4.6) @@ -262,7 +262,7 @@ X = (x1=x1, x2=rand(4), x3=rand(4)) # a "column table" scitype(X) ``` -#### Tabular data +### Tabular data All data containers compatible with the [Tables.jl](https://github.com/JuliaData/Tables.jl) interface (which @@ -275,8 +275,7 @@ of the columns, which can be individually inspected using `schema`: schema(X) ``` - -#### Inputs +### Inputs Since an MLJ model only specifies the scientific type of data, if that type is `Table` - which is the case for the majority of MLJ models - @@ -288,31 +287,27 @@ MLJ.table(Xmatrix)`. Specifically, the requirement for an arbitrary model's input is `scitype(X) <: input_scitype(model)`. - -#### Targets +### Targets The target `y` expected by MLJ models is generally an -`AbstractVector`. A multivariate target `y` will generally be table. +`AbstractVector`. A multivariate target `y` will generally be table. Specifically, the type requirement for a model target is `scitype(y) <: target_scitype(model)`. -#### Querying a model for acceptable data types +### Querying a model for acceptable data types Given a model instance, one can inspect the admissible scientific types of its input and target by querying the scientific type of the model itself: - + ```@setup doda tree = @load DecisionTreeClassifier ``` -```@julia doda -julia> tree = DecisionTreeClassifier(); -julia> scitype(tree) -(input_scitype = ScientificTypes.Table{#s13} where #s13<:(AbstractArray{#s12,1} where #s12<:Continuous), - target_scitype = AbstractArray{#s21,1} where #s21<:Finite, - is_probabilistic = true,) +```@repl doda +tree = DecisionTreeClassifier(); +scitype(tree) ``` This does not work if relevant model code has not been loaded. In that @@ -324,26 +319,26 @@ info("DecisionTreeClassifier") ``` -#### Container element types +### Container element types -Models in MLJ will always apply the *mlj* convention described in -[ScientificTypes.jl](https://github.com/alan-turing-institute/ScientificTypes.jl) +Models in MLJ will always apply the `MLJ` convention described in +[MLJScientificTypes.jl](https://github.com/alan-turing-institute/MLJScientificTypes.jl) to decide how to interpret the elements of your container types. Here -are the key aspects of that convention: +are the key features of that convention: - Any `AbstractFloat` is interpreted as `Continuous`. -- Any `Integer` is interpreted as `Count`. +- Any `Integer` is interpreted as `Count`. - Any `CategoricalValue` or `CategoricalString`, `x`, is interpreted as `Multiclass` or `OrderedFactor`, depending on the value of - `x.pool.ordered`. - + `x.pool.ordered`. + - `String`s and `Char`s are *not* interpreted as `Finite`; they have `Unknown` scitype. Coerce vectors of strings or characters to `CategoricalVector`s if they represent `Multiclass` or - `OrderedFactor` data. Do `?coerce` and `?unpack` to learn how. - + `OrderedFactor` data. Do `?coerce` and `?unpack` to learn how. + - In particular, *integers* (including `Bool`s) *cannot be used to represent categorical data.* @@ -353,11 +348,3 @@ represented by an ordered `CategoricalValue` or `CategoricalString`. This data will have scitype `OrderedFactor{2}` and the "true" class is understood to be the *second* class in the ordering. 
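The convention above is easiest to see in action. Below is a small sketch using a hypothetical column table, with `coerce` and `schema` as pointed to in the text (the scitypes in the comments are indicative):

```julia
using MLJ

yint = [1, 2, 2, 1]
scitype(yint)                      # AbstractArray{Count,1}: not usable as a classifier target

y = coerce(yint, OrderedFactor)    # ordered CategoricalVector; elements now have scitype OrderedFactor{2}

X = (height = [1.85, 1.67, 1.50],
     gender = ["male", "female", "female"])   # a column table
schema(X)                          # strings are not yet interpreted as categorical
Xfixed = coerce(X, :gender => Multiclass)     # column-wise coercion
schema(Xfixed)
```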
- - - - - - - - diff --git a/docs/src/internals.md b/docs/src/internals.md index 90ddae3f7..5a50e7358 100755 --- a/docs/src/internals.md +++ b/docs/src/internals.md @@ -1,13 +1,13 @@ -G# Internals +# Internals -### The machine interface, simplified +## The machine interface, simplified The following is simplified description of the `Machine` interface. See also the [Glossary](glossary.md) -#### The Machine type +### The Machine type -````julia +````julia mutable struct Machine{M Continuous, :x2 => OrderedFactor)` to coerce columns `:x1` and `:x2` of table `X`. -### Ingesting data +## Ingesting data Splitting any table into target and input (note semicolon): @@ -84,7 +84,7 @@ Splitting row indices into train/validation/test: `train, valid, test = partition(eachindex(y), 0.7, 0.2, shuffle=true, rng=1234)` for 70:20:10 ratio -#### Machine construction +## Machine construction Supervised case: @@ -94,13 +94,12 @@ Unsupervised case: `model = OneHotEncoder()` and `mach = machine(model, X)` - -#### Fitting +## Fitting `fit!(mach, rows=1:100, verbosity=1, force=false)` -#### Prediction +## Prediction Supervised case: `predict(mach, Xnew)` or `predict(mach, rows=1:100)` @@ -109,7 +108,7 @@ Similarly, for probabilistic models: `predict_mode`, `predict_mean` and `predict Unsupervised case: `transform(mach, rows=1:100)` or `inverse_transform(mach, rows)`, etc. -#### Inspecting objects +## Inspecting objects `@more` gets detail on last object in REPL @@ -128,8 +127,7 @@ pkg="MultivariateStats")` gets all properties (aka traits) of registered models `report(mach)` gets other training results (e.g. feature rankings) - -#### Resampling strategies +## Resampling strategies `Holdout(fraction_train=…, shuffle=false)` for simple holdout @@ -139,15 +137,15 @@ or a list of pairs of row indices: `[(train1, eval1), (train2, eval2), ... (traink, evalk)]` - -#### Performance estimation +## Performance estimation `evaluate(model, X, y, resampling=CV(), measure=rms, operation=predict, weights=..., verbosity=1)` `evaluate!(mach, resampling=Holdout(), measure=[rms, mav], operation=predict, weights=..., verbosity=1)` `evaluate!(mach, resampling=[(fold1, fold2), (fold2, fold1)], measure=rms)` +## Tuning -#### Ranges for tuning +### Ranges for tuning If `r = range(KNNRegressor(), :K, lower=1, upper = 20, scale=:log)` then `iterator(r, 6) = [1, 2, 3, 6, 11, 20]` @@ -155,13 +153,11 @@ Non-numeric ranges: `r = range(model, :parameter, values=…)`. 
Nested ranges: Use dot syntax, as in `r = range(EnsembleModel(atom=tree), :(atom.max_depth), ...)` - -#### Tuning strategies +### Tuning strategies `Grid(resolution=10)` for grid search - -#### Tuning model wrapper +### Tuning model wrapper `tuned_model = TunedModel(model=…, tuning=Grid(), resampling=Holdout(), measure=…, operation=predict, ranges=…, minimize=true, full_report=true)` @@ -175,7 +171,7 @@ If using Plots.jl: `plot(curve.parameter_values, curve.measurements, xlab=curve.parameter_name, xscale=curve.parameter_scale)` -#### Built-in performance measures +## Built-in performance measures `l1`, `l2`, `mav`, `rms`, `rmsl`, `rmslp1`, `rmsp`, `misclassification_rate`, `cross_entropy` @@ -184,7 +180,7 @@ If using Plots.jl: `using LossFunctions` to use more measures -#### Transformers +## Transformers Built-ins include: `Standardizer`, `OneHotEncoder`, `UnivariateBoxCoxTransformer`, `FeatureSelector`, `UnivariateStandardizer` @@ -193,12 +189,12 @@ Externals include: `PCA` (in MultivariateStats), `KMeans`, `KMedoids` (in Cluste Full list: do `models(m -> !m[:is_supervised])` -#### Ensemble model wrapper +## Ensemble model wrapper `EnsembleModel(atom=…, weights=Float64[], bagging_fraction=0.8, rng=GLOBAL_RNG, n=100, parallel=true, out_of_bag_measure=[])` -#### Pipelines +## Pipelines With point predictions: @@ -214,7 +210,7 @@ Unsupervised: `pipe = @pipeline MyPipe(stand=Standardizer(), hot=OneHotEncoder())` -#### Define a supervised learning network: +## Define a supervised learning network: `Xs = source(X)` `ys = source(y, kind=:target)` @@ -224,7 +220,7 @@ Unsupervised: `yhat = predict(knn_machine, W, ys)` (final node) -#### Exporting a learning network as stand-alone model: +## Exporting a learning network as stand-alone model: Supervised, with final node `yhat` returning point-predictions: @@ -239,4 +235,3 @@ Supervised, with `yhat` final node returning probabilistic predictions: Unsupervised, with final node `Xout`: `@from_network Composite(pca=network_pca) <= Xout` - diff --git a/docs/src/model_search.md b/docs/src/model_search.md index 0c9faa41c..892af90c2 100644 --- a/docs/src/model_search.md +++ b/docs/src/model_search.md @@ -1,4 +1,4 @@ -# Model Search +# Model Search MLJ has a model registry, allowing the user to search models and their properties, without loading all the packages containing model code. In @@ -7,7 +7,7 @@ machine learning task. The task itself is specified with the help of the `matching` method, and the search executed with the `models` methods, as detailed below. -### Model metadata +## Model metadata *Terminology.* In this section the word "model" refers to the metadata entry in the registry of an actual model `struct`, as appearing @@ -29,7 +29,7 @@ the same name occur in different packages, the package name must be specified, as in `info("LinearRegressor", pkg="GLM")`. 
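As a concrete illustration of a registry query of the kind just described (the trait names match those used elsewhere in this manual; the full set returned may vary with the MLJ version):

```julia
using MLJ

meta = info("DecisionTreeClassifier", pkg="DecisionTree")  # metadata only; no model code is loaded
meta.is_supervised          # true
meta.prediction_type        # :probabilistic
meta.package_name           # "DecisionTree"
meta.input_scitype          # e.g. Table(Continuous)
```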
-### General model queries +## General model queries We list all models (named tuples) using `models()`, and list the models for which code is already loaded with `localmodels()`: @@ -38,12 +38,12 @@ localmodels() localmodels()[2] ``` -If `models` is passed any `Bool`-valued function `test`, it returns every `model` for which `test(model)` is true, as in +If `models` is passed any `Bool`-valued function `test`, it returns every `model` for which `test(model)` is true, as in ```@repl tokai test(model) = model.is_supervised && - MLJ.Table(Continuous) <: model.input_scitype && - AbstractVector{<:Multiclass{3}} <: model.target_scitype && + model.input_scitype >: MLJ.Table(Continuous) && + model.target_scitype >: AbstractVector{<:Multiclass{3}} && model.prediction_type == :deterministic models(test) ``` @@ -52,7 +52,7 @@ Multiple test arguments may be passed to `models`, which are applied conjunctively. -### Matching models to data +## Matching models to data !!! note The `matching` method described below is experimental and may @@ -64,11 +64,11 @@ command, defined as follows: - `matching(model, X, y) == true` exactly when `model` is supervised and admits inputs and targets with the scientific types of `X` and `y`, respectively - + - `matching(model, X) == true` exactly when `model` is unsupervised and admits inputs with the scientific types of `X`. - -So, to search for all supervised probablistic models handling input + +So, to search for all supervised probabilistic models handling input `X` and target `y`, one can define the testing function `task` by ```julia @@ -82,26 +82,26 @@ models(task) ``` Also defined are `Bool`-valued callable objects `matching(model)`, -`matching(X, y)` and `matching(X)`, with obvious behaviour. For example, -`matching(X, y)(model) = matching(model, X, y)`. +`matching(X, y)` and `matching(X)`, with obvious behaviour. For example, +`matching(X, y)(model) = matching(model, X, y)`. So, to search for all models compatible with input `X` and target `y`, for example, one executes -```julia +```julia models(matching(X, y)) ``` while the preceding search can also be written -```julia +```julia models() do model matching(model, X, y) && model.prediction_type == :probabilistic end ``` -### API +## API ```@docs models diff --git a/docs/src/performance_measures.md b/docs/src/performance_measures.md index 6c4d3b30b..5c1da316b 100644 --- a/docs/src/performance_measures.md +++ b/docs/src/performance_measures.md @@ -4,7 +4,7 @@ In MLJ loss functions, scoring rules, sensitivities, and so on, are collectively to as *measures*. Presently, MLJ includes a few built-in measures, provides support for the loss functions in the [LossFunctions.jl](https://github.com/JuliaML/LossFunctions.jl) library, -and allows for users to define their own custom measures. +and allows for users to define their own custom measures. Providing further measures for probabilistic predictors, such as proper scoring rules, and for constructing multi-target product @@ -14,7 +14,7 @@ measures, is a work in progress. described here are defined in MLJBase. 
-### Built-in measures +## Built-in measures These measures all have the common calling syntax @@ -41,13 +41,13 @@ w = [1, 2, 2, 1]; rms(ŷ, y) # reports an aggregrate loss l1(ŷ, y, w) # reports per observation losses y = categorical(["male", "female", "female"]) -male = y[1]; female = y[2]; +male = y[1]; female = y[2]; d = UnivariateFinite([male, female], [0.55, 0.45]); ŷ = [d, d, d]; cross_entropy(ŷ, y) ``` -### Traits and custom measures +## Traits and custom measures Notice that `l1` reports per-sample evaluations, while `rms` only reports an aggregated result. This and other behavior can be @@ -71,20 +71,20 @@ method, and elsewhere in MLJ, provided it is a function or callable object conforming to the above syntactic conventions. By default, a custom measure is understood to: -- be a loss function (rather than a score) +- be a loss function (rather than a score) - report an aggregated value (rather than per-sample evaluations) - be feature-independent -To override this behavior one simply overloads the appropriate trait, +To override this behaviour one simply overloads the appropriate trait, as shown in the following examples: ```@repl losses_and_scores -y = [1, 2, 3, 4]; -ŷ = [2, 3, 3, 3]; -w = [1, 2, 2, 1]; -my_loss(ŷ, y) = maximum((ŷ - y).^2); +y = [1, 2, 3, 4]; +ŷ = [2, 3, 3, 3]; +w = [1, 2, 2, 1]; +my_loss(ŷ, y) = maximum((ŷ - y).^2); my_loss(ŷ, y) my_per_sample_loss(ŷ, y) = abs.(ŷ - y); MLJ.reports_each_observation(::typeof(my_per_sample_loss)) = true; @@ -106,19 +106,19 @@ measure implementing one non-weighted version, and possibly a second weighted version. *Implementation detail:* Internally, every measure is evaluated using -the syntax +the syntax ```julia MLJ.value(measure, ŷ, X, y, w) ``` and the traits determine what can be ignored and how `measure` is actually called. If `w=nothing` then the non-weighted form of `measure` is -dipatched. +dispatched. -### Using LossFunctions.jl +## Using LossFunctions.jl The [LossFunctions.jl](https://github.com/JuliaML/LossFunctions.jl) package includes "distance loss" functions for `Continuous` targets, -and "marginal loss" functins for `Binary` targets. While the +and "marginal loss" functions for `Binary` targets. 
While the LossFunctions,jl interface differs from the present one (for, example `Binary` observations must be +1 or -1), one can safely pass the loss functions defined there to any MLJ algorithm, which re-interprets it @@ -136,7 +136,7 @@ evaluate!(mach, resampling=holdout, operation=predict, weights=w, - verbosity=0) + verbosity=0) ``` *Note:* Although `ZeroOneLoss(ŷ, y)` makes no sense (neither `ŷ` nor @@ -144,13 +144,13 @@ evaluate!(mach, adaptor `MLJ.value` as discussed above: ```@repl losses_and_scores -ŷ = predict(mach, X); +ŷ = predict(mach, X); loss = MLJ.value(ZeroOneLoss(), ŷ, X, y, w) # X is ignored here mean(loss) ≈ misclassification_rate(mode.(ŷ), y, w) ``` -### List of built-in measures (excluding LossFunctions.jl losses) +## List of built-in measures (excluding LossFunctions.jl losses) ```@docs l1 @@ -208,22 +208,6 @@ matthews_correlation auc ``` -```@docs -tp -``` - -```@docs -tn -``` - -```@docs -fp -``` - -```@docs -fn -``` - ```@docs tpr ``` @@ -243,7 +227,7 @@ fnr FScore ``` -### Other performance related tools +## Other performance related tools ```@docs ConfusionMatrix @@ -256,5 +240,3 @@ confusion_matrix ```@docs roc_curve ``` - - diff --git a/docs/src/simple_user_defined_models.md b/docs/src/simple_user_defined_models.md index 2745b6001..5cda92665 100755 --- a/docs/src/simple_user_defined_models.md +++ b/docs/src/simple_user_defined_models.md @@ -35,8 +35,7 @@ Use](adding_models_for_general_use.md). For an unsupervised model, implement `transform` and, optionally, `inverse_transform` using the same signature at `predict` below. - -### A simple deterministic regressor +## A simple deterministic regressor Here's a quick-and-dirty implementation of a ridge regressor with no intercept: @@ -61,7 +60,6 @@ MLJBase.predict(::MyRegressor, fitresult, Xnew) = MLJBase.matrix(Xnew) * fitresu ```` ``` @setup regressor_example -import Base.eval import MLJBase using LinearAlgebra MLJBase.color_off() @@ -70,7 +68,7 @@ mutable struct MyRegressor <: MLJBase.Deterministic end MyRegressor(; lambda=0.1) = MyRegressor(lambda) function MLJBase.fit(model::MyRegressor, X, y) - x = MLJBase.matrix(X) + x = MLJBase.matrix(X) fitresult = (x'x + model.lambda*I)\(x'y) return fitresult end @@ -88,7 +86,7 @@ evaluate!(regressor, resampling=CV(), measure=rms, verbosity=0) ``` -### A simple probabilistic classifier +## A simple probabilistic classifier The following probabilistic model simply fits a probability distribution to the `MultiClass` training target (i.e., ignores `X`) diff --git a/docs/src/tuning_models.md b/docs/src/tuning_models.md index 629d4f53e..c47a81c49 100644 --- a/docs/src/tuning_models.md +++ b/docs/src/tuning_models.md @@ -9,12 +9,7 @@ optimal model, one just calls `predict(mach, Xnew)`. In this way the wrapped model may be viewed as a "self-tuning" version of the unwrapped model. - -### Tuning a single hyperparameter using a grid search - -```@setup goof -import Base.eval -``` +## Tuning a single hyperparameter using a grid search ```@repl goof using MLJ @@ -77,7 +72,7 @@ predict(self_tuning_tree, Xnew) ``` -### Tuning multiple nested hyperparameters +## Tuning multiple nested hyperparameters The following model has another model, namely a `DecisionTreeRegressor`, as a hyperparameter: @@ -123,12 +118,12 @@ using Plots plot(self_tuning_forest) ``` -![](tuning_plot.png) +![](img/tuning_plot.png) For more options in a grid search, see the `Grid` docstring below. 
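A compressed sketch of the self-tuning wrapper discussed above, using a toy data set. The hyperparameter and keyword names are indicative; in particular, some MLJ versions spell the `range` keyword `ranges`, as in the cheatsheet:

```julia
using MLJ

tree = @load DecisionTreeRegressor verbosity=0

r = range(tree, :max_depth, lower=1, upper=10)

self_tuning_tree = TunedModel(model=tree,
                              tuning=Grid(resolution=10),
                              resampling=CV(nfolds=5),
                              range=r,
                              measure=rms)

X = (x1=rand(100), x2=rand(100))          # toy input table
y = 2 .* X.x1 .+ 0.1 .* randn(100)        # toy continuous target

mach = machine(self_tuning_tree, X, y)
fit!(mach, verbosity=0)
fitted_params(mach).best_model            # the optimal tree found by the grid search
```

Calling `predict(mach, Xnew)` then uses the best model retrained on all supplied data, which is what makes the wrapped model "self-tuning".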
-### API +## API ```@docs MLJBase.range diff --git a/src/MLJ.jl b/src/MLJ.jl index e7e67f2b2..134273544 100644 --- a/src/MLJ.jl +++ b/src/MLJ.jl @@ -21,12 +21,12 @@ export matching export pdf, mode, median, mean, shuffle!, categorical, shuffle, levels, levels!, std, support -# re-export from ScientificTypes: -export GrayImage, ColorImage, Image, - Found, Continuous, Finite, Infinite, - OrderedFactor, Unknown, - Count, Multiclass, Binary, Scientific, - scitype, scitype_union, coerce, schema, autotype, elscitype +# re-exports from (MLJ)ScientificTypes via MLJBase +export Scientific, Found, Unknown, Known, Finite, Infinite, + OrderedFactor, Multiclass, Count, Continuous, Textual, + Binary, ColorImage, GrayImage, Image, Table +export scitype, scitype_union, elscitype, nonmissing, trait +export coerce, coerce!, autotype, schema, info # re-export from MLJBase: export nrows, nfeatures, color_off, color_on, @@ -73,7 +73,7 @@ export measures, truepositive_rate, truenegative_rate, falsepositive_rate, falsenegative_rate, negativepredicitive_value, positivepredictive_value, - tp, tn, fp, fn, tpr, tnr, fpr, fnr, + tpr, tnr, fpr, fnr, falsediscovery_rate, fdr, npv, ppv, recall, sensitivity, hit_rate, miss_rate, specificity, selectivity, f1score, f1, fallout @@ -90,7 +90,6 @@ export models, localmodels, @load, load, info, OneHotEncoder, UnivariateDiscretizer, FillImputer - ## METHOD IMPORT # from the Standard Library: @@ -99,14 +98,11 @@ import Pkg import Pkg.TOML # from the MLJ universe: -using ScientificTypes using MLJBase -import MLJBase using MLJTuning using MLJModels -using Tables -using CategoricalArrays +using Tables, CategoricalArrays import Distributions import Distributions: pdf, mode import Statistics, StatsBase, LinearAlgebra, Random @@ -114,14 +110,12 @@ import Random: AbstractRNG, MersenneTwister using ProgressMeter using ComputationalResources using ComputationalResources: CPUProcesses -using DocStringExtensions: SIGNATURES, TYPEDEF # to be extended: -import MLJBase: fit, update, clean!, fit!, - predict, fitted_params, - show_as_constructed, == +import MLJBase: fit, update, clean!, fit!, predict, fitted_params, + show_as_constructed, == import MLJModels: models - +import MLJScientificTypes ## CONSTANTS diff --git a/src/ensembles.jl b/src/ensembles.jl index 1fbbceb2d..09ebf4a80 100644 --- a/src/ensembles.jl +++ b/src/ensembles.jl @@ -7,7 +7,6 @@ Base.show(stream::IO, t::Random.MersenneTwister) = # Atom is atomic model type, eg, DecisionTree # R will be the tightest type of the atom fit-results. 
-using StatsBase mutable struct WrappedEnsemble{R,Atom <: Supervised} <: MLJType atom::Atom ensemble::Vector{R} @@ -27,33 +26,32 @@ function WrappedEnsemble(atom, ensemble::AbstractVector{L}) where L end # to enable trait-based dispatch of predict: -predict(wens::WrappedEnsemble{R,Atom}, - atomic_weights, Xnew) where {R,Atom<:Deterministic} = +function predict(wens::WrappedEnsemble{R,Atom}, atomic_weights, Xnew + ) where {R,Atom<:Deterministic} predict(wens, atomic_weights, Xnew, Deterministic, target_scitype(Atom)) -predict(wens::WrappedEnsemble{R,Atom}, - atomic_weights, Xnew) where {R,Atom<:Probabilistic} = +end + +function predict(wens::WrappedEnsemble{R,Atom}, atomic_weights, Xnew + ) where {R,Atom<:Probabilistic} predict(wens, atomic_weights, Xnew, Probabilistic, target_scitype(Atom)) +end -function predict(wens::WrappedEnsemble, - atomic_weights, - Xnew, +function predict(wens::WrappedEnsemble, atomic_weights, Xnew, ::Type{Deterministic}, ::Type{<:AbstractVector{<:Finite}}) - # atomic_weights ignored in this case - ensemble = wens.ensemble - atom = wens.atom - + atom = wens.atom n_atoms = length(ensemble) n_atoms > 0 || @error "Empty ensemble cannot make predictions." # TODO: make this more memory efficient but note that the type of # Xnew is unknown (ie, model dependent) - predictions = - reduce(hcat, [predict(atom, fitresult, Xnew) for fitresult in ensemble]) - classes = levels(predictions) - n = size(predictions, 1) + preds_gen = (predict(atom, fitresult, Xnew) for fitresult in ensemble) + predictions = hcat(preds_gen...) + + classes = levels(predictions) + n = size(predictions, 1) prediction = categorical(vcat([mode(predictions[i,:]) for i in 1:n], classes))[1:n] return prediction @@ -61,61 +59,56 @@ end function predict(wens::WrappedEnsemble, atomic_weights, Xnew, ::Type{Deterministic}, ::Type{<:AbstractVector{<:Continuous}}) + # considering atomic weights ensemble = wens.ensemble - - atom = wens.atom - - n_atoms = length(ensemble) + atom = wens.atom + n_atoms = length(ensemble) n_atoms > 0 || @error "Empty ensemble cannot make predictions." # TODO: make more memory efficient: - predictions = reduce(hcat, [atomic_weights[k]*predict(atom, ensemble[k], Xnew) for k in 1:n_atoms]) - prediction = [sum(predictions[i,:]) for i in 1:size(predictions, 1)] + preds_gen = (atomic_weights[k] * predict(atom, ensemble[k], Xnew) + for k in 1:n_atoms) + predictions = hcat(preds_gen...) + prediction = [sum(predictions[i,:]) for i in 1:size(predictions, 1)] return prediction end function predict(wens::WrappedEnsemble, atomic_weights, Xnew, ::Type{Probabilistic}, ::Type{<:AbstractVector{<:Finite}}) - ensemble = wens.ensemble - - atom = wens.atom - - n_atoms = length(ensemble) + atom = wens.atom + n_atoms = length(ensemble) n_atoms > 0 || @error "Empty ensemble cannot make predictions." # TODO: make this more memory efficient but note that the type of # Xnew is unknown (ie, model dependent): - # a matrix of probability distributions: - predictions = reduce(hcat, [predict(atom, fitresult, Xnew) for fitresult in ensemble]) - n_rows = size(predictions, 1) + preds_gen = (predict(atom, fitresult, Xnew) for fitresult in ensemble) + predictions = hcat(preds_gen...) 
+ n_rows = size(predictions, 1) # the weighted averages over the ensemble of the discrete pdf's: - predictions = [MLJBase.average([predictions[i,k] for k in 1:n_atoms], weights=atomic_weights) for i in 1:n_rows] + predictions = [average([predictions[i, k] for k in 1:n_atoms], weights=atomic_weights) for i in 1:n_rows] return predictions end function predict(wens::WrappedEnsemble, atomic_weights, Xnew, ::Type{Probabilistic}, ::Type{<:AbstractVector{<:Continuous}}) - ensemble = wens.ensemble - - atom = wens.atom - - n_atoms = length(ensemble) + atom = wens.atom + n_atoms = length(ensemble) n_atoms > 0 || @error "Empty ensemble cannot make predictions." # TODO: make this more memory efficient but note that the type of # Xnew is unknown (ie, model dependent): - # a matrix of probability distributions: - predictions = reduce(hcat, [predict(atom, fitresult, Xnew) for fitresult in ensemble]) + preds_gen = (predict(atom, fitresult, Xnew) for fitresult in ensemble) + predictions = hcat(preds_gen...) # n_rows = size(predictions, 1) # # the weighted average over the ensemble of the pdf means and pdf variances: diff --git a/src/scitypes.jl b/src/scitypes.jl index 3b6f41bb4..5f81b3770 100644 --- a/src/scitypes.jl +++ b/src/scitypes.jl @@ -1,18 +1,20 @@ ## SUPERVISED +const MST = MLJScientificTypes # only used in this file + struct SupervisedScitype{input_scitype, target_scitype, prediction_type} end -ScientificTypes.scitype(model::Deterministic, ::ScientificTypes.MLJ) = +MST.scitype(model::Deterministic, ::MST.MLJ) = SupervisedScitype{input_scitype(model), target_scitype(model), :deterministic} -ScientificTypes.scitype(model::Probabilistic, ::ScientificTypes.MLJ) = +MST.scitype(model::Probabilistic, ::MST.MLJ) = SupervisedScitype{input_scitype(model), target_scitype(model), :probabilistic} -ScientificTypes.scitype(model::Interval, ::ScientificTypes.MLJ) = +MST.scitype(model::Interval, ::MST.MLJ) = SupervisedScitype{input_scitype(model), target_scitype(model), :interval} @@ -48,7 +50,7 @@ end struct UnsupervisedScitype{input_scitype, output_scitype} end -ScientificTypes.scitype(model::Unsupervised, ::ScientificTypes.MLJ) = +MST.scitype(model::Unsupervised, ::MST.MLJ) = UnsupervisedScitype{input_scitype(model), MLJBase.output_scitype(model)} @@ -87,7 +89,7 @@ struct MeasureScitype{target_scitype, is_feature_dependent, supports_weights} end -ScientificTypes.scitype(measure, ::ScientificTypes.MLJ, ::Val{:measure}) = +MST.scitype(measure, ::MST.MLJ, ::Val{:measure}) = MeasureScitype{target_scitype(measure), prediction_type(measure), orientation(measure), diff --git a/src/tuning.jl b/src/tuning.jl index 187183758..4b3c3de4f 100644 --- a/src/tuning.jl +++ b/src/tuning.jl @@ -1,5 +1,6 @@ -abstract type TuningStrategy <: MLJ.MLJType end -const ParameterName=Union{Symbol,Expr} +abstract type TuningStrategy <: MLJType end + +const ParameterName = Union{Symbol,Expr} """ Grid(resolution=10, acceleration=DEFAULT_RESOURCE[]) diff --git a/test/ensembles.jl b/test/ensembles.jl index e08860456..72f2adb12 100644 --- a/test/ensembles.jl +++ b/test/ensembles.jl @@ -1,13 +1,12 @@ module TestEnsembles -# using Revise using Test using Random using MLJ using MLJBase import MLJModels using CategoricalArrays -using Distributions +import Distributions @load KNNRegressor @@ -159,8 +158,8 @@ train, test = partition(1:length(y), 0.8); ensemble_model = MLJ.ProbabilisticEnsembleModel(atom=atom) ensemble_model.n = 10 fitresult, cache, report = MLJ.fit(ensemble_model, 1, X, y) -d1 = fit(Distributions.Normal, [1,1,2,2]) -d2 = 
fit(Distributions.Normal, [1,1,1,2]) +d1 = Distributions.fit(Distributions.Normal, [1,1,2,2]) +d2 = Distributions.fit(Distributions.Normal, [1,1,1,2]) # @test reduce(* , [d.μ ≈ d1.μ || d.μ ≈ d2.μ for d in fitresult.ensemble]) # @test reduce(* , [d.σ ≈ d1.σ || d.σ ≈ d2.σ for d in fitresult.ensemble]) d=predict(ensemble_model, fitresult, MLJ.selectrows(X, test))[1] @@ -170,7 +169,7 @@ end ensemble_model.bagging_fraction = 1.0 fitresult, cache, report = MLJ.fit(ensemble_model, 1, X, y) d = predict(ensemble_model, fitresult, MLJ.selectrows(X, test))[1] -d3 = fit(Distributions.Normal, y) +d3 = Distributions.fit(Distributions.Normal, y) @test pdf(d, 1.52) ≈ pdf(d3, 1.52) atomic_weights = rand(10) atomic_weights = atomic_weights/sum(atomic_weights) diff --git a/test/runtests.jl b/test/runtests.jl index 4025d9aa4..f4122caae 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,10 +2,10 @@ using Distributed addprocs(2) @everywhere begin -using MLJ -using MLJBase -using Test -using Random + using MLJ + using MLJBase + using Test + using Random end @testset "ensembles" begin @@ -19,5 +19,3 @@ end @testset "scitypes" begin @test include("scitypes.jl") end - -
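For reference, a minimal usage sketch of the ensemble wrapper whose internals and tests are touched above (model and keyword names follow the tests and the cheatsheet; treat this as illustrative only):

```julia
using MLJ

atom = @load KNNRegressor            # deterministic atomic model, as in the tests
ensemble_model = EnsembleModel(atom=atom, n=10, bagging_fraction=0.8)

# toy regression data
X = (x1=rand(50), x2=rand(50))
y = 2 .* X.x1 .+ 0.05 .* randn(50)

mach = machine(ensemble_model, X, y)
fit!(mach, verbosity=0)
predict(mach, X)[1:3]                # point predictions aggregated over the ensemble
```

Wrapping a `Probabilistic` atom instead yields averaged distributions, the case handled by the other `predict` methods refactored above.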