From 9082570c24305a81995f8384e2e2f30a08ab9daf Mon Sep 17 00:00:00 2001 From: Okon Samuel <39421418+OkonSamuel@users.noreply.github.com> Date: Thu, 12 Mar 2020 14:35:37 +0100 Subject: [PATCH 1/3] updated list of ScikitLearn models in Readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 763ce43cc..a1743d4a3 100644 --- a/README.md +++ b/README.md @@ -130,7 +130,7 @@ the most up-to-date list, run `using MLJ; models()`. [MultivariateStats.jl] | RidgeRegressor, PCA, KernelPCA, ICA, LDA, BayesianLDA, SubspaceLDA, BayesianSubspaceLDA | high | † [NaiveBayes.jl] | GaussianNBClassifier, MultinomialNBClassifier, HybridNBClassifier | low | [NearestNeighbors.jl] | KNNClassifier, KNNRegressor | high | -[ScikitLearn.jl] | SVMClassifier, SVMRegressor, SVMNuClassifier, SVMNuRegressor, SVMLClassifier, SVMLRegressor, ARDRegressor, BayesianRidgeRegressor, ElasticNetRegressor, ElasticNetCVRegressor, HuberRegressor, LarsRegressor, LarsCVRegressor, LassoRegressor, LassoCVRegressor, LassoLarsRegressor, LassoLarsCVRegressor, LassoLarsICRegressor, LinearRegressor, OrthogonalMatchingPursuitRegressor, OrthogonalMatchingPursuitCVRegressor, PassiveAggressiveRegressor, RidgeRegressor, RidgeCVRegressor, SGDRegressor, TheilSenRegressor, LogisticClassifier, LogisticCVClassifier, PerceptronClassifier, RidgeClassifier, RidgeCVClassifier, PassiveAggressiveClassifier, SGDClassifier, GaussianProcessRegressor, GaussianProcessClassifier, AdaBoostRegressor, AdaBoostClassifier, BaggingRegressor, BaggingClassifier, GradientBoostingRegressor, GradientBoostingClassifier, RandomForestRegressor, RandomForestClassifier, GaussianNB, MultinomialNB, ComplementNB, BayesianLDA, BayesianQDA | high | † +[ScikitLearn.jl] | ARDRegressor, AdaBoostClassifier, AdaBoostRegressor, AffinityPropagation, AgglomerativeClustering, BaggingClassifier, BaggingRegressor, BayesianLDA, BayesianQDA, BayesianRidgeRegressor, BernoulliNBClassifier, Birch, ComplementNBClassifier, 
DBSCAN, DummyClassifier, DummyRegressor, ElasticNetCVRegressor, ElasticNetRegressor, ExtraTreesClassifier, ExtraTreesRegressor, FeatureAgglomeration, GaussianNBClassifier, GaussianProcessClassifier, GaussianProcessRegressor, GradientBoostingClassifier, GradientBoostingRegressor, HuberRegressor, KMeans, KNeighborsClassifier, KNeighborsRegressor, LarsCVRegressor, LarsRegressor, LassoCVRegressor, LassoLarsCVRegressor, LassoLarsICRegressor, LassoLarsRegressor, LassoRegressor, LinearRegressor, LogisticCVClassifier, LogisticClassifier, MeanShift, MiniBatchKMeans, MultiTaskElasticNetCVRegressor, MultiTaskElasticNetRegressor, MultiTaskLassoCVRegressor, MultiTaskLassoRegressor, MultinomialNBClassifier, OPTICS, OrthogonalMatchingPursuitCVRegressor, OrthogonalMatchingPursuitRegressor, PassiveAggressiveClassifier, PassiveAggressiveRegressor, PerceptronClassifier, ProbabilisticSGDClassifier, RANSACRegressor, RandomForestClassifier, RandomForestRegressor, RidgeCVClassifier, RidgeCVRegressor, RidgeClassifier, RidgeRegressor, SGDClassifier, SGDRegressor, SVMClassifier, SVMLClassifier, SVMLRegressor, SVMNuClassifier, SVMNuRegressor, SVMRegressor, SpectralClustering, TheilSenRegressor | high | † [XGBoost.jl] | XGBoostRegressor, XGBoostClassifier, XGBoostCount | high | **Note** (†): some models are missing, your help is welcome to complete the interface. Get in touch with Thibaut Lienart on Slack if you would like to help, thanks! From ae8af5b7962e8b54f2e999be3b985efad1fe1e6c Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Sun, 15 Mar 2020 08:04:50 +1300 Subject: [PATCH 2/3] enable serialization of machines #138 #292 --- Project.toml | 4 ++-- docs/src/machines.md | 4 ++-- src/MLJ.jl | 1 + 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Project.toml b/Project.toml index a368ebe7f..ea10aa927 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "MLJ" uuid = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7" authors = ["Anthony D. 
Blaom "] -version = "0.10" +version = "0.10.1" [deps] CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" @@ -24,7 +24,7 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" CategoricalArrays = "^0.7" ComputationalResources = "^0.3" Distributions = "^0.21,^0.22" -MLJBase = "^0.12" +MLJBase = "^0.12.1" MLJModels = "^0.9" MLJScientificTypes = "^0.2.1" MLJTuning = "^0.2" diff --git a/docs/src/machines.md b/docs/src/machines.md index 65f4fcd09..6efd53d98 100644 --- a/docs/src/machines.md +++ b/docs/src/machines.md @@ -92,8 +92,8 @@ See [Composing Models](composing_models.md) for more on this advanced feature. ## Inspecting machines There are two methods for inspecting the outcomes of training in -MLJ. To obtain a named-tuple describing the learned parameters, in a -user-friendly way if possible, use `fitted_params(mach)`. All other +MLJ. To obtain a named-tuple describing the learned parameters (in a +user-friendly way where possible) use `fitted_params(mach)`. All other training-related outcomes are inspected with `report(mach)`. ```@example machines diff --git a/src/MLJ.jl b/src/MLJ.jl index d1e938c3b..425da1c22 100644 --- a/src/MLJ.jl +++ b/src/MLJ.jl @@ -107,6 +107,7 @@ import Pkg.TOML # from the MLJ universe: using MLJBase +import MLJBase.save using MLJTuning using MLJModels From 91a53fdb8f2ae2a42359163392a67a3b122a4b76 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Sun, 15 Mar 2020 08:31:49 +1300 Subject: [PATCH 3/3] update manual re serialization --- docs/src/machines.md | 78 ++++++++++++++++++++++++++++++-------------- 1 file changed, 54 insertions(+), 24 deletions(-) diff --git a/docs/src/machines.md b/docs/src/machines.md index 6efd53d98..b0446c90f 100644 --- a/docs/src/machines.md +++ b/docs/src/machines.md @@ -59,13 +59,62 @@ fit!(mach, rows=1:100); fit!(mach, rows=1:100); ``` +## Inspecting machines + +There are two methods for inspecting the outcomes of training in +MLJ. 
To obtain a named-tuple describing the learned parameters (in a +user-friendly way where possible) use `fitted_params(mach)`. All other +training-related outcomes are inspected with `report(mach)`. + +```@example machines +X, y = @load_iris +pca = @load PCA +mach = machine(pca, X) +fit!(mach) +``` + +```@repl machines +fitted_params(mach) +report(mach) +``` + +## Saving machines + +To save a machine to file, use the [`MLJ.save`](@ref) command: + +```julia +tree = @load DecisionTreeClassifier +mach = fit!(machine(tree, X, y)) +MLJ.save("my_machine.jlso", mach) +``` + +To de-serialize, one uses the `machine` constructor: + +```julia +mach2 = machine("my_machine.jlso") +predict(mach2, Xnew); +``` + +The machine `mach2` cannot be retrained; however, by providing data to +the constructor one can enable retraining using the saved model +hyperparameters (which overwrites the saved learned parameters): + +```julia +mach3 = machine("my_machine.jlso", Xnew, ynew) +fit!(mach3) +``` + + +## Internals + For a supervised machine the `predict` method calls a lower-level `MLJBase.predict` method, dispatched on the underlying model and the `fitresult` (see below). To see `predict` in action, as well as its unsupervised cousins `transform` and `inverse_transform`, see [Getting Started](index.md). -Here is a complete list of the fields of a machine: +The fields of a `Machine` instance (which should not generally be +accessed by the user) are: - `model` - the struct containing the hyperparameters to be used in calls to `fit!` @@ -84,33 +133,14 @@ Here is a complete list of the fields of a machine: Instead of data `X` and `y`, the `machine` constructor can be provided `Node` or `Source` objects ("dynamic data") to obtain a -`NodalMachine`, rather than a regular `Machine` object, which includes -the same fields listed above. -See [Composing Models](composing_models.md) for more on this advanced feature. 
- - -## Inspecting machines - -There are two methods for inspecting the outcomes of training in -MLJ. To obtain a named-tuple describing the learned parameters (in a -user-friendly way where possible) use `fitted_params(mach)`. All other -training-related outcomes are inspected with `report(mach)`. - -```@example machines -X, y = @load_iris -pca = @load PCA -mach = machine(pca, X) -fit!(mach) -``` - -```@repl machines -fitted_params(mach) -report(mach) -``` +`NodalMachine`, rather than a regular `Machine` object, which has the +fields listed above and some others. See [Composing +Models](composing_models.md) for more on this advanced feature. ## API Reference ```@docs fit! +MLJBase.save ```