This demo assumes you have certain packages in your active package environment. To activate a new environment, "MyNewEnv", with just these packages, do this in a new REPL session:
using Pkg
Pkg.activate("MyNewEnv")
Pkg.add(["MLJ", "RDatasets", "DataFrames", "MLJDecisionTreeInterface",
"MLJMultivariateStatsInterface", "NearestNeighborModels", "MLJGLMInterface",
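The steps that load the data and instantiate the classifier are not shown above. A plausible reconstruction, assuming the MASS crabs table with its five continuous measurements as features and the two-class species column :Sp as target (the data source and column choices are assumptions):
using MLJ
import RDatasets

crabs = RDatasets.dataset("MASS", "crabs")   # assumed data source
y, X = unpack(crabs, ==(:Sp), name -> name in [:FL, :RW, :CL, :CW, :BD])

Tree = @load DecisionTreeClassifier pkg=DecisionTree   # provided by MLJDecisionTreeInterface
tree = Tree(max_depth=2)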
Bind the model and data together in a machine, which will additionally store the learned parameters (fitresults) when fit:
mach = machine(tree, X, y)
untrained Machine; caches model-specific representations of data
model: DecisionTreeClassifier(max_depth = 2, …)
args:
  1: Source @822 ⏎ Table{AbstractVector{Continuous}}
  2: Source @362 ⏎ AbstractVector{Multiclass{2}}
Split row indices into training and evaluation rows:
train, test = partition(eachindex(y), 0.7); # 70:30 split
([1, 2, 3, 4, 5, 6, 7, 8, 9, 10 … 131, 132, 133, 134, 135, 136, 137, 138, 139, 140], [141, 142, 143, 144, 145, 146, 147, 148, 149, 150 … 191, 192, 193, 194, 195, 196, 197, 198, 199, 200])
Fit on the train data set and evaluate on the test data set:
fit!(mach, rows=train)
yhat = predict(mach, X[test,:])
LogLoss(tol=1e-4)(yhat, y[test])
1.0788055664326648
Note: LogLoss() has aliases log_loss and cross_entropy.
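For example (not part of the original output), the aliases can be called directly as measures; log_loss corresponds to LogLoss() with its default tolerance:
log_loss(yhat, y[test])        # same measure as LogLoss()(yhat, y[test])
cross_entropy(yhat, y[test])   # identical measure under another alias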
Predict on the new data set:
Xnew = (FL = rand(3), RW = rand(3), CL = rand(3), CW = rand(3), BD = rand(3))
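The prediction call itself is not shown above; a minimal sketch, assuming the machine mach trained earlier:
yhat = predict(mach, Xnew)   # probabilistic predictions: a vector of UnivariateFinite distributions
predict_mode(mach, Xnew)     # corresponding point predictions (the most probable class)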
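The call producing the evaluation tables below is not shown above; a plausible form, assuming three-fold cross-validation scored with log loss and accuracy (the resampling settings are assumptions):
evaluate!(mach, resampling=CV(nfolds=3, shuffle=true, rng=123),
          measures=[log_loss, accuracy], verbosity=0)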
┌───┬──────────────────────┬──────────────┬─────────────┐
│ │ measure │ operation │ measurement │
├───┼──────────────────────┼──────────────┼─────────────┤
│ A │ LogLoss(             │ predict      │ 4.3         │
│ │ tol = 2.22045e-16) │ │ │
│ B │ Accuracy() │ predict_mode │ 0.736 │
└───┴──────────────────────┴──────────────┴─────────────┘
┌───┬───────────────────────┬─────────┐
│ │ per_fold │ 1.96*SE │
├───┼───────────────────────┼─────────┤
│ A │ [5.1, 4.97, 3.01]     │ 1.62    │
│ B │ [0.696, 0.739, 0.769] │ 0.0513 │
└───┴───────────────────────┴─────────┘
Changing a hyperparameter and re-evaluating:
tree.max_depth = 3
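The re-evaluation call is not shown above; a plausible sketch, repeating the earlier evaluation with the deeper tree:
evaluate!(mach, resampling=CV(nfolds=3, shuffle=true, rng=123),
          measures=[log_loss, accuracy], verbosity=0)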
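The setup for the next example (inspecting a fitted linear model) is not shown above. A plausible reconstruction, assuming two continuous features x1, x2 and a continuous target roughly matching the coefficients reported below (the data-generating details are assumptions):
LinearRegressor = @load LinearRegressor pkg=GLM   # provided by MLJGLMInterface
X = (x1 = rand(100), x2 = rand(100))
y = X.x1 .- 2 .* X.x2 .+ 0.05 .+ 0.03 .* randn(100)   # assumed synthetic target
ols = LinearRegressor()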
mach = machine(ols, X, y) |> fit!
trained Machine; caches model-specific representations of data
model: LinearRegressor(fit_intercept = true, …)
args:
  1: Source @420 ⏎ Table{AbstractVector{Continuous}}
  2: Source @896 ⏎ AbstractVector{Continuous}
Get a named tuple representing the learned parameters, human-readable if appropriate:
fitted_params(mach)
(features = [:x1, :x2],
 coef = [0.9991493052514759, -2.000770727737916],
 intercept = 0.04558920911757358,)
Get other training-related information:
report(mach)
(stderror = [0.0075716576252445695, 0.010270084681692026, 0.009571713656806065],
 dof_residual = 97.0,
 vcov = [5.7329999193924235e-5 -5.429443842036848e-5 -4.7225605422306874e-5; -5.429443842036848e-5 0.0001054746393691252 5.6999071938576035e-6; -4.7225605422306874e-5 5.6999071938576035e-6 9.161770232788773e-5],
 deviance = 0.07659888168821351,
 coef_table = ──────────────────────────────────────────────────────────────────────────────
                   Coef.  Std. Error        t  Pr(>|t|)   Lower 95%   Upper 95%
──────────────────────────────────────────────────────────────────────────────
(Intercept)   0.0455892  0.00757166     6.02    <1e-07   0.0305616   0.0606169
x1            0.999149   0.0102701     97.29    <1e-97   0.978766    1.01953
x2           -2.00077    0.00957171  -209.03    <1e-99  -2.01977    -1.98177
──────────────────────────────────────────────────────────────────────────────,)
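Since the GLM linear regressor is a probabilistic model, its predictions are Normal distributions. A minimal sketch of obtaining point predictions (not part of the original output):
yhat = predict(mach, X)      # vector of Normal distributions
predict_mean(mach, X)[1:3]   # point predictions (the distribution means)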
Load data:
X, y = @load_iris # a table and a vector
train, test = partition(eachindex(y), 0.97, shuffle=true, rng=123)
([125, 100, 130, 9, 70, 148, 39, 64, 6, 107 … 110, 59, 139, 21, 112, 144, 140, 72, 109, 41], [106, 147, 47, 5])
Instantiate and fit the model/machine:
PCA = @load PCA
pca = PCA(maxoutdim=2)
mach = machine(pca, X)
fit!(mach, rows=train)
trained Machine; caches model-specific representations of data
model: PCA(maxoutdim = 2, …)
args:
  1: Source @625 ⏎ Table{AbstractVector{Continuous}}
Transform selected data bound to the machine:
transform(mach, rows=test);
(x1 = [-3.394282685448322, -1.5219827578765053, 2.53824745518522, 2.7299639893931382],
x2 = [0.547245022374522, -0.36842368617126425, 0.5199299511335688, 0.3448466122232349],)
Transform new data:
Xnew = (sepal_length=rand(3), sepal_width=rand(3),
petal_length=rand(3), petal_width=rand(3));
transform(mach, Xnew)
(x1 = [4.932980176376836, 4.673447918876899, 5.286789315108594],
 x2 = [-4.587828781511142, -4.427755497747251, -5.031367248586764],)
Standardizing a vector (a learned transformation that can later be inverted):
y = rand(100);
stand = Standardizer()
mach = machine(stand, y)
fit!(mach)
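The transform and its inverse are not shown above; a minimal sketch using the fitted standardizer mach:
z = transform(mach, y)             # standardized copy of y (mean ≈ 0, std ≈ 1)
ynew = inverse_transform(mach, z)  # undo the standardization
y ≈ ynew                           # true, up to round-off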
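The next example tunes a model's hyperparameters. The construction of forest and tuned_forest is not shown above; a plausible sketch, assuming a bagged ensemble of decision trees on the iris data tuned over n_subfeatures (all parameter values here are assumptions):
X, y = @load_iris

Tree = @load DecisionTreeClassifier pkg=DecisionTree
forest = EnsembleModel(model=Tree(), n=300)                   # probabilistic bagging ensemble
r = range(forest, :(model.n_subfeatures), lower=1, upper=4)   # hyperparameter range to search

tuned_forest = TunedModel(model=forest,
                          tuning=Grid(),
                          resampling=CV(nfolds=6),
                          range=r,
                          measure=BrierLoss())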
Bind the wrapped model to data:
mach = machine(tuned_forest, X, y)
untrained Machine; does not cache data
model: ProbabilisticTunedModel(model = ProbabilisticEnsembleModel(model = DecisionTreeClassifier(max_depth = -1, …), …), …)
args:
  1: Source @176 ⏎ Table{AbstractVector{Continuous}}
  2: Source @073 ⏎ AbstractVector{Multiclass{3}}
Fitting the resultant machine optimizes the hyperparameters specified in range, using the specified tuning and resampling strategies and performance measure (possibly a vector of measures), and retrains on all data bound to the machine:
fit!(mach)
trained Machine; does not cache data
model: ProbabilisticTunedModel(model = ProbabilisticEnsembleModel(model = DecisionTreeClassifier(max_depth = -1, …), …), …)
args:
  1: Source @176 ⏎ Table{AbstractVector{Continuous}}
  2: Source @073 ⏎ AbstractVector{Multiclass{3}}
Inspecting the optimal model:
F = fitted_params(mach)
(best_model = ProbabilisticEnsembleModel(model = DecisionTreeClassifier(max_depth = -1, …), …),
best_fitted_params = (fitresult = WrappedEnsemble(atom = DecisionTreeClassifier(max_depth = -1, …), …),),)
F.best_model
ProbabilisticEnsembleModel(
model = DecisionTreeClassifier(
max_depth = -1,
min_samples_leaf = 1,
min_samples_split = 2,
min_purity_increase = 0.0,
n_subfeatures = 3,
post_prune = false,
merge_purity_threshold = 1.0,
display_depth = 5,
acceleration = CPU1{Nothing}(nothing),
out_of_bag_measure = Any[])
Inspecting details of tuning procedure:
r = report(mach);
keys(r)
(:best_model, :best_history_entry, :history, :best_report, :plotting)
r.history[[1,end]]
2-element Vector{@NamedTuple{model::MLJEnsembles.ProbabilisticEnsembleModel{MLJDecisionTreeInterface.DecisionTreeClassifier}, measure::Vector{StatisticalMeasuresBase.RobustMeasure{StatisticalMeasuresBase.FussyMeasure{StatisticalMeasuresBase.RobustMeasure{StatisticalMeasures._BrierLossType}, typeof(StatisticalMeasures.l2_check)}}}, measurement::Vector{Float64}, per_fold::Vector{Vector{Float64}}, evaluation::CompactPerformanceEvaluation{MLJEnsembles.ProbabilisticEnsembleModel{MLJDecisionTreeInterface.DecisionTreeClassifier}, Vector{StatisticalMeasuresBase.RobustMeasure{StatisticalMeasuresBase.FussyMeasure{StatisticalMeasuresBase.RobustMeasure{StatisticalMeasures._BrierLossType}, typeof(StatisticalMeasures.l2_check)}}}, Vector{Float64}, Vector{typeof(predict)}, Vector{Vector{Float64}}, Vector{Vector{Vector{Float64}}}, CV}}}:
 (model = ProbabilisticEnsembleModel(model = DecisionTreeClassifier(max_depth = -1, …), …), measure = [BrierLoss()], measurement = [0.10329451851851834], per_fold = [[-0.0, -0.0, 0.12643466666666656, 0.15470222222222174, 0.13779822222222193, 0.20083199999999976]], evaluation = CompactPerformanceEvaluation(0.103,))
 (model = ProbabilisticEnsembleModel(model = DecisionTreeClassifier(max_depth = -1, …), …), measure = [BrierLoss()], measurement = [0.11934060905349804], per_fold = [[0.026442666666666767, 0.005732444444444598, 0.1926373333333334, 0.14254809876543217, 0.1626662222222222, 0.1860168888888891]], evaluation = CompactPerformanceEvaluation(0.119,))
Visualizing these results:
using Plots
plot(mach)
Predicting on new data using the optimized model trained on all data:
predict(mach, Xnew)
3-element UnivariateFiniteVector{Multiclass{3}, String, UInt32, Float64}:
UnivariateFinite{Multiclass{3}}(setosa=>1.0, versicolor=>0.0, virginica=>0.0)
UnivariateFinite{Multiclass{3}}(setosa=>1.0, versicolor=>0.0, virginica=>0.0)
 UnivariateFinite{Multiclass{3}}(setosa=>1.0, versicolor=>0.0, virginica=>0.0)
Reference: Linear Pipelines
Constructing a linear (unbranching) pipeline with a learned target transformation/inverse transformation:
X, y = @load_reduced_ames
KNN = @load KNNRegressor
knn_with_target = TransformedTargetModel(model=KNN(K=3), transformer=Standardizer())
TransformedTargetModelDeterministic(
model = KNNRegressor(
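The composition of the full pipeline is not shown above; a plausible sketch, assuming the remaining non-continuous features are handled with a ContinuousEncoder (the choice of encoder is an assumption):
pipe = ContinuousEncoder() |> knn_with_target   # unbranching pipeline: encode features, then KNN with standardized target
mach = machine(pipe, X, y) |> fit!
predict(mach, X)[1:3]                           # predictions are automatically inverse-transformed to the original target scale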
┌──────────────────────┬───────────┬─────────────┐
│ measure │ operation │ measurement │
├──────────────────────┼───────────┼─────────────┤
│ LogLoss(             │ predict   │ 0.429       │
│   tol = 2.22045e-16) │           │             │
└──────────────────────┴───────────┴─────────────┘
┌─────────────────────────────────────────────────┬─────────┐
│ per_fold                                        │ 1.96*SE │
├─────────────────────────────────────────────────┼─────────┤
│ [3.89e-15, 3.89e-15, 0.302, 0.381, 1.56, 0.329] │ 0.507   │
└─────────────────────────────────────────────────┴─────────┘
Generate a plot of performance as a function of some hyperparameter (building on the preceding example).
Single performance curve:
r = range(forest, :n, lower=1, upper=1000, scale=:log10)
curve = learning_curve(mach,
range=r,
verbosity=0)
(parameter_name = "n",
parameter_scale = :log10,
parameter_values = [1, 2, 3, 4, 5, 6, 7, 8, 10, 11 … 281, 324, 373, 429, 494, 569, 655, 754, 869, 1000],
 measurements = [8.009700753137146, 7.3165535725772, 4.165577152378119, 2.7016641697125308, 2.7264652068796558, 2.667200175335509, 2.679693430839872, 2.6990484091188085, 2.711284561225735, 1.95524844163632 … 1.2474446228963525, 1.2455088836705839, 1.243424421444324, 1.2363329736702997, 1.239539419310721, 1.2384777558609936, 1.2373480020980578, 1.243692344943664, 1.2429655812800875, 1.2395704269170391],)
using Plots
plot(curve.parameter_values, curve.measurements,
xlab=curve.parameter_name, xscale=curve.parameter_scale)
Multiple curves:
curve = learning_curve(mach,
range=r,
verbosity=0)
(parameter_name = "n",
parameter_scale = :log10,
parameter_values = [1, 2, 3, 4, 5, 6, 7, 8, 10, 11 … 281, 324, 373, 429, 494, 569, 655, 754, 869, 1000],
 measurements = [4.004850376568572 8.009700753137146 16.820371581588002 9.611640903764574; 4.004850376568572 8.009700753137146 9.087929700674836 9.611640903764574; … ; 1.2099979316961877 1.2316766858863117 1.266241881645686 1.274322191002287; 1.214989736207193 1.2334567682916915 1.2684272251885533 1.2728908797309264],)
plot(curve.parameter_values, curve.measurements,
     xlab=curve.parameter_name, xscale=curve.parameter_scale)
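The keyword arguments that distinguish the multiple-curves call from the single-curve one are not shown above; a plausible form, assuming the four columns of measurements come from re-seeding the model's random number generator via learning_curve's rng_name/rngs options:
curve = learning_curve(mach,
                       range=r,
                       resampling=Holdout(),
                       measure=log_loss,   # assumption: same measure as the single curve
                       rng_name=:rng,      # assumed field of the model to re-seed
                       rngs=4,             # four seeds, hence four curves (columns)
                       verbosity=0)
plot(curve.parameter_values, curve.measurements,
     xlab=curve.parameter_name, xscale=curve.parameter_scale)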