diff --git a/Project.toml b/Project.toml index 96c00d723..f5752a0fa 100644 --- a/Project.toml +++ b/Project.toml @@ -1,25 +1,21 @@ name = "MLJ" uuid = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7" authors = ["Anthony D. Blaom "] -version = "0.8.0" +version = "0.9.0" [deps] CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" ComputationalResources = "ed09eef8-17a6-5b46-8889-db040fac31e3" -Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" -DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" -DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" -InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d" MLJModels = "d491faf4-2d78-11e9-2867-c94bc002c0b7" +MLJScientificTypes = "2e2323e0-db8b-457b-ae0d-bdfb3bc63afd" MLJTuning = "03970b2e-30c4-11ea-3135-d1576263f10f" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" @@ -27,13 +23,12 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" [compat] CategoricalArrays = "^0.7" ComputationalResources = "^0.3" -Distributions = "^0.21" -DocStringExtensions = "^0.8" -MLJBase = "^0.10" -MLJTuning = "^0.1.1" -MLJModels = "^0.7" +Distributions = "^0.21,^0.22" +MLJBase = "^0.11" +MLJModels = "^0.8" +MLJScientificTypes = "^0.1" +MLJTuning = "^0.1" ProgressMeter = "^1.1" -ScientificTypes = "^0.5.1" StatsBase = "^0.32" Tables = "^0.2" julia = "1" diff --git a/README.md b/README.md index ae149b98e..7e9aeeaf8 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,7 @@ The MLJ universe is made out of several repositories some of which can be used i * (⟂) [MLJBase.jl](https://github.com/alan-turing-institute/MLJBase.jl) offers essential tools to load and interpret data, describe ML models and use metrics; it is the repository you should interface with if you wish to make your package accessible via MLJ, * [MLJ.jl](https://github.com/alan-turing-institute/MLJ.jl) offers tools to compose, tune and evaluate models, -* [MLJModels.jl](https://github.com/alan-turing-institute/MLJModels.jl) contains interfaces to a number of important model-providing packages such as, [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl), [ScikitLearn.jl](https://github.com/bensadeghi/ScikitLearn.jl) or [XGBoost.jl](https://github.com/dmlc/XGBoost.jl) as well as a few built-in transformations (one hot encoding, standardisation, ...), it also hosts the *model registry* which keeps track of all models accessible via MLJ, +* [MLJModels.jl](https://github.com/alan-turing-institute/MLJModels.jl) contains interfaces to a number of important model-providing packages such as, [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl), [ScikitLearn.jl](https://github.com/cstjean/ScikitLearn.jl) or [XGBoost.jl](https://github.com/dmlc/XGBoost.jl) as well as a few built-in transformations (one hot encoding, standardisation, ...), it also hosts the *model registry* which keeps track of all models accessible via MLJ, * (⟂) [ScientificTypes.jl](https://github.com/alan-turing-institute/ScientificTypes.jl) a lightweight package to help MLJ articulate it's conventions about how 
different types of data (`2.71`, `"male"`, `CategoricalArray{Int}`, etc ) should be *interpreted* by models (`Continuous`, `Textual`, `AbstractArray{Multiclass}`, etc). * (⟂) [MLJLinearModels.jl](https://github.com/alan-turing-institute/MLJLinearModels.jl) an experimental package for a wide range of penalised linear models such as Lasso, Elastic-Net, Robust regression, LAD regression, etc. * [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl) an experimental package to use Flux within MLJ. diff --git a/docs/Project.toml b/docs/Project.toml index be643b4b1..5cdc6653a 100755 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -6,23 +6,24 @@ DecisionTree = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a" InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" -Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" LossFunctions = "30fc2ffe-d236-52d8-8643-a9d8f7c094a7" MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d" +MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea" +MLJModels = "d491faf4-2d78-11e9-2867-c94bc002c0b7" +MLJScientificTypes = "2e2323e0-db8b-457b-ae0d-bdfb3bc63afd" MLJTuning = "03970b2e-30c4-11ea-3135-d1576263f10f" Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411" NearestNeighbors = "b8a86587-4115-5ab1-83bc-aa920d37bbce" -Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" -PyPlot = "d330b81b-6aea-500a-939a-2ce795aea3ee" RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81" TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9" [compat] -DecisionTree = "0.8, 0.9.1" -Documenter = "^0.22,0.23" -MLJBase = "^0.10" -ScientificTypes = "^0.5" +Documenter = "^0.24" +MLJBase = "^0.11" +MLJModelInterface = "^0.1" +MLJModels = "^0.8" +MLJScientificTypes = "^0.1" +MLJTuning = "^0.1" julia = "1.2" diff --git a/docs/make.jl b/docs/make.jl index af240921e..85adf42f6 100755 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,40 +1,48 @@ if Base.HOME_PROJECT[] !== nothing Base.HOME_PROJECT[] = abspath(Base.HOME_PROJECT[]) end + using Pkg using Documenter using MLJ -using MLJBase -using MLJTuning -using MLJModels -using ScientificTypes +import MLJBase +import MLJTuning +import MLJModels +import MLJScientificTypes +import MLJModelInterface +using CategoricalArrays # avoid types like CategoricalArrays.Categorica +using LossFunctions + +const MMI = MLJModelInterface # using Literate # Literate.markdown("common_mlj_workflows.jl", ".", # codefence = "```@example workflows" => "```") -pages = Any["Getting Started"=>"index.md", - "Common MLJ Workflows" => "common_mlj_workflows.md", - "Model Search" => "model_search.md", - "Machines" => "machines.md", - "Evaluating Model Performance"=>"evaluating_model_performance.md", - "Performance Measures"=> "performance_measures.md", - "Tuning Models" => "tuning_models.md", - "Learning Curves" => "learning_curves.md", - "Built-in Transformers" => "built_in_transformers.md", - "Composing Models" => "composing_models.md", - "Homogeneous Ensembles" => "homogeneous_ensembles.md", - "Simple User Defined Models" => "simple_user_defined_models.md", - "Adding Models for General Use" => "adding_models_for_general_use.md", - "Benchmarking" => "benchmarking.md", - "Internals"=>"internals.md", - "Glossary"=>"glossary.md", - "API"=>"api.md", - "MLJ Cheatsheet" => "mlj_cheatsheet.md", - "MLJ News"=>"NEWS.md", - "FAQ" => 
"frequently_asked_questions.md", - "Julia BlogPost"=>"julia_blogpost.md", - "Acceleration and Parallelism"=>"acceleration_and_parallelism.md"] +pages = [ + "Getting Started" => "index.md", + "Common MLJ Workflows" => "common_mlj_workflows.md", + "Model Search" => "model_search.md", + "Machines" => "machines.md", + "Evaluating Model Performance" => "evaluating_model_performance.md", + "Performance Measures" => "performance_measures.md", + "Tuning Models" => "tuning_models.md", + "Learning Curves" => "learning_curves.md", + "Built-in Transformers" => "built_in_transformers.md", + "Composing Models" => "composing_models.md", + "Homogeneous Ensembles" => "homogeneous_ensembles.md", + "Simple User Defined Models" => "simple_user_defined_models.md", + "Adding Models for General Use" => "adding_models_for_general_use.md", + "Benchmarking" => "benchmarking.md", + "Internals" => "internals.md", + "Glossary" => "glossary.md", + # "API" => "api.md", # NOTE: commented as currently empty + "MLJ Cheatsheet" => "mlj_cheatsheet.md", + "MLJ News" => "NEWS.md", + "FAQ" => "frequently_asked_questions.md", + "Julia BlogPost" => "julia_blogpost.md", + "Acceleration and Parallelism" => "acceleration_and_parallelism.md" + ] for p in pages println(first(p)) @@ -42,9 +50,9 @@ end makedocs( sitename = "MLJ", - format = Documenter.HTML(), - modules = [MLJ, MLJBase, MLJTuning, MLJModels, ScientificTypes], - pages=pages) + format = Documenter.HTML(), + modules = [MLJ, MLJBase, MLJTuning, MLJModels, MLJScientificTypes, MLJModelInterface], + pages = pages) # By default Documenter does not deploy docs just for PR # this causes issues with how we're doing things and ends diff --git "a/docs/src/\nworkflows_learning_curves.png" "b/docs/src/_old/\nworkflows_learning_curves.png" similarity index 100% rename from "docs/src/\nworkflows_learning_curves.png" rename to "docs/src/_old/\nworkflows_learning_curves.png" diff --git a/docs/src/0.jpg b/docs/src/_old/0.jpg similarity index 100% rename from docs/src/0.jpg rename to docs/src/_old/0.jpg diff --git a/docs/src/0_small.jpg b/docs/src/_old/0_small.jpg similarity index 100% rename from docs/src/0_small.jpg rename to docs/src/_old/0_small.jpg diff --git a/docs/src/ATI_logo_black.png b/docs/src/_old/ATI_logo_black.png similarity index 100% rename from docs/src/ATI_logo_black.png rename to docs/src/_old/ATI_logo_black.png diff --git a/docs/src/common_mlj_workflows.ipynb b/docs/src/_old/common_mlj_workflows.ipynb similarity index 100% rename from docs/src/common_mlj_workflows.ipynb rename to docs/src/_old/common_mlj_workflows.ipynb diff --git a/docs/src/scitypes.xml b/docs/src/_old/scitypes.xml similarity index 100% rename from docs/src/scitypes.xml rename to docs/src/_old/scitypes.xml diff --git a/docs/src/scitypes_original.png b/docs/src/_old/scitypes_original.png similarity index 100% rename from docs/src/scitypes_original.png rename to docs/src/_old/scitypes_original.png diff --git a/docs/src/tiny_demo.ipynb b/docs/src/_old/tiny_demo.ipynb similarity index 100% rename from docs/src/tiny_demo.ipynb rename to docs/src/_old/tiny_demo.ipynb diff --git a/docs/src/tour.ipynb b/docs/src/_old/tour.ipynb similarity index 100% rename from docs/src/tour.ipynb rename to docs/src/_old/tour.ipynb diff --git a/docs/src/two_model_stack.dia b/docs/src/_old/two_model_stack.dia similarity index 100% rename from docs/src/two_model_stack.dia rename to docs/src/_old/two_model_stack.dia diff --git a/docs/src/two_parameter_tuning_plot.png b/docs/src/_old/two_parameter_tuning_plot.png similarity index 
100% rename from docs/src/two_parameter_tuning_plot.png rename to docs/src/_old/two_parameter_tuning_plot.png diff --git a/docs/src/workflows_learning_curves_large.png b/docs/src/_old/workflows_learning_curves_large.png similarity index 100% rename from docs/src/workflows_learning_curves_large.png rename to docs/src/_old/workflows_learning_curves_large.png diff --git a/docs/src/workflows_tuning_plot_large.png b/docs/src/_old/workflows_tuning_plot_large.png similarity index 100% rename from docs/src/workflows_tuning_plot_large.png rename to docs/src/_old/workflows_tuning_plot_large.png diff --git a/docs/src/wrapped_ridge.dia b/docs/src/_old/wrapped_ridge.dia similarity index 100% rename from docs/src/wrapped_ridge.dia rename to docs/src/_old/wrapped_ridge.dia diff --git a/docs/src/acceleration_and_parallelism.md b/docs/src/acceleration_and_parallelism.md index 874df0f06..8473b5114 100644 --- a/docs/src/acceleration_and_parallelism.md +++ b/docs/src/acceleration_and_parallelism.md @@ -8,7 +8,7 @@ subject to breaking changes during minor or major releases without warning. -### User-facing interface +## User-facing interface To enable composable, extensible acceleration of core MLJ methods, [ComputationalResources.jl](https://github.com/timholy/ComputationalResources.jl) diff --git a/docs/src/adding_models_for_general_use.md b/docs/src/adding_models_for_general_use.md index 615331b0d..b8ea5413f 100755 --- a/docs/src/adding_models_for_general_use.md +++ b/docs/src/adding_models_for_general_use.md @@ -1,24 +1,38 @@ # Adding Models for General Use -This guide outlines in detail the specification of the MLJ model interface and -provides guidelines for implementing the interface for models intended -for general use. For sample implementations, see +This guide outlines the specification of the MLJ model interface +and provides detailed guidelines for implementing the interface for +models intended for general use. See also the more condensed +[Step-by-Step Guide for Adding Models](@ref). + +For sample implementations, see [MLJModels/src](https://github.com/alan-turing-institute/MLJModels.jl/tree/master/src). The machine learning tools provided by MLJ can be applied to the models in any package that imports the package -[MLJBase](https://github.com/alan-turing-institute/MLJBase.jl) and +[MLJModelInterface](https://github.com/alan-turing-institute/MLJModelInterface.jl) and implements the API defined there, as outlined below. For a quick-and-dirty implementation of user-defined models see [Simple User Defined Models](simple_user_defined_models.md). To make new models available to all MLJ users, see [Where to place code implementing new models](@ref). +**Important.** +[MLJModelInterface](https://github.com/alan-turing-institute/MLJModelInterface.jl) +is a very light-weight interface allowing you to *define* your +interface, but does not provide the functionality required to use or +test your interface. So, while you only need to add +`MLJModelInterface` to your project's [deps], for testing purposes you +need to add +[MLJBase](https://github.com/alan-turing-institute/MLJBase.jl) to your project's +[extras] and [targets]. In testing, simply use `MLJBase` in place of +`MLJModelInterface`. + It is assumed the reader has read [Getting Started](index.md). 
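To make the [deps]/[extras]/[targets] arrangement described in the note above concrete, here is a minimal sketch; the package name `MyMLJInterface` and the file layout are hypothetical, and only the two imports matter:

```julia
# src/MyMLJInterface.jl -- the lightweight [deps] entry is enough to *define* the interface:
import MLJModelInterface
const MMI = MLJModelInterface

# test/runtests.jl -- MLJBase, listed under [extras] and [targets], supplies the
# functionality needed to actually fit and exercise the interface during tests:
using MLJBase
using Test
```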
To implement the API described here, some familiarity with the following packages is also helpful: -- [ScientificTypes.jl](https://github.com/alan-turing-institute/ScientificTypes.jl) +- [MLJScientificTypes.jl](https://github.com/alan-turing-institute/MLJScientificTypes.jl) (for specifying model requirements of data) - [Distributions.jl](https://github.com/JuliaStats/Distributions.jl) @@ -38,7 +52,7 @@ reading of this document, the reader may wish to refer to [MLJ Internals](internals.md) for context. -### Overview +## Overview A *model* is an object storing hyperparameters associated with some machine learning algorithm. In MLJ, hyperparameters include configuration @@ -54,7 +68,7 @@ ordinary multivariate regression, for example, this would be the coefficients and intercept. For a general supervised model, it is the (generally minimal) information needed to make new predictions. -The ultimate supertype of all models is `MLJBase.Model`, which +The ultimate supertype of all models is `MLJModelInterface.Model`, which has two abstract subtypes: ```julia @@ -83,31 +97,32 @@ a model instance and a fitresult (plus other data), are called *operations*. `Probabilistic` supervised models optionally implement a `predict_mode` operation (in the case of classifiers) or a `predict_mean` and/or `predict_median` operations (in the case of -regressors) although MLJBase also provides fallbacks that will suffice +regressors) although MLJModelInterface also provides fallbacks that will suffice in most cases. `Unsupervised` models may implement an `inverse_transform` operation. -### New model type declarations and optional clean! method +## New model type declarations and optional clean! method Here is an example of a concrete supervised model type declaration: ```julia -import MLJ +import MLJModelInterface +const MMI = MLJModelInterface -mutable struct RidgeRegressor <: MLJBase.Deterministic +mutable struct RidgeRegressor <: MMI.Deterministic lambda::Float64 end ``` Models (which are mutable) should not be given internal constructors. It is recommended that they be given an external lazy -keyword constructor of the same name. This constructor defines default values for -every field, and optionally corrects invalid field values by calling a `clean!` method -(whose fallback returns an empty message string): +keyword constructor of the same name. This constructor defines default values +for every field, and optionally corrects invalid field values by calling a +`clean!` method (whose fallback returns an empty message string): ```julia -function MLJ.clean!(model::RidgeRegressor) +function MMI.clean!(model::RidgeRegressor) warning = "" if model.lambda < 0 warning *= "Need lambda ≥ 0. Resetting lambda=0. " @@ -119,7 +134,7 @@ end # keyword constructor function RidgeRegressor(; lambda=0.0) model = RidgeRegressor(lambda) - message = MLJBase.clean!(model) + message = MMI.clean!(model) isempty(message) || @warn message return model end @@ -129,7 +144,7 @@ An alternative to declaring the model struct, clean! method and keyword constructor, is to use the `@mlj_model` macro, as in the following example: ```julia -@mlj_model mutable struct YourModel <: MLJBase.Deterministic +@mlj_model mutable struct YourModel <: MMI.Deterministic a::Float64 = 0.5::(_ > 0) b::String = "svd"::(_ in ("svd","qr")) end @@ -150,107 +165,107 @@ You cannot use the `@mlj_model` macro if your model struct has type parameters. 
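For orientation, here is how the keyword constructor and `clean!` fallback above behave in use (a sketch; it assumes the elided body of `clean!` resets `lambda` to zero, as its warning message indicates):

```julia
model = RidgeRegressor()           # keyword constructor fills in the default: lambda = 0.0
bad = RidgeRegressor(lambda=-1.0)  # constructor calls clean! and issues the warning
                                   # "Need lambda ≥ 0. Resetting lambda=0."
bad.lambda                         # 0.0
```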
-### Supervised models +## Supervised models The compulsory and optional methods to be implemented for each -concrete type `SomeSupervisedModel <: MLJBase.Supervised` are +concrete type `SomeSupervisedModel <: MMI.Supervised` are summarized below. An `=` indicates the return value for a fallback version of the method. -#### Summary of methods +### Summary of methods Compulsory: ```julia -MLJBase.fit(model::SomeSupervisedModel, verbosity::Integer, X, y) -> fitresult, cache, report -MLJBase.predict(model::SomeSupervisedModel, fitresult, Xnew) -> yhat +MMI.fit(model::SomeSupervisedModel, verbosity::Integer, X, y) -> fitresult, cache, report +MMI.predict(model::SomeSupervisedModel, fitresult, Xnew) -> yhat ``` Optional, to check and correct invalid hyperparameter values: ```julia -MLJBase.clean!(model::SomeSupervisedModel) = "" +MMI.clean!(model::SomeSupervisedModel) = "" ``` Optional, to return user-friendly form of fitted parameters: ```julia -MLJBase.fitted_params(model::SomeSupervisedModel, fitresult) = fitresult +MMI.fitted_params(model::SomeSupervisedModel, fitresult) = fitresult ``` Optional, to avoid redundant calculations when re-fitting machines associated with a model: ```julia -MLJBase.update(model::SomeSupervisedModel, verbosity, old_fitresult, old_cache, X, y) = - MLJBase.fit(model, verbosity, X, y) +MMI.update(model::SomeSupervisedModel, verbosity, old_fitresult, old_cache, X, y) = + MMI.fit(model, verbosity, X, y) ``` Optional, to specify default hyperparameter ranges (for use in tuning): ```julia -MLJBase.hyperparameter_ranges(T::Type) = Tuple(fill(nothing, length(fieldnames(T)))) +MMI.hyperparameter_ranges(T::Type) = Tuple(fill(nothing, length(fieldnames(T)))) ``` Optional, if `SomeSupervisedModel <: Probabilistic`: ```julia -MLJBase.predict_mode(model::SomeSupervisedModel, fitresult, Xnew) = +MMI.predict_mode(model::SomeSupervisedModel, fitresult, Xnew) = mode.(predict(model, fitresult, Xnew)) -MLJBase.predict_mean(model::SomeSupervisedModel, fitresult, Xnew) = +MMI.predict_mean(model::SomeSupervisedModel, fitresult, Xnew) = mean.(predict(model, fitresult, Xnew)) -MLJBase.predict_median(model::SomeSupervisedModel, fitresult, Xnew) = +MMI.predict_median(model::SomeSupervisedModel, fitresult, Xnew) = median.(predict(model, fitresult, Xnew)) ``` Required, if the model is to be registered (findable by general users): ```julia -MLJBase.load_path(::Type{<:SomeSupervisedModel}) = "" -MLJBase.package_name(::Type{<:SomeSupervisedModel}) = "Unknown" -MLJBase.package_uuid(::Type{<:SomeSupervisedModel}) = "Unknown" +MMI.load_path(::Type{<:SomeSupervisedModel}) = "" +MMI.package_name(::Type{<:SomeSupervisedModel}) = "Unknown" +MMI.package_uuid(::Type{<:SomeSupervisedModel}) = "Unknown" ``` ```julia -MLJBase.input_scitype(::Type{<:SomeSupervisedModel}) = Unknown +MMI.input_scitype(::Type{<:SomeSupervisedModel}) = Unknown ``` Strongly recommended, to constrain the form of target data passed to fit: ```julia -MLJBase.target_scitype(::Type{<:SomeSupervisedModel}) = Unknown +MMI.target_scitype(::Type{<:SomeSupervisedModel}) = Unknown ``` Optional but recommended: ```julia -MLJBase.package_url(::Type{<:SomeSupervisedModel}) = "unknown" -MLJBase.is_pure_julia(::Type{<:SomeSupervisedModel}) = false -MLJBase.package_license(::Type{<:SomeSupervisedModel}) = "unknown" +MMI.package_url(::Type{<:SomeSupervisedModel}) = "unknown" +MMI.is_pure_julia(::Type{<:SomeSupervisedModel}) = false +MMI.package_license(::Type{<:SomeSupervisedModel}) = "unknown" ``` If `SomeSupervisedModel` supports sample 
weights, then instead of the `fit` above, one implements ```julia -MLJBase.fit(model::SomeSupervisedModel, verbosity::Integer, X, y, w=nothing) -> fitresult, cache, report +MMI.fit(model::SomeSupervisedModel, verbosity::Integer, X, y, w=nothing) -> fitresult, cache, report ``` and, if appropriate ```julia -MLJBase.update(model::SomeSupervisedModel, verbosity, old_fitresult, old_cache, X, y, w=nothing) = - MLJBase.fit(model, verbosity, X, y, w) +MMI.update(model::SomeSupervisedModel, verbosity, old_fitresult, old_cache, X, y, w=nothing) = + MMI.fit(model, verbosity, X, y, w) ``` Additionally, if `SomeSupervisedModel` supports sample weights, one must declare ```julia -MLJBase.supports_weights(model::Type{<:SomeSupervisedModel}) = true +MMI.supports_weights(model::Type{<:SomeSupervisedModel}) = true ``` -#### The form of data for fitting and predicting +### The form of data for fitting and predicting The model implementer does not have absolute control over the types of data `X`, `y` and `Xnew` appearing in the `fit` and `predict` methods @@ -266,38 +281,38 @@ MLJ recommendation is to specify a `Table` scientific type for `X` matrix input can coerce their inputs appropriately; see below. -##### Additional type coercions +#### Additional type coercions If the core algorithm being wrapped requires data in a different or more specific form, then `fit` will need to coerce the table into the form desired (and the same coercions applied to `X` will have to be repeated for `Xnew` in `predict`). To assist with common cases, MLJ provides the convenience method -`MLJBase.matrix`. `MLJBase.matrix(Xtable)` has type `Matrix{T}` where +`MMI.matrix`. `MMI.matrix(Xtable)` has type `Matrix{T}` where `T` is the tightest common type of elements of `Xtable`, and `Xtable` -is any table. +is any table. -Other auxiliary methods provided by MLJBase for handling tabular data +Other auxiliary methods provided by MLJModelInterface for handling tabular data are: `selectrows`, `selectcols`, `select` and `schema` (for extracting the size, names and eltypes of a table's columns). See [Convenience methods](@ref) below for details. -##### Important convention +#### Important convention It is to be understood that the columns of the table `X` correspond to features and the rows to observations. So, for example, the predict method for a linear regression model might look like `predict(model, -w, Xnew) = MLJBase.matrix(Xnew)*w`, where `w` is the vector of learned +w, Xnew) = MMI.matrix(Xnew)*w`, where `w` is the vector of learned coefficients. -#### The fit method +### The fit method A compulsory `fit` method returns three objects: ```julia -MLJBase.fit(model::SomeSupervisedModel, verbosity::Int, X, y) -> fitresult, cache, report +MMI.fit(model::SomeSupervisedModel, verbosity::Int, X, y) -> fitresult, cache, report ``` *Note.* The `Int` typing of `verbosity` cannot be omitted. @@ -338,11 +353,11 @@ generally avoid doing any of its own logging. above `fit`: ```julia -MLJBase.fit(model::SomeSupervisedModel, verbosity::Int, X, y, w=nothing) -> fitresult, cache, report +MMI.fit(model::SomeSupervisedModel, verbosity::Int, X, y, w=nothing) -> fitresult, cache, report ``` -#### The fitted_params method +### The fitted_params method A `fitted_params` method may be optionally overloaded. It's purpose is to provide MLJ access to a user-friendly representation of the @@ -350,7 +365,7 @@ learned parameters of the model (as opposed to the hyperparameters). They must be extractable from `fitresult`. 
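Pulling these pieces together, here is a hedged end-to-end sketch for the `RidgeRegressor` struct declared earlier, covering `fit`, `predict` and `fitted_params`; it ignores any intercept term and is *not* the registered MLJModels implementation:

```julia
using LinearAlgebra  # for the identity operator I

function MMI.fit(model::RidgeRegressor, verbosity::Int, X, y)
    Xmat = MMI.matrix(X)                                     # table -> Matrix; columns are features
    coefficients = (Xmat'Xmat + model.lambda*I) \ (Xmat'y)   # ridge solution, no intercept
    fitresult = coefficients
    cache = nothing                                          # nothing worth passing to update
    report = NamedTuple()                                    # nothing worth reporting
    return fitresult, cache, report
end

MMI.predict(model::RidgeRegressor, fitresult, Xnew) = MMI.matrix(Xnew)*fitresult

MMI.fitted_params(model::RidgeRegressor, fitresult) = (coefficients=fitresult,)
```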
```julia -MLJBase.fitted_params(model::SomeSupervisedModel, fitresult) -> friendly_fitresult::NamedTuple +MMI.fitted_params(model::SomeSupervisedModel, fitresult) -> friendly_fitresult::NamedTuple ``` For a linear model, for example, one might declare something like @@ -359,16 +374,16 @@ For a linear model, for example, one might declare something like The fallback is to return `(fitresult=fitresult,)`. -#### The predict method +### The predict method A compulsory `predict` method has the form ```julia -MLJBase.predict(model::SomeSupervisedModel, fitresult, Xnew) -> yhat +MMI.predict(model::SomeSupervisedModel, fitresult, Xnew) -> yhat ``` Here `Xnew` will have the same form as the `X` passed to `fit`. -##### Prediction types for deterministic responses. +#### Prediction types for deterministic responses. In the case of `Deterministic` models, `yhat` should have the same scitype as the `y` passed to `fit` (see above). Any `CategoricalValue` @@ -381,7 +396,7 @@ MLJ.classes(y[j])` for all admissible `i` and `j`. (The method `classes` is described under [Convenience methods](@ref) below). Unfortunately, code not written with the preservation of categorical -levels in mind poses special problems. To help with this, MLJBase +levels in mind poses special problems. To help with this, MLJModelInterface provides three utility methods: `int` (for converting a `CategoricalValue` or `CategoricalString` into an integer, the ordering of these integers being consistent with that of the pool), @@ -399,10 +414,10 @@ nominal target `yint` of type `Vector{<:Integer}` then a `fit` method may look something like this: ```julia -function MLJBase.fit(model::SomeSupervisedModel, verbosity, X, y) - yint = MLJBase.int(y) +function MMI.fit(model::SomeSupervisedModel, verbosity, X, y) + yint = MMI.int(y) a_target_element = y[1] # a CategoricalValue/String - decode = MLJBase.decoder(a_target_element) # can be called on integers + decode = MMI.decoder(a_target_element) # can be called on integers core_fitresult = SomePackage.fit(X, yint, verbosity=verbosity) @@ -416,7 +431,7 @@ end while a corresponding deterministic `predict` operation might look like this: ```julia -function MLJBase.predict(model::SomeSupervisedModel, fitresult, Xnew) +function MMI.predict(model::SomeSupervisedModel, fitresult, Xnew) decode, core_fitresult = fitresult yhat = SomePackage.predict(core_fitresult, Xnew) return decode.(yhat) # or decode(yhat) also works @@ -430,18 +445,18 @@ for `SVMClassifier`. Of course, if you are coding a learning algorithm from scratch, rather than wrapping an existing one, these extra measures may be unnecessary. -##### Prediction types for probabilistic responses +#### Prediction types for probabilistic responses In the case of `Probabilistic` models with univariate targets, `yhat` must be an `AbstractVector` whose elements are distributions (one distribution per row of `Xnew`). Presently, a *distribution* is any object `d` for which -`MLJBase.isdistribution(::d) = true`, which is currently restricted to +`MMI.isdistribution(::d) = true`, which is currently restricted to objects subtyping `Distributions.Sampleable` from the package Distributions.jl. -Use the distribution `MLJBase.UnivariateFinite` for `Probabilistic` +Use the distribution `MMI.UnivariateFinite` for `Probabilistic` models predicting a target with `Finite` scitype (classifiers). 
In this case each element of the training target `y` is a `CategoricalValue` or `CategoricalString`, as in this contrived example: @@ -458,7 +473,7 @@ we need it); this is accessible using the convenience method ```julia julia> yes = y[1] -julia> levels = MLJBase.classes(yes) +julia> levels = MMI.classes(yes) 3-element Array{CategoricalValue{Symbol,UInt32},1}: :maybe :no @@ -471,7 +486,7 @@ y[1]` and `no = y[2]` are to be assigned respective probabilities of follows: ```julia -julia> d = MLJBase.UnivariateFinite([yes, no], [0.2, 0.8]) +julia> d = MMI.UnivariateFinite([yes, no], [0.2, 0.8]) UnivariateFinite(:yes=>0.2, :maybe=>0.0, :no=>0.8) julia> pdf(d, yes) @@ -489,21 +504,21 @@ for an example of a Probabilistic classifier implementation. ```@docs -MLJBase.UnivariateFinite +MMI.UnivariateFinite ``` -*Important note on binary classifiers.* ScientificTypes.jl has no -"Binary" scitype distinct from `Multiclass{2}` or `OrderedFactor{2}`; -`Binary` is just an alias for -`Union{Multiclass{2},OrderedFactor{2}}`. The `target_scitype` of a -binary classifier will generally be `AbstractVector{<:Binary}` and -according to the *mlj* scitype convention, elements of `y` have type -`CategoricalValue` or `CategoricalString`, and *not* `Bool`. See +*Important note on binary classifiers.* There is no "Binary" scitype +distinct from `Multiclass{2}` or `OrderedFactor{2}`; `Binary` is just +an alias for `Union{Multiclass{2},OrderedFactor{2}}`. The +`target_scitype` of a binary classifier will generally be +`AbstractVector{<:Binary}` and according to the *mlj* scitype +convention, elements of `y` have type `CategoricalValue` or +`CategoricalString`, and *not* `Bool`. See [BinaryClassifier](https://github.com/alan-turing-institute/MLJModels.jl/blob/master/src/GLM.jl) for an example. -#### Trait declarations +### Trait declarations Two trait functions allow the implementer to restrict the types of data `X`, `y` and `Xnew` discussed above. The MLJ task interface uses @@ -513,7 +528,7 @@ attempt to use your model with inappropriately typed data. The trait functions `input_scitype` and `target_scitype` take scientific data types as values. We assume here familiarity with -[ScientificTypes.jl](https://github.com/alan-turing-institute/ScientificTypes.jl) +[MLJScientificTypes.jl](https://github.com/alan-turing-institute/MLJScientificTypes.jl) (see [Getting Started](index.md) for the basics). 
For example, to ensure that the `X` presented to the @@ -521,14 +536,13 @@ For example, to ensure that the `X` presented to the (and hence `AbstractFloat` machine type), one declares ```julia -MLJBase.input_scitype(::Type{<:DecisionTreeClassifier}) = MLJBase.Table(MLJBase.Continuous) +MMI.input_scitype(::Type{<:DecisionTreeClassifier}) = MMI.Table(MMI.Continuous) ``` or, equivalently, ```julia -using ScientificTypes -MLJBase.input_scitype(::Type{<:DecisionTreeClassifier}) = Table(Continuous) +MMI.input_scitype(::Type{<:DecisionTreeClassifier}) = Table(Continuous) ``` If, instead, columns were allowed to have either: (i) a mixture of `Continuous` and `Missing` @@ -536,7 +550,7 @@ values, or (ii) `Count` (i.e., integer) values, then the declaration would be ```julia -MLJBase.input_scitype(::Type{<:DecisionTreeClassifier}) = Table(Union{Continuous,Missing},Count) +MMI.input_scitype(::Type{<:DecisionTreeClassifier}) = Table(Union{Continuous,Missing},Count) ``` Similarly, to ensure the target is an AbstractVector whose elements @@ -544,10 +558,10 @@ have `Finite` scitype (and hence `CategoricalValue` or `CategoricalString` machine type) we declare ```julia -MLJBase.target_scitype(::Type{<:DecisionTreeClassifier}) = AbstractVector{<:Finite} +MMI.target_scitype(::Type{<:DecisionTreeClassifier}) = AbstractVector{<:Finite} ``` -##### Multivariate targets +#### Multivariate targets The above remarks continue to hold unchanged for the case multivariate targets. For example, if we declare @@ -556,14 +570,14 @@ targets. For example, if we declare target_scitype(SomeSupervisedModel) = Table(Continuous) ``` -then this constrains the target to be any table whose columns have `Continous` element scitype (i.e., `AbstractFloat`), while +then this constrains the target to be any table whose columns have `Continous` element scitype (i.e., `AbstractFloat`), while ```julia target_scitype(SomeSupervisedModel) = Table(Continuous, Finite{2}) ``` restricts to tables with continuous or binary (ordered or unordered) -columns. +columns. 
For predicting variable length sequences of, say, binary values (`CategoricalValue`s or `CategoricalString`s with some common size-two @@ -623,7 +637,7 @@ end you might declare (order matters): ```julia -MLJBase.hyperparameter_ranges(::Type{<:MyModel}) = +MMI.hyperparameter_ranges(::Type{<:MyModel}) = (range(Float64, :alpha, lower=0, upper=1, scale=:log), range(Int, :beta, lower=1, upper=Inf, origin=100, unit=50, scale=:log), nothing) @@ -633,50 +647,50 @@ Here is the complete list of trait function declarations for `DecisionTreeClassi ([source](https://github.com/alan-turing-institute/MLJModels.jl/blob/master/src/DecisionTree.jl)): ```julia -MLJBase.input_scitype(::Type{<:DecisionTreeClassifier}) = MLJBase.Table(MLJBase.Continuous) -MLJBase.target_scitype(::Type{<:DecisionTreeClassifier}) = AbstractVector{<:MLJBase.Finite} -MLJBase.load_path(::Type{<:DecisionTreeClassifier}) = "MLJModels.DecisionTree_.DecisionTreeClassifier" -MLJBase.package_name(::Type{<:DecisionTreeClassifier}) = "DecisionTree" -MLJBase.package_uuid(::Type{<:DecisionTreeClassifier}) = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb" -MLJBase.package_url(::Type{<:DecisionTreeClassifier}) = "https://github.com/bensadeghi/DecisionTree.jl" -MLJBase.is_pure_julia(::Type{<:DecisionTreeClassifier}) = true +MMI.input_scitype(::Type{<:DecisionTreeClassifier}) = MMI.Table(MMI.Continuous) +MMI.target_scitype(::Type{<:DecisionTreeClassifier}) = AbstractVector{<:MMI.Finite} +MMI.load_path(::Type{<:DecisionTreeClassifier}) = "MLJModels.DecisionTree_.DecisionTreeClassifier" +MMI.package_name(::Type{<:DecisionTreeClassifier}) = "DecisionTree" +MMI.package_uuid(::Type{<:DecisionTreeClassifier}) = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb" +MMI.package_url(::Type{<:DecisionTreeClassifier}) = "https://github.com/bensadeghi/DecisionTree.jl" +MMI.is_pure_julia(::Type{<:DecisionTreeClassifier}) = true ``` -Alternatively these traits can also be declared using `MLJBase.metadata_pkg` and `MLJBase.metadata_model` helper functions as: +Alternatively these traits can also be declared using `MMI.metadata_pkg` and `MMI.metadata_model` helper functions as: ```julia -MLJBase.metadata_pkg(DecisionTreeClassifier,name="DecisionTree", +MMI.metadata_pkg(DecisionTreeClassifier,name="DecisionTree", uuid="7806a523-6efd-50cb-b5f6-3fa6f1930dbb", url="https://github.com/bensadeghi/DecisionTree.jl", julia=true) - -MLJBase.metadata_model(DecisionTreeClassifier, - input=MLJBase.Table(MLJBase.Continuous), - target=AbstractVector{<:MLJBase.Finite}, + +MMI.metadata_model(DecisionTreeClassifier, + input=MMI.Table(MMI.Continuous), + target=AbstractVector{<:MMI.Finite}, path="MLJModels.DecisionTree_.DecisionTreeClassifier") ``` ```@docs -MLJBase.metadata_pkg +MMI.metadata_pkg ``` ```@docs -MLJBase.metadata_model +MMI.metadata_model ``` You can test all your declarations of traits by calling `MLJBase.info_dict(SomeModel)`. -#### Iterative models and the update! method +### Iterative models and the update! method An `update` method may be optionally overloaded to enable a call by MLJ to retrain a model (on the same training data) to avoid repeating -computations unnecessarily. +computations unnecessarily. 
```julia -MLJBase.update(model::SomeSupervisedModel, verbosity, old_fitresult, old_cache, X, y) -> fit +MMI.update(model::SomeSupervisedModel, verbosity, old_fitresult, old_cache, X, y) -> fit result, cache, report -MLJBase.update(model::SomeSupervisedModel, verbosity, old_fitresult, old_cache, X, y, w=nothing) -> fit +MMI.update(model::SomeSupervisedModel, verbosity, old_fitresult, old_cache, X, y, w=nothing) -> fit result, cache, report ``` @@ -685,7 +699,7 @@ sample weights. If an MLJ `Machine` is being `fit!` and it is not the first time, then `update` is called instead of `fit`, unless the machine `fit!` has -been called with a new `rows` keyword argument. However, `MLJBase` +been called with a new `rows` keyword argument. However, `MLJModelInterface` defines a fallback for `update` which just calls `fit`. For context, see [MLJ Internals](internals.md). @@ -699,7 +713,7 @@ generally relevant use-case is iterative models, where calls to increase the number of iterations only restarts the iterative procedure if other hyperparameters have also changed. (A useful method for inspecting model changes in such cases is -`MLJBase.is_same_except`. ) For an example, see the MLJ [ensemble +`MLJModelInterface.is_same_except`. ) For an example, see the MLJ [ensemble code](https://github.com/alan-turing-institute/MLJ.jl/blob/master/src/ensembles.jl). A third use-case is to avoid repeating time-consuming preprocessing of @@ -713,7 +727,7 @@ of `X` and `y`), as this is also passed as an argument to the `update` method. -### Unsupervised models +## Unsupervised models TODO @@ -724,38 +738,38 @@ declares an `output_scitype` trait. Instead of implementing a optional `inverse_transform` operation. -### Convenience methods +## Convenience methods ```@docs -MLJBase.int +MLJModelInterface.int ``` ```@docs -MLJBase.classes +MLJModelInterface.classes ``` ```@docs -MLJBase.decoder +MLJModelInterface.decoder ``` ```@docs -MLJBase.matrix +MLJModelInterface.matrix ``` ```@docs -MLJBase.table +MLJModelInterface.table ``` ```@docs -MLJBase.select +MLJModelInterface.select ``` ```@docs -MLJBase.selectrows +MLJModelInterface.selectrows ``` ```@docs -MLJBase.selectcols +MLJModelInterface.selectcols ``` ```@docs @@ -771,23 +785,23 @@ MLJBase.complement ``` ```@docs -ScientificTypes.schema +MLJScientificTypes.schema ``` ```@docs -MLJBase.nrows +MLJModelInterface.nrows ``` ```@docs -ScientificTypes.scitype +MLJScientificTypes.scitype ``` ```@docs -ScientificTypes.scitype_union +MLJScientificTypes.scitype_union ``` ```@docs -ScientificTypes.elscitype +MLJScientificTypes.elscitype ``` @@ -836,10 +850,18 @@ registration. If changes are made, lodge a new issue at [MLJ](https://github.com/alan-turing-institute/MLJ) requesting your changes to be updated. -### How addd model to the MLJ model registry? +### How to add a model to the MLJ model registry? -The MLJ model registry is located in the [MLJModels.jl repository](https://github.com/alan-turing-institute/MLJModels.jl). To add a model, you need to follow these steps +The MLJ model registry is located in the [MLJModels.jl +repository](https://github.com/alan-turing-institute/MLJModels.jl). To +add a model, you need to follow these steps 1) Ensure your model conforms to the interface defined above -2) Raise an issue at https://github.com/alan-turing-institute/MLJModels.jl/issues and point out where the MLJ-interface implementation is, e.g. by providing a link to the code. 
-3) An administrator will then review your implementation and work with you to add the model to the registry + +2) Raise an issue at +https://github.com/alan-turing-institute/MLJModels.jl/issues and point +out where the MLJ-interface implementation is, e.g. by providing a +link to the code. + +3) An administrator will then review your implementation and work with +you to add the model to the registry diff --git a/docs/src/api.md b/docs/src/api.md index 8d8964b7a..b5e9504de 100755 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -1,12 +1,9 @@ # API -### Functions +## Functions - - - +TODO -### Index +## Index -```@index -``` +TODO diff --git a/docs/src/built_in_transformers.md b/docs/src/built_in_transformers.md index 481443f62..258445e33 100644 --- a/docs/src/built_in_transformers.md +++ b/docs/src/built_in_transformers.md @@ -1,6 +1,5 @@ # Built-in Transformers - ```@docs MLJModels.UnivariateStandardizer MLJModels.Standardizer diff --git a/docs/src/common_mlj_workflows.md b/docs/src/common_mlj_workflows.md index 400057bad..ab08cf678 100644 --- a/docs/src/common_mlj_workflows.md +++ b/docs/src/common_mlj_workflows.md @@ -327,7 +327,7 @@ Bound the wrapped model to data: tuned = machine(tuned_forest, X, y) ``` -Fitting the resultant machine optimizes the hyperaparameters specified +Fitting the resultant machine optimizes the hyperparameters specified in `range`, using the specified `tuning` and `resampling` strategies and performance `measure` (possibly a vector of measures), and retrains on all data bound to the machine: @@ -359,7 +359,7 @@ using Plots plot(tuned) ``` -![](workflows_tuning_plot.png) +![](img/workflows_tuning_plot.png) Predicting on new data using the optimized model: @@ -367,7 +367,7 @@ Predicting on new data using the optimized model: predict(tuned, Xnew) ``` -# Constructing a linear pipeline +## Constructing a linear pipeline *Reference:* [Composing Models](composing_models.md) @@ -403,7 +403,7 @@ pipe2 = @pipeline MyPipe2(X -> coerce(X, :age=>Continuous), inverse = z -> exp.(z)) ``` -# Creating a homogeneous ensemble of models +## Creating a homogeneous ensemble of models *Reference:* [Homogeneous Ensembles](homogeneous_ensembles.md) @@ -415,7 +415,7 @@ forest = machine(forest_model, X, y) evaluate!(forest, measure=cross_entropy) ``` -# Performance curves +## Performance curves Generate a plot of performance, as a function of some hyperparameter (building on the preceding example) @@ -432,12 +432,12 @@ curve = learning_curve(forest, verbosity=0) ``` - ```julia +```julia using Plots plot(curve.parameter_values, curve.measurements, xlab=curve.parameter_name, xscale=curve.parameter_scale) ``` -![](workflows_learning_curve.png) +![](img/workflows_learning_curve.png) Multiple curves: @@ -453,8 +453,8 @@ curve = learning_curve(forest, ``` ```julia -plot(curve.parameter_values, curve.measurements, +plot(curve.parameter_values, curve.measurements, xlab=curve.parameter_name, xscale=curve.parameter_scale) ``` -![](workflows_learning_curves.png) +![](img/workflows_learning_curves.png) diff --git a/docs/src/composing_models.md b/docs/src/composing_models.md index 378613774..c3abb4515 100644 --- a/docs/src/composing_models.md +++ b/docs/src/composing_models.md @@ -7,13 +7,12 @@ these learning networks can be applied directly to learning tasks, they are more commonly used to specify new re-usable, stand-alone, composite model types, that behave like any other model type. The main novelty of composite models is that they include other models as -hyper-parameters. +hyper-parameters. 
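For instance, in the ensemble wrapper used in the workflows above, the atomic model is itself a hyper-parameter of the composite (an illustrative sketch; the field names follow the `EnsembleModel` usage shown earlier):

```julia
tree = @load DecisionTreeClassifier       # an ordinary (atomic) model
forest = EnsembleModel(atom=tree, n=300)  # a composite model: `tree` is stored in the `atom` field
forest.atom.n_subfeatures = 3             # nested hyper-parameters are reachable in the usual way
```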
That said, MLJ also provides dedicated syntax for the most common composition use-cases, which are described first below. A description of the general framework begins at [Learning Networks](@ref). - ## Linear pipelines In MLJ a *pipeline* is a composite model in which models are chained @@ -25,7 +24,6 @@ To illustrate basic construction of a pipeline, consider the following toy data: ```@setup 7 -import Base.eval using MLJ MLJ.color_off() ``` @@ -164,7 +162,7 @@ extra flexibility is essential. ### Building a simple learning network -![](wrapped_ridge.png) +![](img/wrapped_ridge.png) The diagram above depicts a learning network which standardizes the input data `X`, learns an optimal Box-Cox transformation for the diff --git a/docs/src/evaluating_model_performance.md b/docs/src/evaluating_model_performance.md index d00d99a2c..bc89bb5a5 100644 --- a/docs/src/evaluating_model_performance.md +++ b/docs/src/evaluating_model_performance.md @@ -1,9 +1,9 @@ # Evaluating Model Performance MLJ allows quick evaluation of a supervised model's performance -against a battery of selected losses or scores. For more on available -performance measures, see [Performance -Measures](performance_measures.md). +against a battery of selected losses or scores. +For more on available performance measures, see +[Performance Measures](performance_measures.md). In addition to hold-out and cross-validation, the user can specify their own list of train/test pairs of row indices for resampling, or @@ -12,13 +12,11 @@ define their own re-usable resampling strategies. For simultaneously evaluating *multiple* models and/or data sets, see [Benchmarking](benchmarking.md). - -### Evaluating against a single measure +## Evaluating against a single measure ```@setup evaluation_of_supervised_models -import Base.eval using MLJ -MLJ.color_off() +MLJ.color_off() ``` ```@repl evaluation_of_supervised_models @@ -42,7 +40,7 @@ evaluate!(mach, resampling=cv, measure=l2, verbosity=0) (The latter call is a mutating call as the learned parameters stored in the machine potentially change. ) -### Multiple measures +## Multiple measures ```@repl evaluation_of_supervised_models evaluate!(mach, @@ -50,7 +48,7 @@ evaluate!(mach, measure=[l1, rms, rmslp1], verbosity=0) ``` -### Custom measures and weighted measures +## Custom measures and weighted measures ```@repl evaluation_of_supervised_models my_loss(yhat, y) = maximum((yhat - y).^2); @@ -71,7 +69,7 @@ evaluate!(mach, weights=weights, verbosity=0) ``` -### User-specified train/test sets +## User-specified train/test sets Users can either provide their own list of train/test pairs of row indices for resampling, as in this example: @@ -86,7 +84,7 @@ Or define their own re-usable `ResamplingStrategy` objects, - see [Custom resampling strategies](@ref) below. -### Built-in resampling strategies +## Built-in resampling strategies ```@docs @@ -102,7 +100,7 @@ MLJBase.StratifiedCV ``` -### Custom resampling strategies +## Custom resampling strategies To define your own resampling strategy, make relevant parameters of your strategy the fields of a new type `MyResamplingStrategy <: @@ -113,6 +111,7 @@ MLJ.train_test_pairs(my_strategy::MyResamplingStrategy, rows) MLJ.train_test_pairs(my_strategy::MyResamplingStrategy, rows, y) MLJ.train_test_pairs(my_strategy::MyResamplingStrategy, rows, X, y) ``` + Each method takes a vector of indices `rows` and return a vector `[(t1, e1), (t2, e2), ... (tk, ek)]` of train/test pairs of row indices selected from `rows`. 
Here `X`, `y` are the input and target @@ -154,7 +153,7 @@ function train_test_pairs(holdout::Holdout, rows) end ``` -### API +## API ```@docs MLJBase.evaluate! diff --git a/docs/src/frequently_asked_questions.md b/docs/src/frequently_asked_questions.md index a4fa5fefb..32e98f27d 100755 --- a/docs/src/frequently_asked_questions.md +++ b/docs/src/frequently_asked_questions.md @@ -1,6 +1,6 @@ # Frequently Asked Questions -### Julia already has a great machine learning toolbox, ScitkitLearn.jl. Why MLJ? +## Julia already has a great machine learning toolbox, ScitkitLearn.jl. Why MLJ? An alternative machine learning toolbox for Julia users is [ScikitLearn.jl](https://github.com/cstjean/ScikitLearn.jl). Initially @@ -27,14 +27,14 @@ term: hyperparameters, using automatic differentiation libraries such as Flux.jl; and (ii) GPU performance boosts without major code refactoring, using CuArrays.jl. - + - **Registry for model metadata.** In ScikitLearn.jl the list of available models, as well as model metadata (whether a model handles categorical inputs, whether is can make probabilistic predictions, etc) must be gleaned from documentation. In MLJ, this information is more structured and is accessible to MLJ via a searchable model registry (without the models needing to be loaded). - + - **Flexible API for model composition.** Pipelines in scikit-learn are more of an afterthought than an integral part of the original design. By contrast, MLJ's user-interaction API was predicated on the @@ -55,7 +55,7 @@ term: [skpro](https://github.com/alan-turing-institute/skpro) project, MLJ aims to improve support for Bayesian statistics and probabilistic graphical models. - + - **Universal adoption of categorical data types.** Python's scientific array library NumPy has no dedicated data type for representing categorical data (i.e., no type that tracks the pool of @@ -71,7 +71,6 @@ term: probabilistic prediction will nevertheless predict a distribution whose support includes the missing class, but which is appropriately weighted with probability zero. - + Finally, we note that a large number of ScikitLearn.jl models are now wrapped for use in MLJ. - diff --git a/docs/src/glossary.md b/docs/src/glossary.md index 032e31fba..9fd8160b5 100755 --- a/docs/src/glossary.md +++ b/docs/src/glossary.md @@ -2,16 +2,15 @@ Note: This glossary includes some detail intended mainly for MLJ developers. -### Basics +## Basics -#### task (object of type `Task`) +### task (object of type `Task`) Data plus a learning objective (e.g., "probabilistic prediction of Sales"). In MLJ a task does not include a description of how the completed task is to be evaluated. - -#### hyperparameters +### hyperparameters Parameters on which some learning algorithm depends, specified before the algorithm is applied, and where learning is interpreted in the @@ -20,16 +19,16 @@ broadest sense. For example, PCA feature reduction is a data, governed by a dimension hyperparameter. Hyperparameters in our sense may specify configuration (eg, number of parallel processes) even when this does not effect the end-product of learning. (But we -exlcude verbosity level.) +exclude verbosity level.) -#### model (object of abstract type `Model`) +### model (object of abstract type `Model`) Object collecting together hyperameters of a single algorithm. Most models are classified either as *supervised* or *unsupervised* models (generally, "transformers"). 
-#### fit-result (type generally defined outside of MLJ) +### fit-result (type generally defined outside of MLJ) Also known as "learned" or "fitted" parameters, these are "weights", "coefficients", or similar parameters learned by an algorithm, after @@ -38,7 +37,7 @@ of a random forest, the coefficients and intercept of a linear model, or the rotation and projection matrices of PCA reduction scheme. -#### operation +### operation Data-manipulating operations (methods) parameterized by some fit-result. For supervised learners, the `predict`, `predict_mean`, @@ -49,12 +48,11 @@ on a fit-result (e.g., a broadcasted logarithm) which is then called *static* operation for clarity. An operation that is not static is *dynamic*. - -#### machine (object of type `Machine`) +### machine (object of type `Machine`) An object consisting of: -(1) A model +(1) A model (2) A fit-result (undefined until training) @@ -73,19 +71,17 @@ Machines are trained by calls to a `fit` method which may be passed an optional argument specifying the rows of data to be used in training. - -### Learning Networks and Composite Models +## Learning Networks and Composite Models *Note:* Multiple nodal machines may share the same model, and multiple learning nodes may share the same nodal machine. -#### source node (object of type `Source`) +### source node (object of type `Source`) A container for training data and point of entry for new data in a learning network (see below). - -#### nodal machine (object of type `NodalMachine`) +### nodal machine (object of type `NodalMachine`) Like a machine with the following exceptions: @@ -93,40 +89,33 @@ Like a machine with the following exceptions: in the learning network, instead of data. (2) The object internally records dependencies on other nodal -machines, as implied by the training arguments, and so on. +machines, as implied by the training arguments, and so on. -#### node (object of type `Node`) +### node (object of type `Node`) Essentially a nodal machine wrapped in an associated operation (e.g., `predict` or `inverse_transform`). In detail, it consists of: -(1) An operation, static or dynamic. +1. An operation, static or dynamic. +1. A nodal machine, void if the operation is static. +1. Upstream connections to other learning or source nodes, specified by a list of *arguments* (one for each argument of the operation). +1. Metadata recording the dependencies of the object's machine, and the dependencies on other nodal machines implied by its arguments, and the training arguments of its nodal machine. -(2) A nodal machine, void if the operation is static. -(3) Upstream connections to other learning or source nodes, specified by a list - of *arguments* (one for each argument of the operation). - -(4) Metadata recording the dependencies of the object's machine, and -the dependecies on other nodal machines implied by its -arguments, and the training arguments of its nodel machine. - -#### learning network +### learning network An acyclic directed graph implicit in the connections of a collection of source(s) and nodes. Each connected component is ordinarily restricted to have a unique source. -#### wrapper +### wrapper Any model with one or more other models as hyperparameters. -#### composite model +### composite model Any wrapper, or any learning network, "exported" as a model (see [Composing Models](composing_models.md)). 
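To ground these terms, here is a hedged sketch of a small learning network fragment, following the pattern described in [Composing Models](composing_models.md); the exact `source` signature may differ between MLJ versions:

```julia
Xs = source(X)                 # source node wrapping the input data
ys = source(y)                 # source node wrapping the target
mach = machine(model, Xs, ys)  # a nodal machine: its training arguments are nodes, not data
yhat = predict(mach, Xs)       # a node: the predict operation wrapped with its machine and argument
fit!(yhat)                     # trains every nodal machine the node depends on
yhat()                         # calling the node evaluates it on the source data
```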
- diff --git a/docs/src/MLPackages.png b/docs/src/img/MLPackages.png similarity index 100% rename from docs/src/MLPackages.png rename to docs/src/img/MLPackages.png diff --git a/docs/src/heatmap.png b/docs/src/img/heatmap.png similarity index 100% rename from docs/src/heatmap.png rename to docs/src/img/heatmap.png diff --git a/docs/src/learning_curve42.png b/docs/src/img/learning_curve42.png similarity index 100% rename from docs/src/learning_curve42.png rename to docs/src/img/learning_curve42.png diff --git a/docs/src/learning_curve_n.png b/docs/src/img/learning_curve_n.png similarity index 100% rename from docs/src/learning_curve_n.png rename to docs/src/img/learning_curve_n.png diff --git a/docs/src/learningcurves.png b/docs/src/img/learningcurves.png similarity index 100% rename from docs/src/learningcurves.png rename to docs/src/img/learningcurves.png diff --git a/docs/src/scitypes.png b/docs/src/img/scitypes.png similarity index 100% rename from docs/src/scitypes.png rename to docs/src/img/scitypes.png diff --git a/docs/src/scitypes_small.png b/docs/src/img/scitypes_small.png similarity index 100% rename from docs/src/scitypes_small.png rename to docs/src/img/scitypes_small.png diff --git a/docs/src/tuning_plot.png b/docs/src/img/tuning_plot.png similarity index 100% rename from docs/src/tuning_plot.png rename to docs/src/img/tuning_plot.png diff --git a/docs/src/two_model_stack.png b/docs/src/img/two_model_stack.png similarity index 100% rename from docs/src/two_model_stack.png rename to docs/src/img/two_model_stack.png diff --git a/docs/src/workflows_learning_curve.png b/docs/src/img/workflows_learning_curve.png similarity index 100% rename from docs/src/workflows_learning_curve.png rename to docs/src/img/workflows_learning_curve.png diff --git a/docs/src/workflows_learning_curves.png b/docs/src/img/workflows_learning_curves.png similarity index 100% rename from docs/src/workflows_learning_curves.png rename to docs/src/img/workflows_learning_curves.png diff --git a/docs/src/workflows_tuning_plot.png b/docs/src/img/workflows_tuning_plot.png similarity index 100% rename from docs/src/workflows_tuning_plot.png rename to docs/src/img/workflows_tuning_plot.png diff --git a/docs/src/wrapped_ridge.png b/docs/src/img/wrapped_ridge.png similarity index 100% rename from docs/src/wrapped_ridge.png rename to docs/src/img/wrapped_ridge.png diff --git a/docs/src/index.md b/docs/src/index.md index dab909305..301ba05d9 100755 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -5,15 +5,14 @@ ```@setup doda -import Base.eval # hack b/s auto docs put's code in baremodule -import Random.seed! +import Random.seed! using MLJ using InteractiveUtils MLJ.color_off() -seed!(1234) +seed!(1234) ``` -### Choosing and evaluating a model +## Choosing and evaluating a model To load some demonstration data, add [RDatasets](https://github.com/JuliaStats/RDatasets.jl) to your load @@ -43,7 +42,7 @@ learning algorithm indicated by the struct name. Assuming the DecisionTree.jl package is in your load path, we can use `@load` to load the code defining the `DecisionTreeClassifier` model type. This macro also returns an instance, with default -hyperparameters. +hyperparameters. Drop the `verbosity=1` declaration for silent loading: @@ -59,7 +58,7 @@ how to add the package to your current environment. 
Once loaded, a model can be evaluated with the `evaluate` method: ```@repl doda -evaluate(tree_model, X, y, +evaluate(tree_model, X, y, resampling=CV(shuffle=true), measure=cross_entropy, verbosity=0) ``` @@ -67,7 +66,7 @@ Evaluating against multiple performance measures is also possible. See [Evaluating Model Performance](evaluating_model_performance.md) for details. -### A preview of data type specification in MLJ +## A preview of data type specification in MLJ The target `y` above is a categorical vector, which is appropriate because our model is a decision tree *classifier*: @@ -109,13 +108,13 @@ yint = Int.(y.refs); scitype(yint) ``` -and using `yint` in place of `y` in classification problems will fail. +and using `yint` in place of `y` in classification problems will fail. For more on scientific types, see [Data containers and scientific types](@ref) below. -### Fit and predict +## Fit and predict To illustrate MLJ's fit and predict interface, let's perform our performance evaluations by hand, but using a simple holdout set, @@ -193,8 +192,7 @@ evaluate!(tree, resampling=Holdout(fraction_train=0.7, shuffle=true), verbosity=0) ``` - -### Next steps +## Next steps To learn a little more about what MLJ can do, browse [Common MLJ Workflows](common_mlj_workflows.md) or MLJ's @@ -203,7 +201,7 @@ returning to the manual as needed. *Read at least the remainder of this page before considering serious use of MLJ.* -### Prerequisites +## Prerequisites MLJ assumes some familiarity with the `CategoricalValue` and `CategoricalString` types from @@ -214,14 +212,14 @@ predictors, a basic acquaintance with also assumed. -### Data containers and scientific types +## Data containers and scientific types The MLJ user should acquaint themselves with some basic assumptions about the form of data expected by MLJ, as outlined -below. +below. ``` -machine(model::Supervised, X, y) +machine(model::Supervised, X, y) machine(model::Unsupervised, X) ``` @@ -232,26 +230,28 @@ as `Array{Float32, 2}`). Similar remarks apply to the input `X` of an unsupervised model. Scientific types are julia types defined in the package -[ScientificTypes.jl](https://github.com/alan-turing-institute/ScientificTypes.jl), -which also defines the convention used here (and there called *mlj*) -for assigning a specific scientific type (interpretation) to each -julia object (see the `scitype` examples below). +[ScientificTypes.jl](https://github.com/alan-turing-institute/ScientificTypes.jl); +the package +[MLJScientificTypes](https://github.com/alan-turing-institute/MLJScientificTypes.jl) +implements the particular convention used in the MLJ universe for +assigning a specific scientific type (interpretation) to each julia +object (see the `scitype` examples below). The basic "scalar" scientific types are `Continuous`, `Multiclass{N}`, `OrderedFactor{N}` and `Count`. Be sure you read [Container element types](@ref) below to be guarantee your scalar data is interpreted correctly. Tools exist to coerce the data to have the appropriate scientfic type; see -[ScientificTypes.jl](https://github.com/alan-turing-institute/ScientificTypes.jl) +[MLJScientificTypes.jl](https://github.com/alan-turing-institute/MLJScientificTypes.jl) or run `?coerce` for details. - + Additionally, most data containers - such as tuples, vectors, matrices and tables - have a scientific type. -![](scitypes.png) +![](img/scitypes.png) -*Figure 1. Part of the scientific type heirarchy in* ScientificTypes.jl. +*Figure 1. 
Part of the scientific type hierarchy in* [ScientificTypes.jl](https://github.com/alan-turing-institute/ScientificTypes.jl). ```@repl doda scitype(4.6) @@ -262,7 +262,7 @@ X = (x1=x1, x2=rand(4), x3=rand(4)) # a "column table" scitype(X) ``` -#### Tabular data +### Tabular data All data containers compatible with the [Tables.jl](https://github.com/JuliaData/Tables.jl) interface (which @@ -275,8 +275,7 @@ of the columns, which can be individually inspected using `schema`: schema(X) ``` - -#### Inputs +### Inputs Since an MLJ model only specifies the scientific type of data, if that type is `Table` - which is the case for the majority of MLJ models - @@ -288,31 +287,27 @@ MLJ.table(Xmatrix)`. Specifically, the requirement for an arbitrary model's input is `scitype(X) <: input_scitype(model)`. - -#### Targets +### Targets The target `y` expected by MLJ models is generally an -`AbstractVector`. A multivariate target `y` will generally be table. +`AbstractVector`. A multivariate target `y` will generally be table. Specifically, the type requirement for a model target is `scitype(y) <: target_scitype(model)`. -#### Querying a model for acceptable data types +### Querying a model for acceptable data types Given a model instance, one can inspect the admissible scientific types of its input and target by querying the scientific type of the model itself: - + ```@setup doda tree = @load DecisionTreeClassifier ``` -```@julia doda -julia> tree = DecisionTreeClassifier(); -julia> scitype(tree) -(input_scitype = ScientificTypes.Table{#s13} where #s13<:(AbstractArray{#s12,1} where #s12<:Continuous), - target_scitype = AbstractArray{#s21,1} where #s21<:Finite, - is_probabilistic = true,) +```@repl doda +tree = DecisionTreeClassifier(); +scitype(tree) ``` This does not work if relevant model code has not been loaded. In that @@ -324,26 +319,26 @@ info("DecisionTreeClassifier") ``` -#### Container element types +### Container element types -Models in MLJ will always apply the *mlj* convention described in -[ScientificTypes.jl](https://github.com/alan-turing-institute/ScientificTypes.jl) +Models in MLJ will always apply the `MLJ` convention described in +[MLJScientificTypes.jl](https://github.com/alan-turing-institute/MLJScientificTypes.jl) to decide how to interpret the elements of your container types. Here -are the key aspects of that convention: +are the key features of that convention: - Any `AbstractFloat` is interpreted as `Continuous`. -- Any `Integer` is interpreted as `Count`. +- Any `Integer` is interpreted as `Count`. - Any `CategoricalValue` or `CategoricalString`, `x`, is interpreted as `Multiclass` or `OrderedFactor`, depending on the value of - `x.pool.ordered`. - + `x.pool.ordered`. + - `String`s and `Char`s are *not* interpreted as `Finite`; they have `Unknown` scitype. Coerce vectors of strings or characters to `CategoricalVector`s if they represent `Multiclass` or - `OrderedFactor` data. Do `?coerce` and `?unpack` to learn how. - + `OrderedFactor` data. Do `?coerce` and `?unpack` to learn how. + - In particular, *integers* (including `Bool`s) *cannot be used to represent categorical data.* @@ -353,11 +348,3 @@ represented by an ordered `CategoricalValue` or `CategoricalString`. This data will have scitype `OrderedFactor{2}` and the "true" class is understood to be the *second* class in the ordering. 
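The convention above is easiest to see in action. Below is a small sketch using a hypothetical column table, with `coerce` and `schema` as pointed to in the text (the scitypes in the comments are indicative):

```julia
using MLJ

yint = [1, 2, 2, 1]
scitype(yint)                      # AbstractArray{Count,1}: not usable as a classifier target

y = coerce(yint, OrderedFactor)    # ordered CategoricalVector; elements now have scitype OrderedFactor{2}

X = (height = [1.85, 1.67, 1.50],
     gender = ["male", "female", "female"])   # a column table
schema(X)                          # strings are not yet interpreted as categorical
Xfixed = coerce(X, :gender => Multiclass)     # column-wise coercion
schema(Xfixed)
```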
- - - - - - - - diff --git a/docs/src/internals.md b/docs/src/internals.md index 90ddae3f7..5a50e7358 100755 --- a/docs/src/internals.md +++ b/docs/src/internals.md @@ -1,13 +1,13 @@ -G# Internals +# Internals -### The machine interface, simplified +## The machine interface, simplified The following is simplified description of the `Machine` interface. See also the [Glossary](glossary.md) -#### The Machine type +### The Machine type -````julia +````julia mutable struct Machine{M Continuous, :x2 => OrderedFactor)` to coerce columns `:x1` and `:x2` of table `X`. -### Ingesting data +## Ingesting data Splitting any table into target and input (note semicolon): @@ -84,7 +84,7 @@ Splitting row indices into train/validation/test: `train, valid, test = partition(eachindex(y), 0.7, 0.2, shuffle=true, rng=1234)` for 70:20:10 ratio -#### Machine construction +## Machine construction Supervised case: @@ -94,13 +94,12 @@ Unsupervised case: `model = OneHotEncoder()` and `mach = machine(model, X)` - -#### Fitting +## Fitting `fit!(mach, rows=1:100, verbosity=1, force=false)` -#### Prediction +## Prediction Supervised case: `predict(mach, Xnew)` or `predict(mach, rows=1:100)` @@ -109,7 +108,7 @@ Similarly, for probabilistic models: `predict_mode`, `predict_mean` and `predict Unsupervised case: `transform(mach, rows=1:100)` or `inverse_transform(mach, rows)`, etc. -#### Inspecting objects +## Inspecting objects `@more` gets detail on last object in REPL @@ -128,8 +127,7 @@ pkg="MultivariateStats")` gets all properties (aka traits) of registered models `report(mach)` gets other training results (e.g. feature rankings) - -#### Resampling strategies +## Resampling strategies `Holdout(fraction_train=…, shuffle=false)` for simple holdout @@ -139,15 +137,15 @@ or a list of pairs of row indices: `[(train1, eval1), (train2, eval2), ... (traink, evalk)]` - -#### Performance estimation +## Performance estimation `evaluate(model, X, y, resampling=CV(), measure=rms, operation=predict, weights=..., verbosity=1)` `evaluate!(mach, resampling=Holdout(), measure=[rms, mav], operation=predict, weights=..., verbosity=1)` `evaluate!(mach, resampling=[(fold1, fold2), (fold2, fold1)], measure=rms)` +## Tuning -#### Ranges for tuning +### Ranges for tuning If `r = range(KNNRegressor(), :K, lower=1, upper = 20, scale=:log)` then `iterator(r, 6) = [1, 2, 3, 6, 11, 20]` @@ -155,13 +153,11 @@ Non-numeric ranges: `r = range(model, :parameter, values=…)`. 
Nested ranges: Use dot syntax, as in `r = range(EnsembleModel(atom=tree), :(atom.max_depth), ...)` - -#### Tuning strategies +### Tuning strategies `Grid(resolution=10)` for grid search - -#### Tuning model wrapper +### Tuning model wrapper `tuned_model = TunedModel(model=…, tuning=Grid(), resampling=Holdout(), measure=…, operation=predict, ranges=…, minimize=true, full_report=true)` @@ -175,7 +171,7 @@ If using Plots.jl: `plot(curve.parameter_values, curve.measurements, xlab=curve.parameter_name, xscale=curve.parameter_scale)` -#### Built-in performance measures +## Built-in performance measures `l1`, `l2`, `mav`, `rms`, `rmsl`, `rmslp1`, `rmsp`, `misclassification_rate`, `cross_entropy` @@ -184,7 +180,7 @@ If using Plots.jl: `using LossFunctions` to use more measures -#### Transformers +## Transformers Built-ins include: `Standardizer`, `OneHotEncoder`, `UnivariateBoxCoxTransformer`, `FeatureSelector`, `UnivariateStandardizer` @@ -193,12 +189,12 @@ Externals include: `PCA` (in MultivariateStats), `KMeans`, `KMedoids` (in Cluste Full list: do `models(m -> !m[:is_supervised])` -#### Ensemble model wrapper +## Ensemble model wrapper `EnsembleModel(atom=…, weights=Float64[], bagging_fraction=0.8, rng=GLOBAL_RNG, n=100, parallel=true, out_of_bag_measure=[])` -#### Pipelines +## Pipelines With point predictions: @@ -214,7 +210,7 @@ Unsupervised: `pipe = @pipeline MyPipe(stand=Standardizer(), hot=OneHotEncoder())` -#### Define a supervised learning network: +## Define a supervised learning network: `Xs = source(X)` `ys = source(y, kind=:target)` @@ -224,7 +220,7 @@ Unsupervised: `yhat = predict(knn_machine, W, ys)` (final node) -#### Exporting a learning network as stand-alone model: +## Exporting a learning network as stand-alone model: Supervised, with final node `yhat` returning point-predictions: @@ -239,4 +235,3 @@ Supervised, with `yhat` final node returning probabilistic predictions: Unsupervised, with final node `Xout`: `@from_network Composite(pca=network_pca) <= Xout` - diff --git a/docs/src/model_search.md b/docs/src/model_search.md index 0c9faa41c..892af90c2 100644 --- a/docs/src/model_search.md +++ b/docs/src/model_search.md @@ -1,4 +1,4 @@ -# Model Search +# Model Search MLJ has a model registry, allowing the user to search models and their properties, without loading all the packages containing model code. In @@ -7,7 +7,7 @@ machine learning task. The task itself is specified with the help of the `matching` method, and the search executed with the `models` methods, as detailed below. -### Model metadata +## Model metadata *Terminology.* In this section the word "model" refers to the metadata entry in the registry of an actual model `struct`, as appearing @@ -29,7 +29,7 @@ the same name occur in different packages, the package name must be specified, as in `info("LinearRegressor", pkg="GLM")`. 
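As a concrete illustration of a registry query of the kind just described (the trait names match those used elsewhere in this manual; the full set returned may vary with the MLJ version):

```julia
using MLJ

meta = info("DecisionTreeClassifier", pkg="DecisionTree")  # metadata only; no model code is loaded
meta.is_supervised          # true
meta.prediction_type        # :probabilistic
meta.package_name           # "DecisionTree"
meta.input_scitype          # e.g. Table(Continuous)
```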
-### General model queries +## General model queries We list all models (named tuples) using `models()`, and list the models for which code is already loaded with `localmodels()`: @@ -38,12 +38,12 @@ localmodels() localmodels()[2] ``` -If `models` is passed any `Bool`-valued function `test`, it returns every `model` for which `test(model)` is true, as in +If `models` is passed any `Bool`-valued function `test`, it returns every `model` for which `test(model)` is true, as in ```@repl tokai test(model) = model.is_supervised && - MLJ.Table(Continuous) <: model.input_scitype && - AbstractVector{<:Multiclass{3}} <: model.target_scitype && + model.input_scitype >: MLJ.Table(Continuous) && + model.target_scitype >: AbstractVector{<:Multiclass{3}} && model.prediction_type == :deterministic models(test) ``` @@ -52,7 +52,7 @@ Multiple test arguments may be passed to `models`, which are applied conjunctively. -### Matching models to data +## Matching models to data !!! note The `matching` method described below is experimental and may @@ -64,11 +64,11 @@ command, defined as follows: - `matching(model, X, y) == true` exactly when `model` is supervised and admits inputs and targets with the scientific types of `X` and `y`, respectively - + - `matching(model, X) == true` exactly when `model` is unsupervised and admits inputs with the scientific types of `X`. - -So, to search for all supervised probablistic models handling input + +So, to search for all supervised probabilistic models handling input `X` and target `y`, one can define the testing function `task` by ```julia @@ -82,26 +82,26 @@ models(task) ``` Also defined are `Bool`-valued callable objects `matching(model)`, -`matching(X, y)` and `matching(X)`, with obvious behaviour. For example, -`matching(X, y)(model) = matching(model, X, y)`. +`matching(X, y)` and `matching(X)`, with obvious behaviour. For example, +`matching(X, y)(model) = matching(model, X, y)`. So, to search for all models compatible with input `X` and target `y`, for example, one executes -```julia +```julia models(matching(X, y)) ``` while the preceding search can also be written -```julia +```julia models() do model matching(model, X, y) && model.prediction_type == :probabilistic end ``` -### API +## API ```@docs models diff --git a/docs/src/performance_measures.md b/docs/src/performance_measures.md index 6c4d3b30b..5c1da316b 100644 --- a/docs/src/performance_measures.md +++ b/docs/src/performance_measures.md @@ -4,7 +4,7 @@ In MLJ loss functions, scoring rules, sensitivities, and so on, are collectively to as *measures*. Presently, MLJ includes a few built-in measures, provides support for the loss functions in the [LossFunctions.jl](https://github.com/JuliaML/LossFunctions.jl) library, -and allows for users to define their own custom measures. +and allows for users to define their own custom measures. Providing further measures for probabilistic predictors, such as proper scoring rules, and for constructing multi-target product @@ -14,7 +14,7 @@ measures, is a work in progress. described here are defined in MLJBase. 
-### Built-in measures +## Built-in measures These measures all have the common calling syntax @@ -41,13 +41,13 @@ w = [1, 2, 2, 1]; rms(ŷ, y) # reports an aggregrate loss l1(ŷ, y, w) # reports per observation losses y = categorical(["male", "female", "female"]) -male = y[1]; female = y[2]; +male = y[1]; female = y[2]; d = UnivariateFinite([male, female], [0.55, 0.45]); ŷ = [d, d, d]; cross_entropy(ŷ, y) ``` -### Traits and custom measures +## Traits and custom measures Notice that `l1` reports per-sample evaluations, while `rms` only reports an aggregated result. This and other behavior can be @@ -71,20 +71,20 @@ method, and elsewhere in MLJ, provided it is a function or callable object conforming to the above syntactic conventions. By default, a custom measure is understood to: -- be a loss function (rather than a score) +- be a loss function (rather than a score) - report an aggregated value (rather than per-sample evaluations) - be feature-independent -To override this behavior one simply overloads the appropriate trait, +To override this behaviour one simply overloads the appropriate trait, as shown in the following examples: ```@repl losses_and_scores -y = [1, 2, 3, 4]; -ŷ = [2, 3, 3, 3]; -w = [1, 2, 2, 1]; -my_loss(ŷ, y) = maximum((ŷ - y).^2); +y = [1, 2, 3, 4]; +ŷ = [2, 3, 3, 3]; +w = [1, 2, 2, 1]; +my_loss(ŷ, y) = maximum((ŷ - y).^2); my_loss(ŷ, y) my_per_sample_loss(ŷ, y) = abs.(ŷ - y); MLJ.reports_each_observation(::typeof(my_per_sample_loss)) = true; @@ -106,19 +106,19 @@ measure implementing one non-weighted version, and possibly a second weighted version. *Implementation detail:* Internally, every measure is evaluated using -the syntax +the syntax ```julia MLJ.value(measure, ŷ, X, y, w) ``` and the traits determine what can be ignored and how `measure` is actually called. If `w=nothing` then the non-weighted form of `measure` is -dipatched. +dispatched. -### Using LossFunctions.jl +## Using LossFunctions.jl The [LossFunctions.jl](https://github.com/JuliaML/LossFunctions.jl) package includes "distance loss" functions for `Continuous` targets, -and "marginal loss" functins for `Binary` targets. While the +and "marginal loss" functions for `Binary` targets. 
While the LossFunctions,jl interface differs from the present one (for, example `Binary` observations must be +1 or -1), one can safely pass the loss functions defined there to any MLJ algorithm, which re-interprets it @@ -136,7 +136,7 @@ evaluate!(mach, resampling=holdout, operation=predict, weights=w, - verbosity=0) + verbosity=0) ``` *Note:* Although `ZeroOneLoss(ŷ, y)` makes no sense (neither `ŷ` nor @@ -144,13 +144,13 @@ evaluate!(mach, adaptor `MLJ.value` as discussed above: ```@repl losses_and_scores -ŷ = predict(mach, X); +ŷ = predict(mach, X); loss = MLJ.value(ZeroOneLoss(), ŷ, X, y, w) # X is ignored here mean(loss) ≈ misclassification_rate(mode.(ŷ), y, w) ``` -### List of built-in measures (excluding LossFunctions.jl losses) +## List of built-in measures (excluding LossFunctions.jl losses) ```@docs l1 @@ -208,22 +208,6 @@ matthews_correlation auc ``` -```@docs -tp -``` - -```@docs -tn -``` - -```@docs -fp -``` - -```@docs -fn -``` - ```@docs tpr ``` @@ -243,7 +227,7 @@ fnr FScore ``` -### Other performance related tools +## Other performance related tools ```@docs ConfusionMatrix @@ -256,5 +240,3 @@ confusion_matrix ```@docs roc_curve ``` - - diff --git a/docs/src/simple_user_defined_models.md b/docs/src/simple_user_defined_models.md index 2745b6001..5cda92665 100755 --- a/docs/src/simple_user_defined_models.md +++ b/docs/src/simple_user_defined_models.md @@ -35,8 +35,7 @@ Use](adding_models_for_general_use.md). For an unsupervised model, implement `transform` and, optionally, `inverse_transform` using the same signature at `predict` below. - -### A simple deterministic regressor +## A simple deterministic regressor Here's a quick-and-dirty implementation of a ridge regressor with no intercept: @@ -61,7 +60,6 @@ MLJBase.predict(::MyRegressor, fitresult, Xnew) = MLJBase.matrix(Xnew) * fitresu ```` ``` @setup regressor_example -import Base.eval import MLJBase using LinearAlgebra MLJBase.color_off() @@ -70,7 +68,7 @@ mutable struct MyRegressor <: MLJBase.Deterministic end MyRegressor(; lambda=0.1) = MyRegressor(lambda) function MLJBase.fit(model::MyRegressor, X, y) - x = MLJBase.matrix(X) + x = MLJBase.matrix(X) fitresult = (x'x + model.lambda*I)\(x'y) return fitresult end @@ -88,7 +86,7 @@ evaluate!(regressor, resampling=CV(), measure=rms, verbosity=0) ``` -### A simple probabilistic classifier +## A simple probabilistic classifier The following probabilistic model simply fits a probability distribution to the `MultiClass` training target (i.e., ignores `X`) diff --git a/docs/src/tuning_models.md b/docs/src/tuning_models.md index 629d4f53e..c47a81c49 100644 --- a/docs/src/tuning_models.md +++ b/docs/src/tuning_models.md @@ -9,12 +9,7 @@ optimal model, one just calls `predict(mach, Xnew)`. In this way the wrapped model may be viewed as a "self-tuning" version of the unwrapped model. - -### Tuning a single hyperparameter using a grid search - -```@setup goof -import Base.eval -``` +## Tuning a single hyperparameter using a grid search ```@repl goof using MLJ @@ -77,7 +72,7 @@ predict(self_tuning_tree, Xnew) ``` -### Tuning multiple nested hyperparameters +## Tuning multiple nested hyperparameters The following model has another model, namely a `DecisionTreeRegressor`, as a hyperparameter: @@ -123,12 +118,12 @@ using Plots plot(self_tuning_forest) ``` -![](tuning_plot.png) +![](img/tuning_plot.png) For more options in a grid search, see the `Grid` docstring below. 
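A compressed sketch of the self-tuning wrapper discussed above, using a toy data set. The hyperparameter and keyword names are indicative; in particular, some MLJ versions spell the `range` keyword `ranges`, as in the cheatsheet:

```julia
using MLJ

tree = @load DecisionTreeRegressor verbosity=0

r = range(tree, :max_depth, lower=1, upper=10)

self_tuning_tree = TunedModel(model=tree,
                              tuning=Grid(resolution=10),
                              resampling=CV(nfolds=5),
                              range=r,
                              measure=rms)

X = (x1=rand(100), x2=rand(100))          # toy input table
y = 2 .* X.x1 .+ 0.1 .* randn(100)        # toy continuous target

mach = machine(self_tuning_tree, X, y)
fit!(mach, verbosity=0)
fitted_params(mach).best_model            # the optimal tree found by the grid search
```

Calling `predict(mach, Xnew)` then uses the best model retrained on all supplied data, which is what makes the wrapped model "self-tuning".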
-### API +## API ```@docs MLJBase.range diff --git a/src/MLJ.jl b/src/MLJ.jl index e7e67f2b2..134273544 100644 --- a/src/MLJ.jl +++ b/src/MLJ.jl @@ -21,12 +21,12 @@ export matching export pdf, mode, median, mean, shuffle!, categorical, shuffle, levels, levels!, std, support -# re-export from ScientificTypes: -export GrayImage, ColorImage, Image, - Found, Continuous, Finite, Infinite, - OrderedFactor, Unknown, - Count, Multiclass, Binary, Scientific, - scitype, scitype_union, coerce, schema, autotype, elscitype +# re-exports from (MLJ)ScientificTypes via MLJBase +export Scientific, Found, Unknown, Known, Finite, Infinite, + OrderedFactor, Multiclass, Count, Continuous, Textual, + Binary, ColorImage, GrayImage, Image, Table +export scitype, scitype_union, elscitype, nonmissing, trait +export coerce, coerce!, autotype, schema, info # re-export from MLJBase: export nrows, nfeatures, color_off, color_on, @@ -73,7 +73,7 @@ export measures, truepositive_rate, truenegative_rate, falsepositive_rate, falsenegative_rate, negativepredicitive_value, positivepredictive_value, - tp, tn, fp, fn, tpr, tnr, fpr, fnr, + tpr, tnr, fpr, fnr, falsediscovery_rate, fdr, npv, ppv, recall, sensitivity, hit_rate, miss_rate, specificity, selectivity, f1score, f1, fallout @@ -90,7 +90,6 @@ export models, localmodels, @load, load, info, OneHotEncoder, UnivariateDiscretizer, FillImputer - ## METHOD IMPORT # from the Standard Library: @@ -99,14 +98,11 @@ import Pkg import Pkg.TOML # from the MLJ universe: -using ScientificTypes using MLJBase -import MLJBase using MLJTuning using MLJModels -using Tables -using CategoricalArrays +using Tables, CategoricalArrays import Distributions import Distributions: pdf, mode import Statistics, StatsBase, LinearAlgebra, Random @@ -114,14 +110,12 @@ import Random: AbstractRNG, MersenneTwister using ProgressMeter using ComputationalResources using ComputationalResources: CPUProcesses -using DocStringExtensions: SIGNATURES, TYPEDEF # to be extended: -import MLJBase: fit, update, clean!, fit!, - predict, fitted_params, - show_as_constructed, == +import MLJBase: fit, update, clean!, fit!, predict, fitted_params, + show_as_constructed, == import MLJModels: models - +import MLJScientificTypes ## CONSTANTS diff --git a/src/ensembles.jl b/src/ensembles.jl index 1fbbceb2d..09ebf4a80 100644 --- a/src/ensembles.jl +++ b/src/ensembles.jl @@ -7,7 +7,6 @@ Base.show(stream::IO, t::Random.MersenneTwister) = # Atom is atomic model type, eg, DecisionTree # R will be the tightest type of the atom fit-results. 
-using StatsBase mutable struct WrappedEnsemble{R,Atom <: Supervised} <: MLJType atom::Atom ensemble::Vector{R} @@ -27,33 +26,32 @@ function WrappedEnsemble(atom, ensemble::AbstractVector{L}) where L end # to enable trait-based dispatch of predict: -predict(wens::WrappedEnsemble{R,Atom}, - atomic_weights, Xnew) where {R,Atom<:Deterministic} = +function predict(wens::WrappedEnsemble{R,Atom}, atomic_weights, Xnew + ) where {R,Atom<:Deterministic} predict(wens, atomic_weights, Xnew, Deterministic, target_scitype(Atom)) -predict(wens::WrappedEnsemble{R,Atom}, - atomic_weights, Xnew) where {R,Atom<:Probabilistic} = +end + +function predict(wens::WrappedEnsemble{R,Atom}, atomic_weights, Xnew + ) where {R,Atom<:Probabilistic} predict(wens, atomic_weights, Xnew, Probabilistic, target_scitype(Atom)) +end -function predict(wens::WrappedEnsemble, - atomic_weights, - Xnew, +function predict(wens::WrappedEnsemble, atomic_weights, Xnew, ::Type{Deterministic}, ::Type{<:AbstractVector{<:Finite}}) - # atomic_weights ignored in this case - ensemble = wens.ensemble - atom = wens.atom - + atom = wens.atom n_atoms = length(ensemble) n_atoms > 0 || @error "Empty ensemble cannot make predictions." # TODO: make this more memory efficient but note that the type of # Xnew is unknown (ie, model dependent) - predictions = - reduce(hcat, [predict(atom, fitresult, Xnew) for fitresult in ensemble]) - classes = levels(predictions) - n = size(predictions, 1) + preds_gen = (predict(atom, fitresult, Xnew) for fitresult in ensemble) + predictions = hcat(preds_gen...) + + classes = levels(predictions) + n = size(predictions, 1) prediction = categorical(vcat([mode(predictions[i,:]) for i in 1:n], classes))[1:n] return prediction @@ -61,61 +59,56 @@ end function predict(wens::WrappedEnsemble, atomic_weights, Xnew, ::Type{Deterministic}, ::Type{<:AbstractVector{<:Continuous}}) + # considering atomic weights ensemble = wens.ensemble - - atom = wens.atom - - n_atoms = length(ensemble) + atom = wens.atom + n_atoms = length(ensemble) n_atoms > 0 || @error "Empty ensemble cannot make predictions." # TODO: make more memory efficient: - predictions = reduce(hcat, [atomic_weights[k]*predict(atom, ensemble[k], Xnew) for k in 1:n_atoms]) - prediction = [sum(predictions[i,:]) for i in 1:size(predictions, 1)] + preds_gen = (atomic_weights[k] * predict(atom, ensemble[k], Xnew) + for k in 1:n_atoms) + predictions = hcat(preds_gen...) + prediction = [sum(predictions[i,:]) for i in 1:size(predictions, 1)] return prediction end function predict(wens::WrappedEnsemble, atomic_weights, Xnew, ::Type{Probabilistic}, ::Type{<:AbstractVector{<:Finite}}) - ensemble = wens.ensemble - - atom = wens.atom - - n_atoms = length(ensemble) + atom = wens.atom + n_atoms = length(ensemble) n_atoms > 0 || @error "Empty ensemble cannot make predictions." # TODO: make this more memory efficient but note that the type of # Xnew is unknown (ie, model dependent): - # a matrix of probability distributions: - predictions = reduce(hcat, [predict(atom, fitresult, Xnew) for fitresult in ensemble]) - n_rows = size(predictions, 1) + preds_gen = (predict(atom, fitresult, Xnew) for fitresult in ensemble) + predictions = hcat(preds_gen...) 
+ n_rows = size(predictions, 1) # the weighted averages over the ensemble of the discrete pdf's: - predictions = [MLJBase.average([predictions[i,k] for k in 1:n_atoms], weights=atomic_weights) for i in 1:n_rows] + predictions = [average([predictions[i, k] for k in 1:n_atoms], weights=atomic_weights) for i in 1:n_rows] return predictions end function predict(wens::WrappedEnsemble, atomic_weights, Xnew, ::Type{Probabilistic}, ::Type{<:AbstractVector{<:Continuous}}) - ensemble = wens.ensemble - - atom = wens.atom - - n_atoms = length(ensemble) + atom = wens.atom + n_atoms = length(ensemble) n_atoms > 0 || @error "Empty ensemble cannot make predictions." # TODO: make this more memory efficient but note that the type of # Xnew is unknown (ie, model dependent): - # a matrix of probability distributions: - predictions = reduce(hcat, [predict(atom, fitresult, Xnew) for fitresult in ensemble]) + preds_gen = (predict(atom, fitresult, Xnew) for fitresult in ensemble) + predictions = hcat(preds_gen...) # n_rows = size(predictions, 1) # # the weighted average over the ensemble of the pdf means and pdf variances: diff --git a/src/scitypes.jl b/src/scitypes.jl index 3b6f41bb4..5f81b3770 100644 --- a/src/scitypes.jl +++ b/src/scitypes.jl @@ -1,18 +1,20 @@ ## SUPERVISED +const MST = MLJScientificTypes # only used in this file + struct SupervisedScitype{input_scitype, target_scitype, prediction_type} end -ScientificTypes.scitype(model::Deterministic, ::ScientificTypes.MLJ) = +MST.scitype(model::Deterministic, ::MST.MLJ) = SupervisedScitype{input_scitype(model), target_scitype(model), :deterministic} -ScientificTypes.scitype(model::Probabilistic, ::ScientificTypes.MLJ) = +MST.scitype(model::Probabilistic, ::MST.MLJ) = SupervisedScitype{input_scitype(model), target_scitype(model), :probabilistic} -ScientificTypes.scitype(model::Interval, ::ScientificTypes.MLJ) = +MST.scitype(model::Interval, ::MST.MLJ) = SupervisedScitype{input_scitype(model), target_scitype(model), :interval} @@ -48,7 +50,7 @@ end struct UnsupervisedScitype{input_scitype, output_scitype} end -ScientificTypes.scitype(model::Unsupervised, ::ScientificTypes.MLJ) = +MST.scitype(model::Unsupervised, ::MST.MLJ) = UnsupervisedScitype{input_scitype(model), MLJBase.output_scitype(model)} @@ -87,7 +89,7 @@ struct MeasureScitype{target_scitype, is_feature_dependent, supports_weights} end -ScientificTypes.scitype(measure, ::ScientificTypes.MLJ, ::Val{:measure}) = +MST.scitype(measure, ::MST.MLJ, ::Val{:measure}) = MeasureScitype{target_scitype(measure), prediction_type(measure), orientation(measure), diff --git a/src/tuning.jl b/src/tuning.jl index 187183758..4b3c3de4f 100644 --- a/src/tuning.jl +++ b/src/tuning.jl @@ -1,5 +1,6 @@ -abstract type TuningStrategy <: MLJ.MLJType end -const ParameterName=Union{Symbol,Expr} +abstract type TuningStrategy <: MLJType end + +const ParameterName = Union{Symbol,Expr} """ Grid(resolution=10, acceleration=DEFAULT_RESOURCE[]) diff --git a/test/ensembles.jl b/test/ensembles.jl index e08860456..72f2adb12 100644 --- a/test/ensembles.jl +++ b/test/ensembles.jl @@ -1,13 +1,12 @@ module TestEnsembles -# using Revise using Test using Random using MLJ using MLJBase import MLJModels using CategoricalArrays -using Distributions +import Distributions @load KNNRegressor @@ -159,8 +158,8 @@ train, test = partition(1:length(y), 0.8); ensemble_model = MLJ.ProbabilisticEnsembleModel(atom=atom) ensemble_model.n = 10 fitresult, cache, report = MLJ.fit(ensemble_model, 1, X, y) -d1 = fit(Distributions.Normal, [1,1,2,2]) -d2 = 
fit(Distributions.Normal, [1,1,1,2]) +d1 = Distributions.fit(Distributions.Normal, [1,1,2,2]) +d2 = Distributions.fit(Distributions.Normal, [1,1,1,2]) # @test reduce(* , [d.μ ≈ d1.μ || d.μ ≈ d2.μ for d in fitresult.ensemble]) # @test reduce(* , [d.σ ≈ d1.σ || d.σ ≈ d2.σ for d in fitresult.ensemble]) d=predict(ensemble_model, fitresult, MLJ.selectrows(X, test))[1] @@ -170,7 +169,7 @@ end ensemble_model.bagging_fraction = 1.0 fitresult, cache, report = MLJ.fit(ensemble_model, 1, X, y) d = predict(ensemble_model, fitresult, MLJ.selectrows(X, test))[1] -d3 = fit(Distributions.Normal, y) +d3 = Distributions.fit(Distributions.Normal, y) @test pdf(d, 1.52) ≈ pdf(d3, 1.52) atomic_weights = rand(10) atomic_weights = atomic_weights/sum(atomic_weights) diff --git a/test/runtests.jl b/test/runtests.jl index 4025d9aa4..f4122caae 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,10 +2,10 @@ using Distributed addprocs(2) @everywhere begin -using MLJ -using MLJBase -using Test -using Random + using MLJ + using MLJBase + using Test + using Random end @testset "ensembles" begin @@ -19,5 +19,3 @@ end @testset "scitypes" begin @test include("scitypes.jl") end - -
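For reference, a minimal usage sketch of the ensemble wrapper whose internals and tests are touched above (model and keyword names follow the tests and the cheatsheet; treat this as illustrative only):

```julia
using MLJ

atom = @load KNNRegressor            # deterministic atomic model, as in the tests
ensemble_model = EnsembleModel(atom=atom, n=10, bagging_fraction=0.8)

# toy regression data
X = (x1=rand(50), x2=rand(50))
y = 2 .* X.x1 .+ 0.05 .* randn(50)

mach = machine(ensemble_model, X, y)
fit!(mach, verbosity=0)
predict(mach, X)[1:3]                # point predictions aggregated over the ensemble
```

Wrapping a `Probabilistic` atom instead yields averaged distributions, the case handled by the other `predict` methods refactored above.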