From f0b074a49e5536ea17d1ef4cf2e972e5292903dc Mon Sep 17 00:00:00 2001 From: OkonSamuel Date: Sun, 2 Jan 2022 21:05:52 +0100 Subject: [PATCH 1/3] implement and export scitype method --- src/MLJModelInterface.jl | 2 +- src/data_utils.jl | 44 +++++++++++++++++++++++++++++++++---- test/data_utils.jl | 47 ++++++++++++++++++++++++++++++++++++---- 3 files changed, 84 insertions(+), 9 deletions(-) diff --git a/src/MLJModelInterface.jl b/src/MLJModelInterface.jl index 9eb07f5..cc523e4 100644 --- a/src/MLJModelInterface.jl +++ b/src/MLJModelInterface.jl @@ -93,7 +93,7 @@ end # data operations export matrix, int, classes, decoder, table, - nrows, selectrows, selectcols, select, info + nrows, selectrows, selectcols, select, info, scitype # equality export is_same_except, isrepresented diff --git a/src/data_utils.jl b/src/data_utils.jl index e52544c..e02aed0 100644 --- a/src/data_utils.jl +++ b/src/data_utils.jl @@ -151,6 +151,41 @@ classes(x) = classes(get_interface_mode(), x) classes(::LightInterface, x) = errlight("classes") +# ------------------------------------------------------------------------ +# scitype + +""" + scitype(X) + +The scientific type (interpretation) of `X`, distinct from its +machine type. + +### Examples +```julia +julia> scitype(3.14) +Continuous + +julia> scitype([1, 2, missing]) +AbstractVector{Union{Missing, Count}} + +julia> scitype((5, "beige")) +Tuple{Count, Textual} + +julia> using CategoricalArrays + +julia> X = (gender = categorical(['M', 'M', 'F', 'M', 'F']), + ndevices = [1, 3, 2, 3, 2]); + +julia> scitype(X) +Table{Union{AbstractVector{Count}, AbstractVector{Multiclass{2}}}} +``` +""" +scitype(X) = scitype(get_interface_mode(), vtrait(X, "scitype"), X) + +function scitype(::LightInterface, m, X) + return errlight("scitype") +end + # ------------------------------------------------------------------------ # schema @@ -187,14 +222,15 @@ istable(::Mode, ::Val{:table}) = true # decoder """ - d = decoder(x) + decoder(x) -A callable object for decoding the integer representation of a +Return a callable object for decoding the integer representation of a `CategoricalString` or `CategoricalValue` sharing the same pool as `x`. (Here `x` is of one of these two types.) Specifically, one has -`d(int(y)) == y` for all `y in classes(x)`. One can also call `d` on -integer arrays, in which case `d` is broadcast over all elements. +`decoder(x)(int(y)) == y` for all `y in classes(x)`. One can also call `decoder(x)` on +integer arrays, in which case `decoder(x)` is broadcast over all elements. +### Examples ```julia julia> v = categorical(["c", "b", "c", "a"]) 4-element CategoricalArrays.CategoricalArray{String,1,UInt32}: diff --git a/test/data_utils.jl b/test/data_utils.jl index 579bab0..6027782 100644 --- a/test/data_utils.jl +++ b/test/data_utils.jl @@ -3,12 +3,14 @@ x = 1:5 @test_throws M.InterfaceError M.categorical(x) end + @testset "cat-full" begin setfull() M.categorical(::FI, a...; kw...) = categorical(a...; kw...) x = 1:5 @test M.categorical(x) == categorical(x) end + # ------------------------------------------------------------------------ @testset "matrix-light" begin setlight() @@ -25,18 +27,21 @@ end X = (a=[1, 2, 3], b=[1, 2, 3]) @test_throws M.InterfaceError matrix(X) end + @testset "matrix-full" begin setfull() M.matrix(::FI, ::Val{:table}, X; kw...) = Tables.matrix(X; kw...) X = (a=[1, 2, 3], b=[1, 2, 3]) @test matrix(X) == hcat([1, 2, 3], [1, 2, 3]) end + # ------------------------------------------------------------------------ @testset "int-light" begin setlight() x = categorical([1, 2, 3]) @test_throws M.InterfaceError int(x) end + @testset "int-full" begin setfull() M.int(::FI, x::CategoricalValue) = CategoricalArrays.refcode(x) @@ -61,6 +66,31 @@ end x = categorical(['a','b','a']) @test classes(x[1]) == ['a', 'b'] end + +# ------------------------------------------------------------------------ +@testset "scitype-light" begin + # throw error for any input anyway + setlight() + + ary = rand(10, 3) + @test_throws M.InterfaceError M.scitype(ary) + + df = DataFrame(rand(10, 3), :auto) + @test_throws M.InterfaceError M.scitype(df) +end + +@testset "scitype-full" begin + setfull() + M.scitype(::FI, v, X) = ScientificTypes.scitype(X) + + ary = rand(10, 3) + @test M.scitype(ary) == AbstractArray{Continuous, 2} + + df = DataFrame(A = rand(10), B = categorical(rand('a':'c', 10))) + sch = M.scitype(df) + @test sch <: Table(Continuous, Multiclass) +end + # ------------------------------------------------------------------------ @testset "schema-light" begin # throw error for any input anyway @@ -70,14 +100,14 @@ end df = DataFrame(rand(10, 3), :auto) @test_throws M.InterfaceError M.schema(df) end + @testset "schema-full" begin setfull() + M.schema(::FI, v, X) = ScientificTypes.schema(X) + ary = rand(10, 3) - M.schema(::FI, ::Val{:table}, X; kw...) = - ScientificTypes.schema(X; kw...) - M.schema(::FI, ::Val{:other}, X; kw...) = nothing + @test_throws ArgumentError M.schema(ary) - @test M.schema(ary) === nothing df = DataFrame(A = rand(10), B = categorical(rand('a':'c', 10))) sch = M.schema(df) @test sch.names == (:A, :B) @@ -86,6 +116,7 @@ end @test sch.scitypes[1] <: Continuous @test sch.scitypes[2] <: Multiclass end + # ------------------------------------------------------------------------ @testset "istable" begin # Nothing stops someone from implementing a Tables.jl @@ -106,24 +137,28 @@ end X = DataFrame(A=rand(10)) @test M.istable(X) end + # ------------------------------------------------------------------------ @testset "decoder-light" begin setlight() x = 5 @test_throws M.InterfaceError decoder(x) end + @testset "decoder-full" begin setfull() # toy test because I don't want to copy the decoder logic here M.decoder(::FI, x) = 0 @test decoder(nothing) == 0 end + # ------------------------------------------------------------------------ @testset "table-light" begin setlight() X = ones(3, 2) @test_throws M.InterfaceError table(X) end + @testset "table-full" begin setfull() function M.table(::FI, A::AbstractMatrix; names=nothing) @@ -135,6 +170,7 @@ end @test Tables.istable(T) @test Tables.matrix(T) == X end + # ------------------------------------------------------------------------ @testset "nrows-light" begin setlight() @@ -142,6 +178,7 @@ end @test_throws M.InterfaceError nrows(X) @test nrows(nothing) == 0 end + @testset "nrows-full" begin setfull() X = ones(5) @@ -157,6 +194,7 @@ end X = (a=[4, 2, 1], b=[3, 2, 1]) @test nrows(X) == 3 end + # ------------------------------------------------------------------------ @testset "select-light" begin setlight() @@ -179,6 +217,7 @@ end @test_throws M.InterfaceError selectcols(X, 1) @test_throws M.InterfaceError select(X, 1, 1) end + @testset "select-full" begin setfull() From 5ec9a0d75de6fa70973645ae717aefe67e17a310 Mon Sep 17 00:00:00 2001 From: "Anthony Blaom, PhD" Date: Tue, 4 Jan 2022 09:59:17 +1300 Subject: [PATCH 2/3] Bump 1.3.5 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 2f8d63d..998cdd0 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "MLJModelInterface" uuid = "e80e1ace-859a-464e-9ed9-23947d8ae3ea" authors = ["Thibaut Lienart and Anthony Blaom"] -version = "1.3.4" +version = "1.3.5" [deps] Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" From 2a77b27682355affbdb536716326550ea60f7ed8 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Tue, 4 Jan 2022 10:09:31 +1300 Subject: [PATCH 3/3] add julia 1.6 testing --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 91fb6f7..24170b9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,6 +18,7 @@ jobs: matrix: version: - '1.0' + - '1.6' - '1' os: - ubuntu-latest