Skip to content

Commit

Permalink
Add median_heuristic_transform (#245)
Browse files Browse the repository at this point in the history
* Add `median_heuristic_transform`

* Move `median_heuristic_transform` to convenience fcns in the docs

* Only compute pairwise distances between different elements

* Update src/transform/scaletransform.jl

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

* Fix tests

* Update test/transform/scaletransform.jl

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

* Update Project.toml

Co-authored-by: Théo Galy-Fajou <[email protected]>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
3 people authored Apr 12, 2022
1 parent 99b53c6 commit 7143f87
Show file tree
Hide file tree
Showing 8 changed files with 61 additions and 1 deletion.
3 changes: 2 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = "KernelFunctions"
uuid = "ec8451be-7e33-11e9-00cf-bbf324bd1392"
version = "0.10.34"
version = "0.10.35"

[deps]
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
Expand All @@ -15,6 +15,7 @@ LogExpFunctions = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Requires = "ae029012-a4dd-5104-9daa-d747884805df"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
TensorCore = "62fd8b95-f654-4bbd-a8a5-9c27f68ccd50"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Expand Down
3 changes: 3 additions & 0 deletions docs/Project.toml
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
[deps]
Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
KernelFunctions = "ec8451be-7e33-11e9-00cf-bbf324bd1392"
Kronecker = "2c470bb0-bcc8-11e8-3dad-c9649493f05e"
PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"

[compat]
Distances = "0.10"
Documenter = "0.27"
KernelFunctions = "0.10"
Kronecker = "0.4, 0.5"
Expand Down
1 change: 1 addition & 0 deletions docs/src/transform.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,5 @@ PeriodicTransform

```@docs
with_lengthscale
median_heuristic_transform
```
4 changes: 4 additions & 0 deletions src/KernelFunctions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ export Transform,
PeriodicTransform
export with_lengthscale

export median_heuristic_transform

export NystromFact, nystrom

export gaborkernel
Expand Down Expand Up @@ -63,6 +65,8 @@ using ZygoteRules: ZygoteRules, AContext, literal_getproperty, literal_getfield
# Hack to work around Zygote type inference problems.
const Distances_pairwise = Distances.pairwise

using Statistics: median!

abstract type Kernel end
abstract type SimpleKernel <: Kernel end

Expand Down
36 changes: 36 additions & 0 deletions src/transform/scaletransform.jl
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,39 @@ _map(t::ScaleTransform, x::RowVecs) = RowVecs(only(t.s) .* x.X)
Base.isequal(t::ScaleTransform, t2::ScaleTransform) = isequal(only(t.s), only(t2.s))

Base.show(io::IO, t::ScaleTransform) = print(io, "Scale Transform (s = ", only(t.s), ")")

# Helpers

"""
    median_heuristic_transform(distance, x::AbstractVector)

Build a [`ScaleTransform`](@ref) whose scale is the inverse of the median `distance`
between different elements of `x`, so that applying it divides the input elementwise by
that median.

`distance` must be usable with `KernelFunctions.pairwise`. Every `PreMetric` from
[Distances.jl](https://github.com/JuliaStats/Distances.jl), e.g. `Euclidean`, meets this
requirement automatically.

# Examples

```jldoctest
julia> using Distances, Statistics

julia> x = ColVecs(rand(100, 10));

julia> t = median_heuristic_transform(Euclidean(), x);

julia> y = map(t, x);

julia> median(euclidean(y[i], y[j]) for i in 1:10, j in 1:10 if i != j) ≈ 1
true
```
"""
function median_heuristic_transform(f, x::AbstractVector)
    npoints = length(x)

    # Flatten all pairwise distances into one vector.
    dists = vec(pairwise(f, x))

    # Drop the self-distances: in the flattened pairwise matrix they sit at every
    # `(npoints + 1)`-th position starting from the first, so only distances between
    # **different** elements remain.
    self_positions = 1:(npoints + 1):(npoints^2)
    deleteat!(dists, self_positions)

    # `median!` may reorder `dists` in place; the vector is not used afterwards.
    scale = inv(median!(dists))
    return ScaleTransform(scale)
end
1 change: 1 addition & 0 deletions test/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

Expand Down
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ using LogExpFunctions
using PDMats
using Random
using SpecialFunctions
using Statistics
using Test
using Zygote: Zygote
using ForwardDiff: ForwardDiff
Expand Down
13 changes: 13 additions & 0 deletions test/transform/scaletransform.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,17 @@
@test isequal(ScaleTransform(s), ScaleTransform(s))
@test repr(t) == "Scale Transform (s = $(s2))"
# AD check of an `SEKernel` composed (`∘`) with a `ScaleTransform` whose scale is `exp(x[1])`.
test_ADs(x -> SEKernel() ∘ ScaleTransform(exp(x[1])), randn(rng, 1))

@testset "median heuristic" begin
    # Run over each input collection combined with two different distances.
    for x in (x, XV, XC, XR), dist in (Euclidean(), Cityblock())
        n = length(x)
        t = median_heuristic_transform(dist, x)
        @test t isa ScaleTransform
        # The scale must equal the inverse of the median distance between different
        # elements (index pairs with i != j).
        @test first(t.s) ≈
            inv(median(dist(x[i], x[j]) for i in 1:n, j in 1:n if i != j))

        # After applying the transform, that median distance must be (approximately) one.
        y = map(t, x)
        @test median(dist(y[i], y[j]) for i in 1:n, j in 1:n if i != j) ≈ 1
    end
end
end

2 comments on commit 7143f87

@devmotion
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/58418

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.10.35 -m "<description of version>" 7143f87f9164eee6ef7d93c1971d47730076051f
git push origin v0.10.35

Please sign in to comment.