Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
Creation and upload
  • Loading branch information
cbsteh committed May 30, 2023
1 parent c1c8920 commit 6ff0193
Show file tree
Hide file tree
Showing 9 changed files with 798 additions and 2 deletions.
11 changes: 11 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
name = "SolarCorrMap"
uuid = "3d6a1b05-2667-403f-b7c0-4fe587865619"
authors = ["Christopher Teh Boon Sung <[email protected]>"]
version = "0.1.0"

[deps]
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
HypothesisTests = "09f84164-cd44-5f33-b23f-e6b0d136a0d5"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
44 changes: 42 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,42 @@
# SolarCorrMap
Visualize correlations as a solar map

# Solar Correlation Map in Julia

## Overview
Visualize correlations between a given dependent variable and explanatory variables, as well as the intercorrelations between the explanatory variables, as a solar map.

The relationships between the dependent variable (the "Sun") and the explanatory variables (the "planets") are depicted as a solar system, where planets orbit around the Sun. The closer a planet is to the Sun, the stronger their relationship, as indicated by a larger absolute Pearson correlation coefficient.

Furthermore, some of these planets have their own moons. These moons represent explanatory variables that are closely related to the planet, with an absolute correlation coefficient of at least 0.8 (the default threshold).

You can also regard the planets as the primary predictors (or main parameters) of the dependent variable and the moons as the parameters that are collinear with those main parameters.

This work is based on the 2017 work by Stefan Zapf and Christopher Kraushaar (see References).

## Usage
Copy the three Julia source files in the `src` folder: `correlations.jl`, `drawmap.jl`, and `SolarCorrMap.jl`, and paste them in your project folder or subfolder.

Note: The `main.jl` is an example file (see below).

## Example
Call the `viz` function to read the `CSV` data file and plot the correlations as a solar map.

```
using SolarCorrMap
viz("data/housing.csv", :medv)
```

where `housing.csv` is a sample `CSV` file (Boston Housing data), and `:medv` is the dependent variable in the provided `CSV` file.

The plot result is:

![Solar Correlation Map plot](data/solar-map.png)

where negative correlations are shown in red and positive correlations in black. The legend on the right indicates the level of significance of the correlation between each explanatory variable and the dependent variable, where `**` means p ≤ 0.01, `*` means p ≤ 0.05, and `ns` means p > 0.05 (not significant).

## References
[O'Reilly article. This article also explains the above example plot.](https://www.oreilly.com/content/a-new-visualization-to-beautifully-explore-correlations/)

[Python code by the original developers](https://github.com/Zapf-Consulting/solar-correlation-map)

[R code by yaricom](https://github.com/yaricom/solar-correlation-map-R)
507 changes: 507 additions & 0 deletions data/housing.csv

Large diffs are not rendered by default.

Binary file added data/solar-map.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added src/.DS_Store
Binary file not shown.
15 changes: 15 additions & 0 deletions src/SolarCorrMap.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""
    SolarCorrMap

Compute the correlations between a dependent variable and its explanatory
variables and draw them as a "solar map". The implementation lives in two
included files: `correlations.jl` (correlation table, planet/moon grouping) and
`drawmap.jl` (plotting and the `viz` entry point).
"""
module SolarCorrMap

# Third-party packages used by the included files.
using CairoMakie        # Figure/Axis/Legend and the lines!/scatter!/text! calls
using DataFrames
using StatsBase         # `sample`, used by `find_angles`
using CSV               # `CSV.read`, used by `viz`
using HypothesisTests   # `CorrelationTest` and `pvalue`, used by `add_sig_label`

# Order matters: drawmap.jl refers to the types and functions that
# correlations.jl defines.
include("correlations.jl")
include("drawmap.jl")

# Public API: the two data types and the four user-facing functions.
export CelestialBody, PlanetarySystem
export correl_df, collect_planets_moons, plot_solar_corr_map, viz

end # module
65 changes: 65 additions & 0 deletions src/correlations.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@

"""
    CelestialBody(; name="", r=0.0)
    CelestialBody(name, r)

A single variable shown on the solar map (either a planet or a moon).

# Fields
- `name::String`: label shown in the legend.
- `r::Float64`: correlation coefficient associated with the variable; its sign
  selects the drawing colour.
"""
# `Base.@kwdef` is spelled out because the unqualified `@kwdef` is only
# exported from Base on Julia 1.9 and later.
Base.@kwdef mutable struct CelestialBody
    name::String = ""
    r::Float64 = 0.0
end


"""
    PlanetarySystem(; planet=CelestialBody(), moons=CelestialBody[])
    PlanetarySystem(planet, moons)

A planet together with the moons that are grouped under it.

# Fields
- `planet::CelestialBody`: the primary explanatory variable.
- `moons::Vector{CelestialBody}`: the variables attached to `planet`.
"""
# `Base.@kwdef` is spelled out because the unqualified `@kwdef` is only
# exported from Base on Julia 1.9 and later.
Base.@kwdef mutable struct PlanetarySystem
    planet::CelestialBody = CelestialBody()
    # Typed empty vector: avoids building a `Vector{Any}` that then has to be
    # converted to the field type on every default construction.
    moons::Vector{CelestialBody} = CelestialBody[]
end


"""
    correl_df(df::AbstractDataFrame)

Return the column-by-column correlation matrix of `df` as a `DataFrame`.

The result has one column per column of `df` (same names), preceded by a
`:pars` column that holds the column names, so each row can be identified by
the variable it belongs to.
"""
function correl_df(df::AbstractDataFrame)
    colnames = names(df)
    corrtable = DataFrame(cor(Matrix(df)), colnames)
    return insertcols(corrtable, 1, :pars => colnames)
end


"""
    round_down(val)

Truncate `val` towards zero at one decimal place: non-negative values are
floored and negative values are ceiled, so the magnitude never increases.
"""
function round_down(val)
    if val >= 0
        return floor(val; digits=1)
    else
        return ceil(val; digits=1)
    end
end


"""
    add_sig_label(x, y, label)

Return `label` followed by a space and a significance marker for the
correlation test between `x` and `y`.

The marker is `"**"` when the p-value is at most 0.01, `"*"` when it is at most
0.05, and `"ns"` otherwise.
"""
function add_sig_label(x, y, label)
    p = pvalue(CorrelationTest(x, y))
    marker = if p <= 0.01
        "**"
    elseif p <= 0.05
        "*"
    else
        "ns"
    end
    return "$(label) $(marker)"
end


"""
    collect_planets_moons(data::AbstractDataFrame, dep::Symbol; moon_threshold=0.8)

Group the explanatory variables of `data` into planets and moons relative to
the dependent variable `dep`.

Variables are visited in order of decreasing absolute correlation with `dep`.
The strongest remaining variable becomes a planet; every other remaining
variable whose absolute correlation with that planet is at least
`moon_threshold` becomes one of its moons. Planet and moons are then removed
from the pool and the process repeats until no variable is left.

# Arguments
- `data`: table whose columns are the dependent and explanatory variables.
- `dep`: name of the dependent-variable column.

# Keywords
- `moon_threshold=0.8`: minimum absolute correlation with a planet for a
  variable to be attached to it as a moon.

# Returns
A `Vector{PlanetarySystem}`. Each body's `name` carries the significance marker
produced by `add_sig_label`. A planet's `r` is its correlation with `dep`
truncated to one decimal place (`round_down`); a moon's `r` is its unrounded
correlation with `dep`.
"""
function collect_planets_moons(data::AbstractDataFrame, dep::Symbol; moon_threshold=0.8)
    depname = string(dep)
    cordf = correl_df(data)

    # Correlation of every variable with `dep`, looked up by variable name.
    dep_corr = Dict(zip(cordf.pars, cordf[!, dep]))

    # Pool of unassigned explanatory variables, strongest |r| with `dep` first.
    remaining = filter(:pars => p -> p != depname, cordf)
    sort!(remaining, dep, by=abs, rev=true)

    x = data[!, dep]
    systems = PlanetarySystem[]

    while size(remaining, 1) > 0
        planet = Symbol(remaining[1, :pars])
        planet_name = string(planet)
        planet_r = round_down(remaining[1, dep])

        # Rank the pool by |r| with the planet and keep those above the threshold.
        ranked = sort!(select(remaining, [:pars, planet]), planet, by=abs, rev=true)
        close = filter(planet => c -> abs(c) >= moon_threshold, ranked)

        # The planet correlates perfectly with itself, so drop it by name
        # rather than relying on it being the first row.
        moon_names = [name for name in close.pars if name != planet_name]

        # Planet and moons are now assigned; take them out of the pool.
        assigned = Set(moon_names)
        push!(assigned, planet_name)
        filter!(:pars => p -> !(p in assigned), remaining)

        moons = CelestialBody[
            CelestialBody(add_sig_label(x, data[!, m], m), dep_corr[m])
            for m in moon_names
        ]

        labelled_planet = add_sig_label(x, data[!, planet], planet_name)
        push!(systems, PlanetarySystem(CelestialBody(labelled_planet, planet_r), moons))
    end

    return systems
end
154 changes: 154 additions & 0 deletions src/drawmap.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@

"""
    draw_orbits!(ax; fontsize=16)

Draw nine dashed, concentric orbit circles on `ax`, with radii from 0.9 down to
0.1 in steps of 0.1.

Each circle is labelled at its top with `1 - radius` rounded to one decimal, so
the circle of radius 0.9 is labelled `0.1` and the innermost circle `0.9`.

# Keywords
- `fontsize=16`: font size of the orbit labels.
"""
function draw_orbits!(ax; fontsize=16)
    # Unit-circle samples; a range broadcasts directly, no need to splat it
    # into an array first.
    θs = 0:0.01:2π
    unit_x, unit_y = cos.(θs), sin.(θs)

    for r in 0.9:-0.1:0.1
        lines!(ax, unit_x .* r, unit_y .* r, linestyle=:dash, color=:gray60)
        # Label position: the point of the circle at angle π/2.
        rpos = (r * cos(0.5π), r * sin(0.5π))
        text!(ax, string(round(1 - r; digits=1)); position=rpos,
              color=:gray30, align=(:center, :center), fontsize=fontsize)
    end
    return nothing
end


"""
    draw_center(ax, dep; fontsize=16)

Mark the origin of `ax` as the dependent variable: a white marker whose legend
entry reads `"DEP: <dep>"`, overlaid with the centred text `"DEP"`.

# Keywords
- `fontsize=16`: font size of the `"DEP"` text.
"""
function draw_center(ax, dep; fontsize=16)
    # The marker exists mainly to carry the legend entry.
    scatter!(ax, 0, 0; color=:white, label="DEP: $(dep)")
    return text!(ax, "DEP"; position=(0, 0), fontsize=fontsize,
                 align=(:center, :center), color=:black)
end


"""
    draw_object!(ax, x, y; m_label, t_label, m_color, t_color, m_strokecolor,
                 m_strokewidth=2, m_size=32, t_fontsize=16)

Draw one body at `(x, y)` on `ax`: a scatter marker with centred text on top.

# Keywords
- `m_label`: legend label attached to the marker.
- `t_label`: text drawn at the marker position.
- `m_color`, `m_strokecolor`, `m_strokewidth=2`, `m_size=32`: marker fill
  colour, outline colour, outline width and size.
- `t_color`, `t_fontsize=16`: colour and font size of the text.
"""
function draw_object!(ax, x, y; m_label, t_label, m_color, t_color,
                      m_strokecolor, m_strokewidth=2, m_size=32, t_fontsize=16)
    scatter!(ax, x, y; label=m_label, markersize=m_size, color=m_color,
             strokecolor=m_strokecolor, strokewidth=m_strokewidth)
    return text!(ax, t_label; position=(x, y), fontsize=t_fontsize,
                 color=t_color, align=(:center, :center))
end


"""
    find_angles(n::Int)

Return `n` angles (radians, in `[0, 2π)`) that are evenly spaced around the
circle, starting from a random offset and returned in random order.

For `n == 0` the comprehension is empty, so an empty vector is sampled.
"""
function find_angles(n::Int)
    Δ = 2π / n
    θstart = 2π * rand()
    angles = [(θstart + (i - 1) * Δ) % 2π for i in 1:n]
    # Sampling all n without replacement shuffles the angles.
    return sample(angles, n, replace=false)
end


"""
    find_angles(n::Int, r, locs; threshold=0.1, max_tries=50)

Return `n` evenly spaced angles (radians, random start, random order) for
bodies placed on the circle of radius `1 - r`, trying to keep them away from
the positions in `locs`.

A candidate set of angles is rejected when any resulting position lies closer
than `threshold` to any element of `locs`. After a rejection a new random start
angle is drawn. At most `max_tries` candidates are checked; if all of them are
rejected, one more random set is drawn and returned without being checked.

# Arguments
- `n`: number of angles.
- `r`: value whose complement `1 - r` is the orbit radius.
- `locs`: iterable of positions with `x` and `y` properties to stay away from.

# Keywords
- `threshold=0.1`: minimum accepted Euclidean distance to any position in `locs`.
- `max_tries=50`: number of candidate sets that are checked.
"""
function find_angles(n::Int, r, locs; threshold=0.1, max_tries=50)
    Δ = 2π / n
    radius = 1 - r

    # n evenly spaced angles starting from a fresh random offset.
    function random_angles()
        θstart = 2π * rand()
        return [(θstart + (i - 1) * Δ) % 2π for i in 1:n]
    end

    # Cartesian positions on the orbit for the given angles.
    positions(angles) = [(x=radius * cos(θ), y=radius * sin(θ)) for θ in angles]

    # true when any candidate position is within `threshold` of any `locs` entry.
    function too_close(pos)
        for l in locs, p in pos
            if sqrt((p.x - l.x)^2 + (p.y - l.y)^2) < threshold
                return true
            end
        end
        return false
    end

    angles = random_angles()
    for _ in 1:max_tries
        too_close(positions(angles)) || break
        angles = random_angles()
    end

    # Sampling all n without replacement shuffles the angles.
    return sample(angles, n, replace=false)
end


"""
    plot_solar_corr_map(psv::Vector{PlanetarySystem}, dep::Symbol)

Draw the solar correlation map for the planetary systems `psv` around the
dependent variable `dep` and return the figure.

The figure is 900 × 900 (3 inches at 300 dpi) with the map in the first grid
column and the legend in the second. Planets are numbered in drawing order
(largest `abs_r` first). Planets that share the same `abs_r` are placed on the
same orbit, and their angles are chosen to stay away from the planets drawn on
the previously processed orbit.
"""
function plot_solar_corr_map(psv::Vector{PlanetarySystem}, dep::Symbol)
    dpi = 300
    fontsize = 16
    chtsize = 3    # inches
    sz_px = (chtsize * dpi, chtsize * dpi)
    # NOTE(review): newer Makie releases appear to rename the `resolution`
    # keyword to `size` — confirm against the installed version.
    fig = Figure(resolution=sz_px, font="Arial", fontsize=fontsize)
    ax = Axis(fig[1, 1])

    draw_orbits!(ax; fontsize=fontsize)
    draw_center(ax, string(dep); fontsize=fontsize)

    df = to_df(psv)
    nplanet = 0
    # Positions of the planets on the previously drawn orbit; typed to match
    # the named tuple returned by `draw_planet!`.
    prev_orbit = NamedTuple{(:x, :y),Tuple{Float64,Float64}}[]

    for g in groupby(df, :abs_r)
        angles = find_angles(size(g, 1), first(g.abs_r), prev_orbit; threshold=0.15)
        this_orbit = similar(prev_orbit, 0)
        for (i, row) in enumerate(eachrow(g))
            nplanet += 1
            pos = draw_planet!(ax, row, angles[i], nplanet, fontsize)
            draw_moons!(ax, row, pos, fontsize)
            push!(this_orbit, pos)
        end
        # Only the orbit just drawn constrains the next one.
        prev_orbit = this_orbit
    end

    Legend(fig[1, 2], ax, valign=:top, rowgap=10, framevisible=false)
    hidedecorations!(ax)
    hidespines!(ax)
    colsize!(fig.layout, 1, Aspect(1, 1.0))
    resize_to_layout!(fig)
    return fig
end


"""
    to_df(psv::Vector{PlanetarySystem})

Flatten `psv` into a `DataFrame` with one row per planet and the columns
`name`, `r`, `abs_r` (absolute value of `r`) and `moons`.

Rows are sorted by decreasing `abs_r`, and rows whose `abs_r` is not greater
than zero are dropped.
"""
function to_df(psv::Vector{PlanetarySystem})
    rows = [(name=sys.planet.name, r=sys.planet.r,
             abs_r=abs(sys.planet.r), moons=sys.moons) for sys in psv]
    table = DataFrame(rows)
    sort!(table, :abs_r, rev=true)
    return filter(:abs_r => >(0), table)
end


"""
    draw_planet!(ax, row, θ, nplanet, fontsize)

Draw the planet described by `row` on `ax` at angle `θ` on the circle of radius
`1 - row.abs_r`, and return its position as the named tuple `(x=..., y=...)`.

The marker is white with an outline, the text on it is the number `nplanet`,
and the legend entry reads `"<nplanet>: <row.name>"`. Outline and text are
black when `row.r >= 0` and red otherwise.
"""
function draw_planet!(ax, row, θ, nplanet, fontsize)
    radius = 1 - row.abs_r
    xp = radius * cos(θ)
    yp = radius * sin(θ)

    clr = if row.r >= 0
        :black
    else
        :red
    end

    draw_object!(ax, xp, yp;
                 m_label="$(nplanet): $(row.name)", t_label=string(nplanet),
                 m_color=:white, m_strokecolor=clr, m_strokewidth=2, m_size=32,
                 t_color=clr, t_fontsize=fontsize)
    return (x=xp, y=yp)
end


"""
    draw_moons!(ax, row, Δ, fontsize)

Draw the moons in `row.moons` on `ax` around the planet position `Δ`.

Each moon is drawn as a letter at distance 0.05 from `(Δ.x, Δ.y)`, at an angle
taken from `find_angles`. Letters run `a`–`z` then `A`–`Z` and wrap around
after 52 moons. A letter is black when the moon's `r >= 0` and red otherwise;
its legend entry reads `"\\t<letter>: <moon.name>"`. The font size is reduced
by one when the planet has more than seven moons.

# Arguments
- `row`: object with a `moons` collection of bodies having `name` and `r`.
- `Δ`: planet position, with `x` and `y` properties.
- `fontsize`: base font size of the moon letters.
"""
function draw_moons!(ax, row, Δ, fontsize)
    aZ = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    nmoons = size(row.moons, 1)
    m_pos = find_angles(nmoons)
    fsz = (nmoons > 7) ? fontsize - 1 : fontsize

    for (j, moon) in enumerate(row.moons)
        # `mod1` keeps the index within 1:52; a plain `%` yields 0 for the
        # 52nd moon, which is out of bounds and would skip 'Z'.
        m_lbl = "$(aZ[mod1(j, length(aZ))])"
        xm, ym = Δ.x + 0.05 * cos(m_pos[j]), Δ.y + 0.05 * sin(m_pos[j])
        clr = (moon.r >= 0) ? :black : :red
        # Invisible marker (size 0, no stroke): it only carries the legend entry.
        draw_object!(ax, xm, ym,
                     m_label="\t$(m_lbl): $(moon.name)", t_label=m_lbl,
                     m_color=:white, t_color=clr,
                     m_strokecolor=:transparent, m_strokewidth=0,
                     m_size=0, t_fontsize=fsz)
    end
    return nothing
end


"""
    viz(csv_fname::AbstractString, dep::Symbol)

Read the CSV file `csv_fname` into a `DataFrame`, group its columns into
planets and moons around the dependent variable `dep`, and return the solar
correlation map produced by `plot_solar_corr_map`.
"""
function viz(csv_fname::AbstractString, dep::Symbol)
    data = CSV.read(csv_fname, DataFrame)
    systems = collect_planets_moons(data, dep)
    return plot_solar_corr_map(systems, dep)
end
4 changes: 4 additions & 0 deletions src/main.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
using SolarCorrMap


# Example: draw the solar correlation map for the sample housing data, using
# the `medv` column as the dependent variable. The path is relative to the
# current working directory.
viz("data/housing.csv", :medv)

0 comments on commit 6ff0193

Please sign in to comment.