From 9f525798c8b766677e39800dbed109a75e64b3cf Mon Sep 17 00:00:00 2001
From: Emmanuel Lujan
Date: Thu, 1 Aug 2024 13:55:39 -0400
Subject: [PATCH 1/7] Plot metrics script in parallel subsampling.

---
 examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl | 125 ++++++++++++++++++
 1 file changed, 125 insertions(+)
 create mode 100644 examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl

diff --git a/examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl b/examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl
new file mode 100644
index 0000000..5f42fb7
--- /dev/null
+++ b/examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl
@@ -0,0 +1,125 @@
+using DataFrames, CSV, Statistics, Plots
+
+metrics = CSV.read("metrics.csv", DataFrame)
+res_path = "dyomet/"
+
+methods = reverse(unique(metrics.method))
+batch_sizes = unique(metrics.batch_size)
+batch_size_prop = unique(metrics.batch_size_prop)
+xticks_label = ("$b\n$(p*100)%" for (b, p) in zip(batch_sizes, batch_size_prop))
+colors = palette(:tab10)
+metrics_cols = [:e_train_mae, :f_train_mae, :e_test_mae, :f_test_mae, :time]
+metric_labels = ["E MAE | eV/atom",
+                 "F MAE | eV/Å",
+                 "E MAE | eV/atom",
+                 "F MAE | eV/Å",
+                 "Time | s"]
+for (i, metric) in enumerate(metrics_cols)
+    plot()
+    for (j, method) in enumerate(methods)
+        metric_means = []; metric_se = []
+        for batch_size in batch_sizes
+            ms = metrics[ metrics.method .== method .&&
+                          metrics.batch_size .== batch_size , metric]
+            m = mean(ms)
+            se = stdm(ms, m) / sqrt(length(ms)) # standard error
+            push!(metric_means, m)
+            push!(metric_se, se)
+        end
+        plot!(batch_sizes,
+              metric_means,
+              ribbon = metric_se,
+              color = colors[j],
+              fillalpha=.1,
+              label=method)
+        plot!(batch_sizes,
+              metric_means,
+              seriestype = :scatter,
+              thickness_scaling = 1.35,
+              markersize = 3,
+              markerstrokewidth = 0,
+              markerstrokecolor = :black,
+              markercolor = colors[j],
+              label="")
+        max = metric == :time ? 4000 : 1
+        min = metric == :time ?
+              -100 : minimum(metric_means) * 0.99
+        plot!(dpi = 300,
+              label = "",
+              xscale=:log2,
+              xticks = (batch_sizes, xticks_label),
+              ylim=(min, max),
+              xlabel = "Sample size",
+              ylabel = metric_labels[i])
+    end
+    savefig("$res_path/$metric.png")
+end
+
+
+# xformatter = :scientific,
+# markershape = :circle,
+# markercolor = :gray
+# yerror=metric_std,
+#ribbon=metric_std,
+#yerror=metric_std,
+# markerstrokewidth=0, markersize=5,
+#yaxis=:log,
+#xaxis=:log2, yaxis=:log,
+
+#for metric in [:e_train_mae, :f_train_mae, :e_test_mae, :f_test_mae, :time]
+#    scatter()
+#    for method in reverse(unique(metrics[:, :method])[1:end])
+#        batch_size_vals = metrics[metrics.method .== method, :][:, :batch_size]
+#        metric_vals = metrics[metrics.method .== method, :][:, metric]
+#        scatter!(batch_size_vals, metric_vals, label = method,
+#                 alpha = 0.5, dpi=300, markerstrokewidth=0, markersize=5, xaxis=:log2, yaxis=:log,
+#                 xlabel = "Sample size",
+#                 ylabel = "$metric")
+#    end
+#    savefig("$res_path/$metric-srs.png")
+#end
+
+#scatter()
+#for method in reverse(unique(metrics[:, :method])[2:end])
+#    batch_size_vals = metrics[metrics.method .== method, :][:, :batch_size]
+#    speedup_vals = metrics[metrics.method .== "DPP", :][:, :time] ./
+#                   metrics[metrics.method .== method, :][:, :time]
+#    scatter!(batch_size_vals, speedup_vals, label = "DPP time / $method time",
+#             alpha = 0.5, dpi=300, markerstrokewidth=0, markersize=5, xaxis=:log2,
+#             xlabel = "Sample size",
+#             ylabel = "Speedup")
+#end
+#savefig("$res_path/speedup-srs.png")
+
+
+
+#using DataFrames, CSV, Plots
+
+#metrics = CSV.read("metrics.csv", DataFrame)
+#res_path = "dyomet/"
+
+#for metric in [:e_train_mae, :f_train_mae, :e_test_mae, :f_test_mae, :time]
+#    scatter()
+#    for method in reverse(unique(metrics[:, :method])[1:end])
+#        batch_size_vals = metrics[metrics.method .== method, :][:, :batch_size]
+#        metric_vals = metrics[metrics.method .== method, :][:, metric]
+#        scatter!(batch_size_vals, metric_vals, label = method,
+#                 alpha = 0.5, dpi=300, markerstrokewidth=0, markersize=5, xaxis=:log2, yaxis=:log,
+#                 xlabel = "Sample size",
+#                 ylabel = "$metric")
+#    end
+#    savefig("$res_path/$metric-srs.png")
+#end
+
+#scatter()
+#for method in reverse(unique(metrics[:, :method])[2:end])
+#    batch_size_vals = metrics[metrics.method .== method, :][:, :batch_size]
+#    speedup_vals = metrics[metrics.method .== "DPP", :][:, :time] ./
+#                   metrics[metrics.method .== method, :][:, :time]
+#    scatter!(batch_size_vals, speedup_vals, label = "DPP time / $method time",
+#             alpha = 0.5, dpi=300, markerstrokewidth=0, markersize=5, xaxis=:log2,
+#             xlabel = "Sample size",
+#             ylabel = "Speedup")
+#end
+#savefig("$res_path/speedup-srs.png")
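Usage sketch (not part of the committed file): plotmetrics.jl reads a metrics.csv with one row per (experiment, method, sample size) and at least the columns method, batch_size, batch_size_prop, e_train_mae, f_train_mae, e_test_mae, f_test_mae and time, and it saves its figures under res_path, which must already exist. The snippet below only exercises the script on made-up data; the real metrics.csv produced later in this series contains additional columns.

    using DataFrames, CSV

    # Synthetic stand-in for metrics.csv (illustrative values only).
    rows = [(exp_number = e, method = m, batch_size_prop = p,
             batch_size = round(Int, 2400 * p),
             e_train_mae = rand(), f_train_mae = rand(),
             e_test_mae = rand(), f_test_mae = rand(), time = 100 * rand())
            for e in 1:2 for m in ["SRS", "DPP"] for p in [0.01, 0.02, 0.04]]
    CSV.write("metrics.csv", DataFrame(rows))
    # mkpath("dyomet"); include("plotmetrics.jl")   # then writes one PNG per metric into dyomet/
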
From b239ee24293b1cc5f9341aabe0ba3fd20f1c8fff Mon Sep 17 00:00:00 2001
From: Emmanuel Lujan
Date: Thu, 1 Aug 2024 14:10:33 -0400
Subject: [PATCH 2/7] small improvements in parallel subsampling

---
 .../fit-ace-dpp-full-vs-split-dataset.jl      | 51 +++++--------------
 1 file changed, 13 insertions(+), 38 deletions(-)

diff --git a/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl b/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl
index 06e3d52..7a0987f 100644
--- a/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl
+++ b/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl
@@ -102,14 +102,14 @@ end
 # Load training and test configuration datasets ################################
 
 paths = [
-#    "$ds_path/Hf2_gas_form_sorted.extxyz", # ERROR: LoadError: SingularException(18)
-#    "$ds_path/Hf2_mp103_EOS_1D_form_sorted.extxyz", # 200, :)
-#    "$ds_path/Hf2_mp103_EOS_3D_form_sorted.extxyz", # 9377, :(
-    "$ds_path/Hf2_mp103_EOS_6D_form_sorted.extxyz", # 17.2k, :-D or out of memory
-#    "$ds_path/Hf128_MC_rattled_mp100_form_sorted.extxyz", # 306, :(
-#    "$ds_path/Hf128_MC_rattled_mp103_form_sorted.extxyz", # 50, ...
-#    "$ds_path/Hf128_MC_rattled_random_form_sorted.extxyz", # 498, :(
-#    "$ds_path/Hf_mp100_EOS_1D_form_sorted.extxyz", # 201, ??
+#    "$ds_path/Hf2_gas_form_sorted.extxyz",
+#    "$ds_path/Hf2_mp103_EOS_1D_form_sorted.extxyz", # 200
+#    "$ds_path/Hf2_mp103_EOS_3D_form_sorted.extxyz", # 9377
+    "$ds_path/Hf2_mp103_EOS_6D_form_sorted.extxyz", # 17.2k
+#    "$ds_path/Hf128_MC_rattled_mp100_form_sorted.extxyz", # 306
+#    "$ds_path/Hf128_MC_rattled_mp103_form_sorted.extxyz", # 50
+#    "$ds_path/Hf128_MC_rattled_random_form_sorted.extxyz", # 498
+#    "$ds_path/Hf_mp100_EOS_1D_form_sorted.extxyz", # 201
 #    "$ds_path/Hf_mp100_primitive_EOS_1D_form_sorted.extxyz"
 ]
 
 confs = []
@@ -159,13 +159,13 @@ metric_names = [:exp_number, :method, :batch_size_prop, :batch_size, :time,
 metrics = DataFrame([Any[] for _ in 1:length(metric_names)], metric_names)
 
 # Subsampling experiments: subsample full dataset vs subsample dataset by chunks
-n_experiments = 30 # 100
+n_experiments = 100
 for j in 1:n_experiments
 
     global metrics
 
     # Define randomized training and test dataset
-    n_train = 2400 #floor(Int, 0.8 * n)
-    n_test = 600 #n - n_train
+    n_train = floor(Int, 0.8 * n)
+    n_test = n - n_train
     rnd_inds = randperm(n)
     rnd_inds_train = rnd_inds[1:n_train]
     rnd_inds_test = rnd_inds[n_train+1:n_train+n_test] # rnd_inds[n_train+1:end]
@@ -173,8 +173,7 @@ for j in 1:n_experiments
     ds_test_rnd = @views ds[rnd_inds_test]
 
     # Subsampling experiments: different sample sizes
-    for batch_size_prop in [0.01, 0.02, 0.04, 0.08, 0.16, 0.32] #[0.05, 0.10, 0.25]
-                        #[0.01, 0.02, 0.04, 0.08, 0.16, 0.32] #[0.05, 0.25, 0.5, 0.75, 0.95] #[0.05, 0.10, 0.20, 0.30] #[0.05, 0.25, 0.5, 0.75, 0.95]
+    for batch_size_prop in [0.01, 0.02, 0.04, 0.08, 0.16, 0.32]
 
         # Experiment j - SRS ###############################################
         println("Experiment:$j, method:SRS, batch_size_prop:$batch_size_prop")
@@ -254,29 +253,5 @@ end
 
 # Postprocess ##################################################################
-for metric in [:e_train_mae, :f_train_mae, :e_test_mae, :f_test_mae, :time]
-    scatter()
-    for method in reverse(unique(metrics[:, :method])[1:end])
-        batch_size_vals = metrics[metrics.method .== method, :][:, :batch_size]
-        metric_vals = metrics[metrics.method .== method, :][:, metric]
-        scatter!(batch_size_vals, metric_vals, label = method,
-                 alpha = 0.5, dpi=300, markerstrokewidth=0, markersize=5, xaxis=:log2,
-                 xlabel = "Sample size",
-                 ylabel = "$metric")
-    end
-    savefig("$res_path/$metric-srs.png")
-end
-
-scatter()
-for method in reverse(unique(metrics[:, :method])[2:end])
-    batch_size_vals = metrics[metrics.method .== method, :][:, :batch_size]
-    speedup_vals = metrics[metrics.method .== "DPP", :][:, :time] ./
-                   metrics[metrics.method .== method, :][:, :time]
-    scatter!(batch_size_vals, speedup_vals, label = "DPP time / $method time",
-             alpha = 0.5, dpi=300, markerstrokewidth=0, markersize=5, xaxis=:log2,
-             xlabel = "Sample size",
-             ylabel = "Speedup")
-end
-savefig("$res_path/speedup-srs.png")
-
+include("$base_path/examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl")

From fb0f6ae0f28e0440e8f2a207f8aebb7bd8248c6c Mon Sep 17 00:00:00 2001
From: Emmanuel Lujan
Date: Mon, 5 Aug 2024 14:53:14 -0400
Subject: [PATCH 3/7] Small change: improving plot label.

---
 examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl b/examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl
index 5f42fb7..b704f28 100644
--- a/examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl
+++ b/examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl
@@ -48,7 +48,7 @@ for (i, metric) in enumerate(metrics_cols)
               xscale=:log2,
               xticks = (batch_sizes, xticks_label),
               ylim=(min, max),
-              xlabel = "Sample size",
+              xlabel = "Training Dataset Sample Size",
               ylabel = metric_labels[i])
     end
     savefig("$res_path/$metric.png")
From dea7a4763d44ac7c4ffe69ea29364aa0655067c6 Mon Sep 17 00:00:00 2001
From: Emmanuel Lujan
Date: Wed, 28 Aug 2024 18:51:28 -0400
Subject: [PATCH 4/7] Small changes in parallel subsampling.

---
 .../fit-ace-dpp-full-vs-split-dataset.jl      | 24 +++++++++----------
 examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl |  3 +--
 2 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl b/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl
index 7a0987f..5a920db 100644
--- a/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl
+++ b/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl
@@ -11,7 +11,7 @@ using DataFrames, Plots
 # Define paths.
 base_path = haskey(ENV, "BASE_PATH") ? ENV["BASE_PATH"] : "../../"
 ds_path = "$base_path/examples/data/Hf/"
-res_path = "$base_path/examples/Parallel-DPP-ACE-HfO2/results/";
+res_path = "$base_path/examples/Parallel-DPP-ACE-HfO2/results-Hf/";
 
 # Load utility functions.
 include("$base_path/examples/utils/utils.jl")
@@ -102,15 +102,15 @@ end
 # Load training and test configuration datasets ################################
 
 paths = [
-#    "$ds_path/Hf2_gas_form_sorted.extxyz",
-#    "$ds_path/Hf2_mp103_EOS_1D_form_sorted.extxyz", # 200
-#    "$ds_path/Hf2_mp103_EOS_3D_form_sorted.extxyz", # 9377
+    "$ds_path/Hf2_gas_form_sorted.extxyz",
+    "$ds_path/Hf2_mp103_EOS_1D_form_sorted.extxyz", # 200
+    "$ds_path/Hf2_mp103_EOS_3D_form_sorted.extxyz", # 9377
     "$ds_path/Hf2_mp103_EOS_6D_form_sorted.extxyz", # 17.2k
-#    "$ds_path/Hf128_MC_rattled_mp100_form_sorted.extxyz", # 306
-#    "$ds_path/Hf128_MC_rattled_mp103_form_sorted.extxyz", # 50
-#    "$ds_path/Hf128_MC_rattled_random_form_sorted.extxyz", # 498
-#    "$ds_path/Hf_mp100_EOS_1D_form_sorted.extxyz", # 201
-#    "$ds_path/Hf_mp100_primitive_EOS_1D_form_sorted.extxyz"
+    "$ds_path/Hf128_MC_rattled_mp100_form_sorted.extxyz", # 306
+    "$ds_path/Hf128_MC_rattled_mp103_form_sorted.extxyz", # 50
+    "$ds_path/Hf128_MC_rattled_random_form_sorted.extxyz", # 498
+    "$ds_path/Hf_mp100_EOS_1D_form_sorted.extxyz", # 201
+    "$ds_path/Hf_mp100_primitive_EOS_1D_form_sorted.extxyz"
 ]
 
 confs = []
@@ -134,9 +134,9 @@ species = unique(vcat([atomic_symbol.(get_system(c).particles)
 
 # Compute ACE descriptors
 basis = ACE(species = species,
-            body_order = 4,
-            polynomial_degree = 5,
-            rcutoff = 10.0,
+            body_order = 6,
+            polynomial_degree = 6,
+            rcutoff = 7.0,
             wL = 1.0,
             csp = 1.0,
             r0 = 1.0)
diff --git a/examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl b/examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl
index b704f28..a5c578b 100644
--- a/examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl
+++ b/examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl
@@ -1,7 +1,6 @@
 using DataFrames, CSV, Statistics, Plots
 
-metrics = CSV.read("metrics.csv", DataFrame)
-res_path = "dyomet/"
+metrics = CSV.read("$res_path/metrics.csv", DataFrame)
 
 methods = reverse(unique(metrics.method))
 batch_sizes = unique(metrics.batch_size)
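The retuned ACE basis above (body order 6, polynomial degree 6, 7.0 Å cutoff) generates the descriptors that both subsamplers use to compare configurations. That connection sits outside these hunks, so the sketch below is only an assumption-level reminder of the pipeline; compute_local_descriptors, compute_force_descriptors and DataSet are named as they appear in other PotentialLearning.jl ACE examples and should be treated as assumptions here, while kDPP, GlobalMean, DotProduct and get_random_subset are taken from this series.

    using PotentialLearning

    # Sketch: ACE descriptors feed the kDPP subsampler (confs and basis as defined in the script).
    e_descr = compute_local_descriptors(confs, basis)   # local (per-atom) energy descriptors
    f_descr = compute_force_descriptors(confs, basis)   # force descriptors
    ds = DataSet(confs .+ e_descr .+ f_descr)
    selector = kDPP(ds, GlobalMean(), DotProduct(); batch_size = 200)  # 200 is arbitrary here
    inds = get_random_subset(selector)                  # indices of the selected configurations
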
From ea9a0477c4c5553af17c6bd595f30d18622767a9 Mon Sep 17 00:00:00 2001
From: Emmanuel Lujan
Date: Wed, 4 Sep 2024 14:38:48 -0400
Subject: [PATCH 5/7] Fix off-by-one in dataset splitting.

---
 .../fit-ace-dpp-full-vs-split-dataset.jl | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl b/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl
index 5a920db..9e873e3 100644
--- a/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl
+++ b/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl
@@ -134,9 +134,9 @@ species = unique(vcat([atomic_symbol.(get_system(c).particles)
 
 # Compute ACE descriptors
 basis = ACE(species = species,
-            body_order = 6,
-            polynomial_degree = 6,
-            rcutoff = 7.0,
+            body_order = 8,
+            polynomial_degree = 8,
+            rcutoff = 10.0,
             wL = 1.0,
             csp = 1.0,
             r0 = 1.0)
@@ -229,7 +229,8 @@ for j in 1:n_experiments
 
         #sampling_time = @elapsed @threads for i in 1:n_threads
         sampling_time = @elapsed for i in 1:n_chunks
-            a, b = 1 + (i-1) * n_chunk, i * n_chunk
+            a, b = 1 + (i-1) * n_chunk, i * n_chunk + 1
+            b = norm(b-n_train)
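This commit adjusts how the training indices are cut into chunks before each chunk is subsampled independently. As an illustration of the bound arithmetic such a loop needs, the sketch below clamps the upper bound so the last chunk never runs past n_train; the min-based clamping is an assumption for illustration, not necessarily the literal replacement lines of the commit.

    # Contiguous chunk ranges over 1:n_train (illustration with made-up sizes).
    n_train, n_chunks = 2500, 8
    n_chunk = ceil(Int, n_train / n_chunks)
    ranges = [(1 + (i - 1) * n_chunk):min(i * n_chunk, n_train) for i in 1:n_chunks]
    @assert first(first(ranges)) == 1 && last(last(ranges)) == n_train
    @assert all(length.(ranges) .> 0)   # holds for these sizes; each range is then subsampled, e.g. with kDPP
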
Date: Wed, 4 Sep 2024 15:15:23 -0400
Subject: [PATCH 6/7] Add Try-Catch statement to handle error from excessive matrix allocation in DPP.

---
 .../fit-ace-dpp-full-vs-split-dataset.jl | 42 ++++++++++---------
 1 file changed, 23 insertions(+), 19 deletions(-)

diff --git a/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl b/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl
index 9e873e3..208cf3b 100644
--- a/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl
+++ b/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl
@@ -194,26 +194,30 @@ for j in 1:n_experiments
         @save_dataframe(res_path, metrics)
 
         # Experiment j - DPP ###############################################
-        println("Experiment:$j, method:DPP, batch_size_prop:$batch_size_prop")
-        exp_path = "$res_path/$j-DPP-bsp$batch_size_prop/"
-        run(`mkdir -p $exp_path`)
-        batch_size = floor(Int, n_train * batch_size_prop)
-        sampling_time = @elapsed begin
-            dataset_selector = kDPP( ds_train_rnd,
-                                     GlobalMean(),
-                                     DotProduct();
-                                     batch_size = batch_size)
-            inds = get_random_subset(dataset_selector)
+        try
+            println("Experiment:$j, method:DPP, batch_size_prop:$batch_size_prop")
+            exp_path = "$res_path/$j-DPP-bsp$batch_size_prop/"
+            run(`mkdir -p $exp_path`)
+            batch_size = floor(Int, n_train * batch_size_prop)
+            sampling_time = @elapsed begin
+                dataset_selector = kDPP( ds_train_rnd,
+                                         GlobalMean(),
+                                         DotProduct();
+                                         batch_size = batch_size)
+                inds = get_random_subset(dataset_selector)
+            end
+            metrics_j = fit(exp_path, (@views ds_train_rnd[inds]), ds_test_rnd, basis)
+            metrics_j = merge(OrderedDict("exp_number" => j,
+                                      "method" => "DPP",
+                                      "batch_size_prop" => batch_size_prop,
+                                      "batch_size" => batch_size,
+                                      "time" => sampling_time),
+                              merge(metrics_j...))
+            push!(metrics, metrics_j)
+            @save_dataframe(res_path, metrics)
+        catch e # Catch error from excessive matrix allocation.
+            println(e)
         end
-        metrics_j = fit(exp_path, (@views ds_train_rnd[inds]), ds_test_rnd, basis)
-        metrics_j = merge(OrderedDict("exp_number" => j,
-                                      "method" => "DPP",
-                                      "batch_size_prop" => batch_size_prop,
-                                      "batch_size" => batch_size,
-                                      "time" => sampling_time),
-                          merge(metrics_j...))
-        push!(metrics, metrics_j)
-        @save_dataframe(res_path, metrics)
 
         # Experiment j - DPP′ using n_chunks ##############################
         for n_chunks in [2, 4, 8]

From 8ef7cdb36002135355af3b4b266c1ca390ea9a41 Mon Sep 17 00:00:00 2001
From: Emmanuel Lujan
Date: Wed, 4 Sep 2024 17:32:25 -0400
Subject: [PATCH 7/7] Extend batch_size_prop range.

---
 .../fit-ace-dpp-full-vs-split-dataset.jl | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl b/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl
index 208cf3b..5a8c5f7 100644
--- a/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl
+++ b/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl
@@ -173,7 +173,7 @@ for j in 1:n_experiments
     ds_test_rnd = @views ds[rnd_inds_test]
 
     # Subsampling experiments: different sample sizes
-    for batch_size_prop in [0.01, 0.02, 0.04, 0.08, 0.16, 0.32]
+    for batch_size_prop in [0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 0.99]
 
         # Experiment j - SRS ###############################################
         println("Experiment:$j, method:SRS, batch_size_prop:$batch_size_prop")
@@ -200,7 +200,7 @@ for j in 1:n_experiments
             run(`mkdir -p $exp_path`)
             batch_size = floor(Int, n_train * batch_size_prop)
             sampling_time = @elapsed begin
-                dataset_selector = kDPP( ds_train_rnd,
+                dataset_selector = kDPP(ds_train_rnd,
                                         GlobalMean(),
                                         DotProduct();
                                         batch_size = batch_size)
@@ -208,10 +208,10 @@ for j in 1:n_experiments
             end
             metrics_j = fit(exp_path, (@views ds_train_rnd[inds]), ds_test_rnd, basis)
             metrics_j = merge(OrderedDict("exp_number" => j,
-                                      "method" => "DPP",
-                                      "batch_size_prop" => batch_size_prop,
-                                      "batch_size" => batch_size,
-                                      "time" => sampling_time),
+                                          "method" => "DPP",
+                                          "batch_size_prop" => batch_size_prop,
+                                          "batch_size" => batch_size,
+                                          "time" => sampling_time),
                               merge(metrics_j...))
             push!(metrics, metrics_j)
             @save_dataframe(res_path, metrics)
@@ -235,10 +235,10 @@ for j in 1:n_experiments
 
         sampling_time = @elapsed for i in 1:n_chunks
             a, b = 1 + (i-1) * n_chunk, i * n_chunk + 1
             b = norm(b-n_train)
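Taken together, the try/catch in PATCH 6, the chunked DPP′ variant, and the extended batch_size_prop range in PATCH 7 all revolve around the cost of the DPP selector. Assuming the kDPP selector materializes a dense similarity matrix over the whole candidate pool (an assumption about its implementation, consistent with the "excessive matrix allocation" wording above), that allocation grows quadratically with the training-set size and does not shrink with batch_size_prop, while splitting into n_chunks shrinks each chunk's matrix by a factor of n_chunks^2. A back-of-the-envelope check, not a measurement:

    # Dense n × n Float64 kernel matrix size in GiB (assumed storage model).
    mem_gib(n) = n^2 * 8 / 2^30
    n_train = 20_000                  # illustrative pool size, not a measured value
    println(mem_gib(n_train))         # ≈ 3.0 GiB for the full training pool
    println(mem_gib(n_train ÷ 8))     # ≈ 0.05 GiB per chunk with n_chunks = 8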