From 9f525798c8b766677e39800dbed109a75e64b3cf Mon Sep 17 00:00:00 2001
From: Emmanuel Lujan
Date: Thu, 1 Aug 2024 13:55:39 -0400
Subject: [PATCH 1/7] Plot metrics script in parallel subsampling.

---
 examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl | 125 ++++++++++++++++++
 1 file changed, 125 insertions(+)
 create mode 100644 examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl

diff --git a/examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl b/examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl
new file mode 100644
index 0000000..5f42fb7
--- /dev/null
+++ b/examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl
@@ -0,0 +1,125 @@
+using DataFrames, CSV, Statistics, Plots
+
+metrics = CSV.read("metrics.csv", DataFrame)
+res_path = "dyomet/"
+
+methods = reverse(unique(metrics.method))
+batch_sizes = unique(metrics.batch_size)
+batch_size_prop = unique(metrics.batch_size_prop)
+xticks_label = ("$b\n$(p*100)%" for (b, p) in zip(batch_sizes, batch_size_prop))
+colors = palette(:tab10)
+metrics_cols = [:e_train_mae, :f_train_mae, :e_test_mae, :f_test_mae, :time]
+metric_labels = ["E MAE | eV/atom",
+                 "F MAE | eV/Å",
+                 "E MAE | eV/atom",
+                 "F MAE | eV/Å",
+                 "Time | s"]
+for (i, metric) in enumerate(metrics_cols)
+    plot()
+    for (j, method) in enumerate(methods)
+        metric_means = []; metric_se = []
+        for batch_size in batch_sizes
+            ms = metrics[ metrics.method .== method .&&
+                          metrics.batch_size .== batch_size , metric]
+            m = mean(ms)
+            se = stdm(ms, m) / sqrt(length(ms)) # standard error
+            push!(metric_means, m)
+            push!(metric_se, se)
+        end
+        plot!(batch_sizes,
+              metric_means,
+              ribbon = metric_se,
+              color = colors[j],
+              fillalpha=.1,
+              label=method)
+        plot!(batch_sizes,
+              metric_means,
+              seriestype = :scatter,
+              thickness_scaling = 1.35,
+              markersize = 3,
+              markerstrokewidth = 0,
+              markerstrokecolor = :black,
+              markercolor = colors[j],
+              label="")
+        max = metric == :time ? 4000 : 1
+        min = metric == :time ?
+              -100 : minimum(metric_means) * 0.99
+        plot!(dpi = 300,
+              label = "",
+              xscale=:log2,
+              xticks = (batch_sizes, xticks_label),
+              ylim=(min, max),
+              xlabel = "Sample size",
+              ylabel = metric_labels[i])
+    end
+    savefig("$res_path/$metric.png")
+end
+
+
+# xformatter = :scientific,
+# markershape = :circle,
+# markercolor = :gray
+# yerror=metric_std,
+#ribbon=metric_std,
+#yerror=metric_std,
+# markerstrokewidth=0, markersize=5,
+#yaxis=:log,
+#xaxis=:log2, yaxis=:log,
+
+#for metric in [:e_train_mae, :f_train_mae, :e_test_mae, :f_test_mae, :time]
+#    scatter()
+#    for method in reverse(unique(metrics[:, :method])[1:end])
+#        batch_size_vals = metrics[metrics.method .== method, :][:, :batch_size]
+#        metric_vals = metrics[metrics.method .== method, :][:, metric]
+#        scatter!(batch_size_vals, metric_vals, label = method,
+#                 alpha = 0.5, dpi=300, markerstrokewidth=0, markersize=5, xaxis=:log2, yaxis=:log,
+#                 xlabel = "Sample size",
+#                 ylabel = "$metric")
+#    end
+#    savefig("$res_path/$metric-srs.png")
+#end
+
+#scatter()
+#for method in reverse(unique(metrics[:, :method])[2:end])
+#    batch_size_vals = metrics[metrics.method .== method, :][:, :batch_size]
+#    speedup_vals = metrics[metrics.method .== "DPP", :][:, :time] ./
+#                   metrics[metrics.method .== method, :][:, :time]
+#    scatter!(batch_size_vals, speedup_vals, label = "DPP time / $method time",
+#             alpha = 0.5, dpi=300, markerstrokewidth=0, markersize=5, xaxis=:log2,
+#             xlabel = "Sample size",
+#             ylabel = "Speedup")
+#end
+#savefig("$res_path/speedup-srs.png")
+
+
+
+#using DataFrames, CSV, Plots
+
+#metrics = CSV.read("metrics.csv", DataFrame)
+#res_path = "dyomet/"
+
+#for metric in [:e_train_mae, :f_train_mae, :e_test_mae, :f_test_mae, :time]
+#    scatter()
+#    for method in reverse(unique(metrics[:, :method])[1:end])
+#        batch_size_vals = metrics[metrics.method .== method, :][:, :batch_size]
+#        metric_vals = metrics[metrics.method .== method, :][:, metric]
+#        scatter!(batch_size_vals, metric_vals, label = method,
+#                 alpha = 0.5, dpi=300, markerstrokewidth=0, markersize=5, xaxis=:log2, yaxis=:log,
+#                 xlabel = "Sample size",
+#                 ylabel = "$metric")
+#    end
+#    savefig("$res_path/$metric-srs.png")
+#end
+
+#scatter()
+#for method in reverse(unique(metrics[:, :method])[2:end])
+#    batch_size_vals = metrics[metrics.method .== method, :][:, :batch_size]
+#    speedup_vals = metrics[metrics.method .== "DPP", :][:, :time] ./
+#                   metrics[metrics.method .== method, :][:, :time]
+#    scatter!(batch_size_vals, speedup_vals, label = "DPP time / $method time",
+#             alpha = 0.5, dpi=300, markerstrokewidth=0, markersize=5, xaxis=:log2,
+#             xlabel = "Sample size",
+#             ylabel = "Speedup")
+#end
+#savefig("$res_path/speedup-srs.png")
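Usage sketch (not part of the committed file): plotmetrics.jl reads a metrics.csv with one row per (experiment, method, sample size) and at least the columns method, batch_size, batch_size_prop, e_train_mae, f_train_mae, e_test_mae, f_test_mae and time, and it saves its figures under res_path, which must already exist. The snippet below only exercises the script on made-up data; the real metrics.csv produced later in this series contains additional columns.

    using DataFrames, CSV

    # Synthetic stand-in for metrics.csv (illustrative values only).
    rows = [(exp_number = e, method = m, batch_size_prop = p,
             batch_size = round(Int, 2400 * p),
             e_train_mae = rand(), f_train_mae = rand(),
             e_test_mae = rand(), f_test_mae = rand(), time = 100 * rand())
            for e in 1:2 for m in ["SRS", "DPP"] for p in [0.01, 0.02, 0.04]]
    CSV.write("metrics.csv", DataFrame(rows))
    # mkpath("dyomet"); include("plotmetrics.jl")   # then writes one PNG per metric into dyomet/
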
From b239ee24293b1cc5f9341aabe0ba3fd20f1c8fff Mon Sep 17 00:00:00 2001
From: Emmanuel Lujan
Date: Thu, 1 Aug 2024 14:10:33 -0400
Subject: [PATCH 2/7] small improvements in parallel subsampling

---
 .../fit-ace-dpp-full-vs-split-dataset.jl      | 51 +++++--------------
 1 file changed, 13 insertions(+), 38 deletions(-)

diff --git a/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl b/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl
index 06e3d52..7a0987f 100644
--- a/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl
+++ b/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl
@@ -102,14 +102,14 @@ end
 # Load training and test configuration datasets ################################
 
 paths = [
-#    "$ds_path/Hf2_gas_form_sorted.extxyz", # ERROR: LoadError: SingularException(18)
-#    "$ds_path/Hf2_mp103_EOS_1D_form_sorted.extxyz", # 200, :)
-#    "$ds_path/Hf2_mp103_EOS_3D_form_sorted.extxyz", # 9377, :(
-    "$ds_path/Hf2_mp103_EOS_6D_form_sorted.extxyz", # 17.2k, :-D or out of memory
-#    "$ds_path/Hf128_MC_rattled_mp100_form_sorted.extxyz", # 306, :(
-#    "$ds_path/Hf128_MC_rattled_mp103_form_sorted.extxyz", # 50, ...
-#    "$ds_path/Hf128_MC_rattled_random_form_sorted.extxyz", # 498, :(
-#    "$ds_path/Hf_mp100_EOS_1D_form_sorted.extxyz", # 201, ??
+#    "$ds_path/Hf2_gas_form_sorted.extxyz",
+#    "$ds_path/Hf2_mp103_EOS_1D_form_sorted.extxyz", # 200
+#    "$ds_path/Hf2_mp103_EOS_3D_form_sorted.extxyz", # 9377
+    "$ds_path/Hf2_mp103_EOS_6D_form_sorted.extxyz", # 17.2k
+#    "$ds_path/Hf128_MC_rattled_mp100_form_sorted.extxyz", # 306
+#    "$ds_path/Hf128_MC_rattled_mp103_form_sorted.extxyz", # 50
+#    "$ds_path/Hf128_MC_rattled_random_form_sorted.extxyz", # 498
+#    "$ds_path/Hf_mp100_EOS_1D_form_sorted.extxyz", # 201
 #    "$ds_path/Hf_mp100_primitive_EOS_1D_form_sorted.extxyz"
 ]
 
 confs = []
@@ -159,13 +159,13 @@ metric_names = [:exp_number, :method, :batch_size_prop, :batch_size, :time,
 metrics = DataFrame([Any[] for _ in 1:length(metric_names)], metric_names)
 
 # Subsampling experiments: subsample full dataset vs subsample dataset by chunks
-n_experiments = 30 # 100
+n_experiments = 100
 for j in 1:n_experiments
 
     global metrics
 
     # Define randomized training and test dataset
-    n_train = 2400 #floor(Int, 0.8 * n)
-    n_test = 600 #n - n_train
+    n_train = floor(Int, 0.8 * n)
+    n_test = n - n_train
     rnd_inds = randperm(n)
     rnd_inds_train = rnd_inds[1:n_train]
     rnd_inds_test = rnd_inds[n_train+1:n_train+n_test] # rnd_inds[n_train+1:end]
@@ -173,8 +173,7 @@ for j in 1:n_experiments
     ds_test_rnd = @views ds[rnd_inds_test]
 
     # Subsampling experiments: different sample sizes
-    for batch_size_prop in [0.01, 0.02, 0.04, 0.08, 0.16, 0.32] #[0.05, 0.10, 0.25]
-                        #[0.01, 0.02, 0.04, 0.08, 0.16, 0.32] #[0.05, 0.25, 0.5, 0.75, 0.95] #[0.05, 0.10, 0.20, 0.30] #[0.05, 0.25, 0.5, 0.75, 0.95]
+    for batch_size_prop in [0.01, 0.02, 0.04, 0.08, 0.16, 0.32]
 
         # Experiment j - SRS ###############################################
         println("Experiment:$j, method:SRS, batch_size_prop:$batch_size_prop")
@@ -254,29 +253,5 @@ end
 
 # Postprocess ##################################################################
-for metric in [:e_train_mae, :f_train_mae, :e_test_mae, :f_test_mae, :time]
-    scatter()
-    for method in reverse(unique(metrics[:, :method])[1:end])
-        batch_size_vals = metrics[metrics.method .== method, :][:, :batch_size]
-        metric_vals = metrics[metrics.method .== method, :][:, metric]
-        scatter!(batch_size_vals, metric_vals, label = method,
-                 alpha = 0.5, dpi=300, markerstrokewidth=0, markersize=5, xaxis=:log2,
-                 xlabel = "Sample size",
-                 ylabel = "$metric")
-    end
-    savefig("$res_path/$metric-srs.png")
-end
-
-scatter()
-for method in reverse(unique(metrics[:, :method])[2:end])
-    batch_size_vals = metrics[metrics.method .== method, :][:, :batch_size]
-    speedup_vals = metrics[metrics.method .== "DPP", :][:, :time] ./
-                   metrics[metrics.method .== method, :][:, :time]
-    scatter!(batch_size_vals, speedup_vals, label = "DPP time / $method time",
-             alpha = 0.5, dpi=300, markerstrokewidth=0, markersize=5, xaxis=:log2,
-             xlabel = "Sample size",
-             ylabel = "Speedup")
-end
-savefig("$res_path/speedup-srs.png")
-
+include("$base_path/examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl")

From fb0f6ae0f28e0440e8f2a207f8aebb7bd8248c6c Mon Sep 17 00:00:00 2001
From: Emmanuel Lujan
Date: Mon, 5 Aug 2024 14:53:14 -0400
Subject: [PATCH 3/7] Small change: improving plot label.

---
 examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl b/examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl
index 5f42fb7..b704f28 100644
--- a/examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl
+++ b/examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl
@@ -48,7 +48,7 @@ for (i, metric) in enumerate(metrics_cols)
               xscale=:log2,
               xticks = (batch_sizes, xticks_label),
               ylim=(min, max),
-              xlabel = "Sample size",
+              xlabel = "Training Dataset Sample Size",
               ylabel = metric_labels[i])
     end
     savefig("$res_path/$metric.png")
From dea7a4763d44ac7c4ffe69ea29364aa0655067c6 Mon Sep 17 00:00:00 2001
From: Emmanuel Lujan
Date: Wed, 28 Aug 2024 18:51:28 -0400
Subject: [PATCH 4/7] Small changes in parallel subsampling.

---
 .../fit-ace-dpp-full-vs-split-dataset.jl      | 24 +++++++++----------
 examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl |  3 +--
 2 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl b/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl
index 7a0987f..5a920db 100644
--- a/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl
+++ b/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl
@@ -11,7 +11,7 @@ using DataFrames, Plots
 # Define paths.
 base_path = haskey(ENV, "BASE_PATH") ? ENV["BASE_PATH"] : "../../"
 ds_path = "$base_path/examples/data/Hf/"
-res_path = "$base_path/examples/Parallel-DPP-ACE-HfO2/results/";
+res_path = "$base_path/examples/Parallel-DPP-ACE-HfO2/results-Hf/";
 
 # Load utility functions.
 include("$base_path/examples/utils/utils.jl")
@@ -102,15 +102,15 @@ end
 # Load training and test configuration datasets ################################
 
 paths = [
-#    "$ds_path/Hf2_gas_form_sorted.extxyz",
-#    "$ds_path/Hf2_mp103_EOS_1D_form_sorted.extxyz", # 200
-#    "$ds_path/Hf2_mp103_EOS_3D_form_sorted.extxyz", # 9377
+    "$ds_path/Hf2_gas_form_sorted.extxyz",
+    "$ds_path/Hf2_mp103_EOS_1D_form_sorted.extxyz", # 200
+    "$ds_path/Hf2_mp103_EOS_3D_form_sorted.extxyz", # 9377
     "$ds_path/Hf2_mp103_EOS_6D_form_sorted.extxyz", # 17.2k
-#    "$ds_path/Hf128_MC_rattled_mp100_form_sorted.extxyz", # 306
-#    "$ds_path/Hf128_MC_rattled_mp103_form_sorted.extxyz", # 50
-#    "$ds_path/Hf128_MC_rattled_random_form_sorted.extxyz", # 498
-#    "$ds_path/Hf_mp100_EOS_1D_form_sorted.extxyz", # 201
-#    "$ds_path/Hf_mp100_primitive_EOS_1D_form_sorted.extxyz"
+    "$ds_path/Hf128_MC_rattled_mp100_form_sorted.extxyz", # 306
+    "$ds_path/Hf128_MC_rattled_mp103_form_sorted.extxyz", # 50
+    "$ds_path/Hf128_MC_rattled_random_form_sorted.extxyz", # 498
+    "$ds_path/Hf_mp100_EOS_1D_form_sorted.extxyz", # 201
+    "$ds_path/Hf_mp100_primitive_EOS_1D_form_sorted.extxyz"
 ]
 
 confs = []
@@ -134,9 +134,9 @@ species = unique(vcat([atomic_symbol.(get_system(c).particles)
 
 # Compute ACE descriptors
 basis = ACE(species = species,
-            body_order = 4,
-            polynomial_degree = 5,
-            rcutoff = 10.0,
+            body_order = 6,
+            polynomial_degree = 6,
+            rcutoff = 7.0,
             wL = 1.0,
             csp = 1.0,
             r0 = 1.0)
diff --git a/examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl b/examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl
index b704f28..a5c578b 100644
--- a/examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl
+++ b/examples/Parallel-DPP-ACE-HfO2/plotmetrics.jl
@@ -1,7 +1,6 @@
 using DataFrames, CSV, Statistics, Plots
 
-metrics = CSV.read("metrics.csv", DataFrame)
-res_path = "dyomet/"
+metrics = CSV.read("$res_path/metrics.csv", DataFrame)
 
 methods = reverse(unique(metrics.method))
 batch_sizes = unique(metrics.batch_size)
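The retuned ACE basis above (body order 6, polynomial degree 6, 7.0 Å cutoff) generates the descriptors that both subsamplers use to compare configurations. That connection sits outside these hunks, so the sketch below is only an assumption-level reminder of the pipeline; compute_local_descriptors, compute_force_descriptors and DataSet are named as they appear in other PotentialLearning.jl ACE examples and should be treated as assumptions here, while kDPP, GlobalMean, DotProduct and get_random_subset are taken from this series.

    using PotentialLearning

    # Sketch: ACE descriptors feed the kDPP subsampler (confs and basis as defined in the script).
    e_descr = compute_local_descriptors(confs, basis)   # local (per-atom) energy descriptors
    f_descr = compute_force_descriptors(confs, basis)   # force descriptors
    ds = DataSet(confs .+ e_descr .+ f_descr)
    selector = kDPP(ds, GlobalMean(), DotProduct(); batch_size = 200)  # 200 is arbitrary here
    inds = get_random_subset(selector)                  # indices of the selected configurations
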
From ea9a0477c4c5553af17c6bd595f30d18622767a9 Mon Sep 17 00:00:00 2001
From: Emmanuel Lujan
Date: Wed, 4 Sep 2024 14:38:48 -0400
Subject: [PATCH 5/7] Fix off-by-one in dataset splitting.

---
 .../fit-ace-dpp-full-vs-split-dataset.jl | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl b/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl
index 5a920db..9e873e3 100644
--- a/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl
+++ b/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl
@@ -134,9 +134,9 @@ species = unique(vcat([atomic_symbol.(get_system(c).particles)
 
 # Compute ACE descriptors
 basis = ACE(species = species,
-            body_order = 6,
-            polynomial_degree = 6,
-            rcutoff = 7.0,
+            body_order = 8,
+            polynomial_degree = 8,
+            rcutoff = 10.0,
             wL = 1.0,
             csp = 1.0,
             r0 = 1.0)
@@ -229,7 +229,8 @@ for j in 1:n_experiments
 
         #sampling_time = @elapsed @threads for i in 1:n_threads
         sampling_time = @elapsed for i in 1:n_chunks
-            a, b = 1 + (i-1) * n_chunk, i * n_chunk
+            a, b = 1 + (i-1) * n_chunk, i * n_chunk + 1
+            b = norm(b-n_train)
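This commit adjusts how the training indices are cut into chunks before each chunk is subsampled independently. As an illustration of the bound arithmetic such a loop needs, the sketch below clamps the upper bound so the last chunk never runs past n_train; the min-based clamping is an assumption for illustration, not necessarily the literal replacement lines of the commit.

    # Contiguous chunk ranges over 1:n_train (illustration with made-up sizes).
    n_train, n_chunks = 2500, 8
    n_chunk = ceil(Int, n_train / n_chunks)
    ranges = [(1 + (i - 1) * n_chunk):min(i * n_chunk, n_train) for i in 1:n_chunks]
    @assert first(first(ranges)) == 1 && last(last(ranges)) == n_train
    @assert all(length.(ranges) .> 0)   # holds for these sizes; each range is then subsampled, e.g. with kDPP
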
Date: Wed, 4 Sep 2024 15:15:23 -0400
Subject: [PATCH 6/7] Add Try-Catch statement to handle error from excessive matrix allocation in DPP.

---
 .../fit-ace-dpp-full-vs-split-dataset.jl | 42 ++++++++++---------
 1 file changed, 23 insertions(+), 19 deletions(-)

diff --git a/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl b/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl
index 9e873e3..208cf3b 100644
--- a/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl
+++ b/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl
@@ -194,26 +194,30 @@ for j in 1:n_experiments
         @save_dataframe(res_path, metrics)
 
         # Experiment j - DPP ###############################################
-        println("Experiment:$j, method:DPP, batch_size_prop:$batch_size_prop")
-        exp_path = "$res_path/$j-DPP-bsp$batch_size_prop/"
-        run(`mkdir -p $exp_path`)
-        batch_size = floor(Int, n_train * batch_size_prop)
-        sampling_time = @elapsed begin
-            dataset_selector = kDPP( ds_train_rnd,
-                                     GlobalMean(),
-                                     DotProduct();
-                                     batch_size = batch_size)
-            inds = get_random_subset(dataset_selector)
+        try
+            println("Experiment:$j, method:DPP, batch_size_prop:$batch_size_prop")
+            exp_path = "$res_path/$j-DPP-bsp$batch_size_prop/"
+            run(`mkdir -p $exp_path`)
+            batch_size = floor(Int, n_train * batch_size_prop)
+            sampling_time = @elapsed begin
+                dataset_selector = kDPP( ds_train_rnd,
+                                         GlobalMean(),
+                                         DotProduct();
+                                         batch_size = batch_size)
+                inds = get_random_subset(dataset_selector)
+            end
+            metrics_j = fit(exp_path, (@views ds_train_rnd[inds]), ds_test_rnd, basis)
+            metrics_j = merge(OrderedDict("exp_number" => j,
+                                      "method" => "DPP",
+                                      "batch_size_prop" => batch_size_prop,
+                                      "batch_size" => batch_size,
+                                      "time" => sampling_time),
+                              merge(metrics_j...))
+            push!(metrics, metrics_j)
+            @save_dataframe(res_path, metrics)
+        catch e # Catch error from excessive matrix allocation.
+            println(e)
         end
-        metrics_j = fit(exp_path, (@views ds_train_rnd[inds]), ds_test_rnd, basis)
-        metrics_j = merge(OrderedDict("exp_number" => j,
-                                      "method" => "DPP",
-                                      "batch_size_prop" => batch_size_prop,
-                                      "batch_size" => batch_size,
-                                      "time" => sampling_time),
-                          merge(metrics_j...))
-        push!(metrics, metrics_j)
-        @save_dataframe(res_path, metrics)
 
         # Experiment j - DPP′ using n_chunks ##############################
         for n_chunks in [2, 4, 8]

From 8ef7cdb36002135355af3b4b266c1ca390ea9a41 Mon Sep 17 00:00:00 2001
From: Emmanuel Lujan
Date: Wed, 4 Sep 2024 17:32:25 -0400
Subject: [PATCH 7/7] Extend batch_size_prop range.

---
 .../fit-ace-dpp-full-vs-split-dataset.jl | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl b/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl
index 208cf3b..5a8c5f7 100644
--- a/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl
+++ b/examples/Parallel-DPP-ACE-HfO2/fit-ace-dpp-full-vs-split-dataset.jl
@@ -173,7 +173,7 @@ for j in 1:n_experiments
     ds_test_rnd = @views ds[rnd_inds_test]
 
     # Subsampling experiments: different sample sizes
-    for batch_size_prop in [0.01, 0.02, 0.04, 0.08, 0.16, 0.32]
+    for batch_size_prop in [0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 0.99]
 
         # Experiment j - SRS ###############################################
         println("Experiment:$j, method:SRS, batch_size_prop:$batch_size_prop")
@@ -200,7 +200,7 @@ for j in 1:n_experiments
             run(`mkdir -p $exp_path`)
             batch_size = floor(Int, n_train * batch_size_prop)
             sampling_time = @elapsed begin
-                dataset_selector = kDPP( ds_train_rnd,
+                dataset_selector = kDPP(ds_train_rnd,
                                         GlobalMean(),
                                         DotProduct();
                                         batch_size = batch_size)
@@ -208,10 +208,10 @@ for j in 1:n_experiments
             end
             metrics_j = fit(exp_path, (@views ds_train_rnd[inds]), ds_test_rnd, basis)
             metrics_j = merge(OrderedDict("exp_number" => j,
-                                      "method" => "DPP",
-                                      "batch_size_prop" => batch_size_prop,
-                                      "batch_size" => batch_size,
-                                      "time" => sampling_time),
+                                          "method" => "DPP",
+                                          "batch_size_prop" => batch_size_prop,
+                                          "batch_size" => batch_size,
+                                          "time" => sampling_time),
                               merge(metrics_j...))
             push!(metrics, metrics_j)
             @save_dataframe(res_path, metrics)
@@ -235,10 +235,10 @@ for j in 1:n_experiments
 
         sampling_time = @elapsed for i in 1:n_chunks
             a, b = 1 + (i-1) * n_chunk, i * n_chunk + 1
             b = norm(b-n_train)
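Taken together, the try/catch in PATCH 6, the chunked DPP′ variant, and the extended batch_size_prop range in PATCH 7 all revolve around the cost of the DPP selector. Assuming the kDPP selector materializes a dense similarity matrix over the whole candidate pool (an assumption about its implementation, consistent with the "excessive matrix allocation" wording above), that allocation grows quadratically with the training-set size and does not shrink with batch_size_prop, while splitting into n_chunks shrinks each chunk's matrix by a factor of n_chunks^2. A back-of-the-envelope check, not a measurement:

    # Dense n × n Float64 kernel matrix size in GiB (assumed storage model).
    mem_gib(n) = n^2 * 8 / 2^30
    n_train = 20_000                  # illustrative pool size, not a measured value
    println(mem_gib(n_train))         # ≈ 3.0 GiB for the full training pool
    println(mem_gib(n_train ÷ 8))     # ≈ 0.05 GiB per chunk with n_chunks = 8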