diff --git a/docs/Project.toml b/docs/Project.toml index 603cc361..9c07b862 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -7,6 +7,7 @@ DisplayAs = "0b91fe84-8a4c-11e9-3e1d-67c38462b6d6" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244" +Hyperopt = "93e5fe13-2215-51db-baaf-2e9a34fb2712" InteratomicPotentials = "a9efe35a-c65d-452d-b8a8-82646cd5cb04" InvertedIndices = "41ab1584-1d38-5bbf-9106-f11c6c58b48f" IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e" diff --git a/docs/make.jl b/docs/make.jl index ae25211e..3c29f630 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -48,6 +48,12 @@ examples = [ ] ss_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR) +# Optimization examples +examples = [ + "1 - Optimize ACE hyper-parameters: minimize force time and fitting error" => "Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl", +] +opt_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR) + # Dimension reduction examples examples = [ "1 - Reduce ACE descriptors with PCA and fit a-HfO2 dataset" => "PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl", @@ -72,6 +78,7 @@ makedocs( "How to run the examples" => "how-to-run-the-examples.md", "Basic examples" => basic_examples, "Optimize atomistic data via intelligent subsampling" => ss_examples, + "Optimize interatomic potential models via hyper-paramter optimization" => opt_examples, "Optimize interatomic potential models via dimension reduction" => dr_examples, "API" => "api.md"], format = Documenter.HTML(; diff --git a/examples/Opt-ACE-aHfO2/Project.toml b/examples/Opt-ACE-aHfO2/Project.toml new file mode 100644 index 00000000..14b25567 --- /dev/null +++ b/examples/Opt-ACE-aHfO2/Project.toml @@ -0,0 +1,16 @@ +[deps] +AtomsBase = "a963bdd2-2df7-4f54-a1ee-49d51e6be12a" +CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" +DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +DisplayAs = "0b91fe84-8a4c-11e9-3e1d-67c38462b6d6" +Hyperopt = "93e5fe13-2215-51db-baaf-2e9a34fb2712" +InteratomicPotentials = "a9efe35a-c65d-452d-b8a8-82646cd5cb04" +IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +PotentialLearning = "82b0a93c-c2e3-44bc-a418-f0f89b0ae5c2" +ProgressBars = "49802e3a-d2f1-5c88-81d8-b72133a6f568" +StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" +Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d" +UnitfulAtomic = "a7773ee8-282e-5fa2-be4e-bd808c38a91a" diff --git a/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl b/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl new file mode 100644 index 00000000..d84900f1 --- /dev/null +++ b/examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl @@ -0,0 +1,144 @@ +# # Optimize ACE hyper-parameters: minimize force time and fitting error. + +# ## a. Load packages, define paths, and create experiment folder. + +# Load packages. +using AtomsBase, InteratomicPotentials, PotentialLearning +using Unitful, UnitfulAtomic +using LinearAlgebra, Random, DisplayAs +using DataFrames, Hyperopt + +# Define paths. +path = joinpath(dirname(pathof(PotentialLearning)), "../examples/Opt-ACE-aHfO2") +ds_path = "$path/../data/a-HfO2/a-HfO2-300K-NVT-6000.extxyz" +res_path = "$path/results/"; + +# Load utility functions. +include("$path/../utils/utils.jl") + +# Create experiment folder. +run(`mkdir -p $res_path`); + +# ## b. Load atomistic dataset and split it into training and test. + +# Load atomistic dataset: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.) +ds = load_data(ds_path, uparse("eV"), uparse("Å"))[1:1000] # Only the first 1K samples are used in this example. + +# Split atomistic dataset into training and test +n_train, n_test = 50, 50 # Only 50 samples per dataset are used in this example. +conf_train, conf_test = split(ds, n_train, n_test) + + +# NEW: utilizty functions ####################################################### + +function estimate_time(confs, iap; batch_size = 30) + if length(confs) < batch_size + batch_size = length(confs) + end + random_selector = RandomSelector(length(confs), batch_size) + inds = PotentialLearning.get_random_subset(random_selector) + time = @elapsed begin + f_descr = compute_force_descriptors(confs[inds], + iap.basis, + pbar = false) + ds = DataSet(confs[inds] .+ f_descr) + f_pred = get_all_forces(ds, iap) + end + n_atoms = sum(length(get_system(c)) for c in confs[inds]) + return time / n_atoms +end + +function get_results(ho) + column_names = string.(vcat(keys(ho.results[1][2])..., ho.params...)) + rows = [[values(r[2])..., p...] for (r, p) in zip(ho.results, ho.history)] + results = DataFrame([Any[] for _ in 1:length(column_names)], column_names) + [push!(results, r) for r in rows] + return sort!(results) +end + +function plot_err_time(ho) + error = [r[2][:error] for r in ho.results] + times = [r[2][:time_us] for r in ho.results] + scatter(times, + error, + label = "", + xaxis = "Time per force per atom | µs", + yaxis = "we MSE(E, E') + wf MSE(F, F')") +end + + +# Hyperparamter optimization ################################################### +e_mae_max = 0.05 +f_mae_max = 0.05 +weights = [1.0, 1.0] +intercept = true + +ho = Hyperoptimizer(10, + species = [[:Hf, :O]], + body_order = [2, 3, 4], + polynomial_degree = [3, 4, 5], + rcutoff = [4.5, 5.0, 5.5], + wL = [0.5, 1.0, 1.5], + csp = [0.5, 1.0, 1.5], + r0 = [0.5, 1.0, 1.5]) + +for (i, species, body_order, polynomial_degree, rcutoff, wL, csp, r0) in ho + basis = ACE(species = species, + body_order = body_order, + polynomial_degree = polynomial_degree, + rcutoff = rcutoff, + wL = wL, + csp = csp, + r0 = r0) + iap = LBasisPotential(basis) + e_descr_new = compute_local_descriptors(conf_train, iap.basis, pbar = false) + f_descr_new = compute_force_descriptors(conf_train, iap.basis, pbar = false) + ds_cur = DataSet(conf_train .+ e_descr_new .+ f_descr_new) + learn!(iap, ds_cur, weights, intercept) + e, e_pred = get_all_energies(ds_cur), get_all_energies(ds_cur, iap) + f, f_pred = get_all_forces(ds_cur), get_all_forces(ds_cur, iap) + e_mae, e_rmse, e_rsq = calc_metrics(e_pred, e) + f_mae, f_rmse, f_rsq = calc_metrics(f_pred, f) + time_us = estimate_time(conf_train, iap) * 10^6 + error = weights[1] * e_rmse^2 + weights[2] * f_rmse^2 + metrics = OrderedDict( :error => error, + :e_mae => e_mae, + :e_rmse => e_rmse, + :e_rsq => e_rsq, + :f_mae => f_mae, + :f_rmse => f_rmse, + :f_rsq => f_rsq, + :time_us => time_us) + if e_mae < e_mae_max && + f_mae < f_mae_max + loss = time_us + else + loss = time_us + error * 10^3 + end + println("") + print("E_MAE:$(round(e_mae; digits=3)), ") + print("F_MAE:$(round(f_mae; digits=3)), ") + println("Time per force per atom | µs:$(round(time_us; digits=3))") + flush(stdout) + push!(ho.history, (species, body_order, polynomial_degree, rcutoff, wL, csp, r0)) + push!(ho.results, (loss, metrics, iap)) +end + +# Post-process output: calculate metrics, create plots, and save results ####### + +# Prnt and save optimization results +results = get_results(ho) +println(results) +@save_dataframe path results + +# Optimal IAP +opt_iap = ho.minimum[3] +@save_var res_path opt_iap.β +@save_var res_path opt_iap.β0 +@save_var res_path opt_iap.basis + +# Plot loss vs time +err_time = plot_err_time(ho) +@save_fig res_path err_time +DisplayAs.PNG(err_time) +