Skip to content

Commit

Permalink
Hyperparameter optimization example. First version.
Browse files Browse the repository at this point in the history
  • Loading branch information
emmanuellujan committed Jun 21, 2024
1 parent d16e181 commit 49a09e4
Show file tree
Hide file tree
Showing 4 changed files with 168 additions and 0 deletions.
1 change: 1 addition & 0 deletions docs/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ DisplayAs = "0b91fe84-8a4c-11e9-3e1d-67c38462b6d6"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244"
Hyperopt = "93e5fe13-2215-51db-baaf-2e9a34fb2712"
InteratomicPotentials = "a9efe35a-c65d-452d-b8a8-82646cd5cb04"
InvertedIndices = "41ab1584-1d38-5bbf-9106-f11c6c58b48f"
IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
Expand Down
7 changes: 7 additions & 0 deletions docs/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@ examples = [
]
ss_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR)

# Optimization examples
examples = [
"1 - Optimize ACE hyper-parameters: minimize force time and fitting error" => "Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl",
]
opt_examples = create_examples(examples, EXAMPLES_DIR, OUTPUT_DIR)

# Dimension reduction examples
examples = [
"1 - Reduce ACE descriptors with PCA and fit a-HfO2 dataset" => "PCA-ACE-aHfO2/fit-pca-ace-ahfo2.jl",
Expand All @@ -72,6 +78,7 @@ makedocs(
"How to run the examples" => "how-to-run-the-examples.md",
"Basic examples" => basic_examples,
"Optimize atomistic data via intelligent subsampling" => ss_examples,
"Optimize interatomic potential models via hyper-paramter optimization" => opt_examples,
"Optimize interatomic potential models via dimension reduction" => dr_examples,
"API" => "api.md"],
format = Documenter.HTML(;
Expand Down
16 changes: 16 additions & 0 deletions examples/Opt-ACE-aHfO2/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
[deps]
AtomsBase = "a963bdd2-2df7-4f54-a1ee-49d51e6be12a"
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
DisplayAs = "0b91fe84-8a4c-11e9-3e1d-67c38462b6d6"
Hyperopt = "93e5fe13-2215-51db-baaf-2e9a34fb2712"
InteratomicPotentials = "a9efe35a-c65d-452d-b8a8-82646cd5cb04"
IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
PotentialLearning = "82b0a93c-c2e3-44bc-a418-f0f89b0ae5c2"
ProgressBars = "49802e3a-d2f1-5c88-81d8-b72133a6f568"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d"
UnitfulAtomic = "a7773ee8-282e-5fa2-be4e-bd808c38a91a"
144 changes: 144 additions & 0 deletions examples/Opt-ACE-aHfO2/fit-opt-ace-ahfo2.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
# # Optimize ACE hyper-parameters: minimize force time and fitting error.

# ## a. Load packages, define paths, and create experiment folder.

# Load packages.
using AtomsBase, InteratomicPotentials, PotentialLearning
using Unitful, UnitfulAtomic
using LinearAlgebra, Random, DisplayAs
using DataFrames, Hyperopt

# Define paths.
path = joinpath(dirname(pathof(PotentialLearning)), "../examples/Opt-ACE-aHfO2")
ds_path = "$path/../data/a-HfO2/a-HfO2-300K-NVT-6000.extxyz"
res_path = "$path/results/";

# Load utility functions.
include("$path/../utils/utils.jl")

# Create experiment folder.
run(`mkdir -p $res_path`);

# ## b. Load atomistic dataset and split it into training and test.

# Load atomistic dataset: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.)
ds = load_data(ds_path, uparse("eV"), uparse(""))[1:1000] # Only the first 1K samples are used in this example.

# Split atomistic dataset into training and test
n_train, n_test = 50, 50 # Only 50 samples per dataset are used in this example.
conf_train, conf_test = split(ds, n_train, n_test)


# NEW: utilizty functions #######################################################

function estimate_time(confs, iap; batch_size = 30)
if length(confs) < batch_size
batch_size = length(confs)
end
random_selector = RandomSelector(length(confs), batch_size)
inds = PotentialLearning.get_random_subset(random_selector)
time = @elapsed begin
f_descr = compute_force_descriptors(confs[inds],
iap.basis,
pbar = false)
ds = DataSet(confs[inds] .+ f_descr)
f_pred = get_all_forces(ds, iap)
end
n_atoms = sum(length(get_system(c)) for c in confs[inds])
return time / n_atoms
end

function get_results(ho)
column_names = string.(vcat(keys(ho.results[1][2])..., ho.params...))
rows = [[values(r[2])..., p...] for (r, p) in zip(ho.results, ho.history)]
results = DataFrame([Any[] for _ in 1:length(column_names)], column_names)
[push!(results, r) for r in rows]
return sort!(results)
end

function plot_err_time(ho)
error = [r[2][:error] for r in ho.results]
times = [r[2][:time_us] for r in ho.results]
scatter(times,
error,
label = "",
xaxis = "Time per force per atom | µs",
yaxis = "we MSE(E, E') + wf MSE(F, F')")
end


# Hyperparamter optimization ###################################################
e_mae_max = 0.05
f_mae_max = 0.05
weights = [1.0, 1.0]
intercept = true

ho = Hyperoptimizer(10,
species = [[:Hf, :O]],
body_order = [2, 3, 4],
polynomial_degree = [3, 4, 5],
rcutoff = [4.5, 5.0, 5.5],
wL = [0.5, 1.0, 1.5],
csp = [0.5, 1.0, 1.5],
r0 = [0.5, 1.0, 1.5])

for (i, species, body_order, polynomial_degree, rcutoff, wL, csp, r0) in ho
basis = ACE(species = species,
body_order = body_order,
polynomial_degree = polynomial_degree,
rcutoff = rcutoff,
wL = wL,
csp = csp,
r0 = r0)
iap = LBasisPotential(basis)
e_descr_new = compute_local_descriptors(conf_train, iap.basis, pbar = false)
f_descr_new = compute_force_descriptors(conf_train, iap.basis, pbar = false)
ds_cur = DataSet(conf_train .+ e_descr_new .+ f_descr_new)
learn!(iap, ds_cur, weights, intercept)
e, e_pred = get_all_energies(ds_cur), get_all_energies(ds_cur, iap)
f, f_pred = get_all_forces(ds_cur), get_all_forces(ds_cur, iap)
e_mae, e_rmse, e_rsq = calc_metrics(e_pred, e)
f_mae, f_rmse, f_rsq = calc_metrics(f_pred, f)
time_us = estimate_time(conf_train, iap) * 10^6
error = weights[1] * e_rmse^2 + weights[2] * f_rmse^2
metrics = OrderedDict( :error => error,
:e_mae => e_mae,
:e_rmse => e_rmse,
:e_rsq => e_rsq,
:f_mae => f_mae,
:f_rmse => f_rmse,
:f_rsq => f_rsq,
:time_us => time_us)
if e_mae < e_mae_max &&
f_mae < f_mae_max
loss = time_us
else
loss = time_us + error * 10^3
end
println("")
print("E_MAE:$(round(e_mae; digits=3)), ")
print("F_MAE:$(round(f_mae; digits=3)), ")
println("Time per force per atom | µs:$(round(time_us; digits=3))")
flush(stdout)
push!(ho.history, (species, body_order, polynomial_degree, rcutoff, wL, csp, r0))
push!(ho.results, (loss, metrics, iap))
end

# Post-process output: calculate metrics, create plots, and save results #######

# Prnt and save optimization results
results = get_results(ho)
println(results)
@save_dataframe path results

# Optimal IAP
opt_iap = ho.minimum[3]
@save_var res_path opt_iap.β
@save_var res_path opt_iap.β0
@save_var res_path opt_iap.basis

# Plot loss vs time
err_time = plot_err_time(ho)
@save_fig res_path err_time
DisplayAs.PNG(err_time)

0 comments on commit 49a09e4

Please sign in to comment.