Fix MLIP related issues with the benchmark results file (autoatml#243)

* increase M3GNET RMSE tolerance because of fluctuations * prettify GAP parameters representation and remove GAP related things from hyperparameter header * rearrange file entries to make it look better, added user-defined for (hyper)params and full for Database type if not defined otherwise * fix unit test * fix unit test * small format fix * fix unit test * removed a comment * decrease default max_length for rattled supercells
naik-aakash · Nov 15, 2024 · 5ced788 · 5ced788
1 parent 40a586b
commit 5ced788
Show file tree

Hide file tree

Showing 6 changed files with 39 additions and 29 deletions.
diff --git a/src/autoplex/auto/phonons/flows.py b/src/autoplex/auto/phonons/flows.py
@@ -442,7 +442,6 @@ def make(
                                             dft_references=dft_references,
                                             supercell_settings=self.supercell_settings,
                                             displacement=displacement,
-                                            # TODO add a hyper parameter here for the benchmark
                                             atomwise_regularization_parameter=atomwise_reg_parameter,
                                             soap_dict=soap_dict,
                                             **self.benchmark_kwargs,

diff --git a/src/autoplex/benchmark/phonons/jobs.py b/src/autoplex/benchmark/phonons/jobs.py
@@ -38,17 +38,17 @@ def write_benchmark_metrics(
         encoding="utf-8",
     ) as file:
         file.write(
-            "%-11s%-11s%-12s%-18s%-12s%-55s%-16s%-16s%-14s"
+            "%-11s%-11s%-12s%-18s%-12s%-16s%-16s%-16s%-18s"
             % (
                 "Potential",
                 "Structure",
                 "MPID",
                 "Displacement (Å)",
                 "RMSE (THz)",
-                "Hyperparameters (atom-wise f, n_sparse, SOAP delta)",
-                "Database type",
                 "imagmodes(pot)",
                 "imagmodes(dft)",
+                "Database type",
+                "(Hyper-)Parameters",
             )
         )
 
@@ -59,15 +59,25 @@ def write_benchmark_metrics(
             encoding="utf-8",
         ) as file:
             # Build the SOAP dictionary or suffix value
-            soap_params = {
+            soap_params = {  # (atom-wise f, n_sparse, SOAP delta)
                 f"f={metric['atomwise_regularization_parameter']}": metric["soap_dict"]
             }
 
+            if metric["ml_model"] == "GAP":
+                key = next(iter(soap_params.keys()))
+                value = next(iter(soap_params.values()))
+                pretty_hyper_params = f"atom-wise {key}: n_sparse = {value['n_sparse']}, SOAP delta = {value['delta']}"
+            else:
+                pretty_hyper_params = "user defined"
+
+            if not metric["suffix"]:
+                metric["suffix"] = "full"
+
             file.write(
                 f"\n{metric['ml_model']:<11}{structure_composition:<11}{metric['mp_id']:<12}"
                 f"{metric['displacement']:<18.2f}{metric['benchmark_phonon_rmse']:<12.5f}"
-                f"{soap_params!s:<55}{metric['suffix']!s:<16}{metric['ml_imaginary_modes']!s:<16}"
-                f"{metric['dft_imaginary_modes']!s:<5}"
+                f"{metric['ml_imaginary_modes']!s:<16}{metric['dft_imaginary_modes']!s:<16}"
+                f"{metric['suffix']!s:<16}{pretty_hyper_params!s:<50}"
             )
 
     return Response(output=metrics)
diff --git a/src/autoplex/data/phonons/flows.py b/src/autoplex/data/phonons/flows.py
@@ -393,7 +393,7 @@ def make(
             supercell_matrix_job = reduce_supercell_size_job(
                 structure=structure,
                 min_length=self.supercell_settings.get("min_length", 12),
-                max_length=self.supercell_settings.get("max_length", 25),
+                max_length=self.supercell_settings.get("max_length", 20),
                 fallback_min_length=self.supercell_settings.get(
                     "fallback_min_length", 10
                 ),

diff --git a/tests/auto/test_auto_flows.py b/tests/auto/test_auto_flows.py
@@ -530,7 +530,7 @@ def test_complete_dft_vs_ml_benchmark_workflow_gap(
 
     # check if soap_default_dict is correctly constructed from
     # n_sparse and delta values in mlip_phonon_default json file
-    expected_soap_dict = "{'f=0.1': {'n_sparse': 6000, 'delta': 0.5}}"
+    expected_soap_dict = "atom-wise f=0.1: n_sparse = 6000, SOAP delta = 0.5"
     results_files = glob.glob('job*/results_LiCl.txt')
 
     for file_path in results_files:
@@ -584,7 +584,7 @@ def test_complete_dft_vs_ml_benchmark_workflow_m3gnet(
     assert complete_workflow_m3gnet.jobs[4].name == "complete_benchmark_mp-22905"
     assert responses[complete_workflow_m3gnet.jobs[-1].output.uuid][1].output[0][0][
                "benchmark_phonon_rmse"] == pytest.approx(
-        5.2622804443539355, abs=1.0  # bad fit data
+        5.2622804443539355, abs=3.0  # bad fit data, fluctuates between 4 and 7
     )
 
 
@@ -996,7 +996,7 @@ def test_complete_dft_vs_ml_benchmark_workflow_with_sigma_regularization(
     assert reg_specific_file_exists
 
     # check if soap_default_dict is correctly constructed from n_sparse and delta values in user fit parameter input
-    expected_soap_dict = "{'f=0.1': {'delta': 3.0, 'n_sparse': 8000}}"
+    expected_soap_dict = "atom-wise f=0.1: n_sparse = 8000, SOAP delta = 3.0"
 
     results_files = glob.glob('job*/test_results_LiCl.txt')
     for file_path in results_files:

diff --git a/tests/benchmark/test_benchmark_jobs.py b/tests/benchmark/test_benchmark_jobs.py
@@ -47,7 +47,6 @@ def test_compute_bandstructure_benchmark_metrics_dummy(test_dir, clean_dir):
 def test_compute_bandstructure_benchmark_metrics(test_dir, clean_dir):
     import os
     from pathlib import Path
-    from jobflow import run_locally
 
     # test wih two different band-structures
     dft_bs_file_path = test_dir / "benchmark" / "DFT_phonon_band_structure.yaml"
@@ -104,12 +103,14 @@ def test_write_benchmark_metrics(test_dir, clean_dir):
     ]
 
     soap_dict = [  # unit tests for checking correct default soap_dict in tests/auto/test_auto_flows.py
-        None,
-        None,
-        None,
-        None,
+        {'n_sparse': 3000, 'delta': 1.0},
+        {'n_sparse': 4000, 'delta': 1.0},
+        {'n_sparse': 5000, 'delta': 1.0},
+        {'n_sparse': 6000, 'delta': 1.0},
+        {'n_sparse': 6000, 'delta': 1.0},
         {'n_sparse': 3000, 'delta': 1.0},
         {'n_sparse': 5000, 'delta': 1.0},
+        {'n_sparse': 6000, 'delta': 1.0},
     ]
 
     suffixes = ["", '_wo_sigma', '_phonon', '_rand_struc']
@@ -118,8 +119,8 @@ def test_write_benchmark_metrics(test_dir, clean_dir):
 
     suffix_index = 0
 
-    for i, metric_group in enumerate(metric_vals):
-        for metric in metric_group:
+    for metric_group in metric_vals:
+        for metric, soap in zip(metric_group, soap_dict):
             fused_dict = {
                 'benchmark_phonon_rmse': metric['benchmark_phonon_rmse'],
                 'dft_imaginary_modes': metric['dft_imaginary_modes'],
@@ -129,7 +130,7 @@ def test_write_benchmark_metrics(test_dir, clean_dir):
                 'structure': structure,
                 'displacement': 0.01,
                 'atomwise_regularization_parameter': 0.1,
-                'soap_dict': soap_dict[i],
+                'soap_dict': soap,
                 'suffix': suffixes[suffix_index]
             }
             metrics.append(fused_dict)
@@ -140,7 +141,7 @@ def test_write_benchmark_metrics(test_dir, clean_dir):
         metrics=[metrics],
     )
 
-    _ = run_locally(write_metrics_job, create_folders=False, ensure_success=True)
+    run_locally(write_metrics_job, create_folders=False, ensure_success=True)
 
     # get list of generated txt file
     path_to_ref_txt_file = test_dir / "benchmark" / "results_LiCl_ref.txt"

diff --git a/tests/test_data/benchmark/results_LiCl_ref.txt b/tests/test_data/benchmark/results_LiCl_ref.txt
@@ -1,9 +1,9 @@
-Potential  Structure  MPID        Displacement (Å)  RMSE (THz)  Hyperparameters (atom-wise f, n_sparse, SOAP delta)    Database type   imagmodes(pot)  imagmodes(dft)
-GAP        LiCl       mp-22905    0.01              0.87425     {'f=0.1': None}                                                        False           False
-GAP        LiCl       mp-22905    0.01              0.63839     {'f=0.1': None}                                                        False           False
-GAP        LiCl       mp-22905    0.01              0.55506     {'f=0.1': None}                                        _wo_sigma       False           False
-GAP        LiCl       mp-22905    0.01              0.43216     {'f=0.1': None}                                        _phonon         False           False
-GAP        LiCl       mp-22905    0.01              0.54584     {'f=0.1': None}                                        _rand_struc     False           False
-GAP        LiCl       mp-22905    0.01              0.43216     {'f=0.1': {'n_sparse': 3000, 'delta': 1.0}}            _rand_struc     False           False
-GAP        LiCl       mp-22905    0.01              0.36478     {'f=0.1': {'n_sparse': 5000, 'delta': 1.0}}            _rand_struc     False           False
-GAP        LiCl       mp-22905    0.01              0.38100     {'f=0.1': {'n_sparse': 5000, 'delta': 1.0}}            _rand_struc     False           False
+Potential  Structure  MPID        Displacement (Å)  RMSE (THz)  imagmodes(pot)  imagmodes(dft)  Database type   (Hyper-)Parameters
+GAP        LiCl       mp-22905    0.01              0.87425     False           False           full            atom-wise f=0.1: n_sparse = 3000, SOAP delta = 1.0
+GAP        LiCl       mp-22905    0.01              0.63839     False           False           full            atom-wise f=0.1: n_sparse = 4000, SOAP delta = 1.0
+GAP        LiCl       mp-22905    0.01              0.55506     False           False           _wo_sigma       atom-wise f=0.1: n_sparse = 3000, SOAP delta = 1.0
+GAP        LiCl       mp-22905    0.01              0.43216     False           False           _phonon         atom-wise f=0.1: n_sparse = 3000, SOAP delta = 1.0
+GAP        LiCl       mp-22905    0.01              0.54584     False           False           _rand_struc     atom-wise f=0.1: n_sparse = 3000, SOAP delta = 1.0
+GAP        LiCl       mp-22905    0.01              0.43216     False           False           _rand_struc     atom-wise f=0.1: n_sparse = 3000, SOAP delta = 1.0
+GAP        LiCl       mp-22905    0.01              0.36478     False           False           _rand_struc     atom-wise f=0.1: n_sparse = 3000, SOAP delta = 1.0
+GAP        LiCl       mp-22905    0.01              0.38100     False           False           _rand_struc     atom-wise f=0.1: n_sparse = 4000, SOAP delta = 1.0