Merge pull request #21 from JaGeo/combine_add_data_complete_flow

Combine Complete Flow and add data flow
autoatml · Jan 26, 2024 · 7bd29b9 · 7bd29b9
2 parents 7cea18a + 0ddfe6f
commit 7bd29b9
Show file tree

Hide file tree

Showing 2 changed files with 185 additions and 168 deletions.
diff --git a/autoplex/auto/flows.py b/autoplex/auto/flows.py
@@ -38,141 +38,141 @@
 # Volker's idea: provide several default flows with different setting/setups
 # TODO TaskDocs
 
-
-@dataclass
-class CompleteDFTvsMLBenchmarkWorkflow(Maker):
-    """
-    Maker to calculate harmonic phonons with DFT, fit GAP and benchmark the results.
-
-    User has no data.
-
-    Parameters
-    ----------
-    name : str
-        Name of the flows produced by this maker
-    n_struct: int.
-        The total number of randomly displaced structures to be generated.
-    displacements: List[float]
-        displacement distance for phonons
-    symprec : float
-        Symmetry precision to use in the
-        reduction of symmetry to find the primitive/conventional cell
-        (use_primitive_standard_structure, use_conventional_standard_structure)
-        and to handle all symmetry-related tasks in phonopy
-    uc: bool.
-        If True, will generate randomly distorted structures (unitcells)
-        and add static computation jobs to the flow
-
-    """
-
-    name: str = "complete_workflow"
-    n_struct: int = 1
-    displacements: list[float] = field(default_factory=lambda: [0.01])
-    min_length: int = 20
-    symprec: float = 1e-4
-    uc: bool = False  # to get rattled unit cells
-    supercell_matrix: Matrix3D | None = None
-
-    def make(
-        self,
-        structure_list: list[Structure],
-        mp_ids,
-        phonon_displacement_maker,
-        benchmark_structure: Structure,  # structures
-        mp_id,  # benchmark_mp_ids
-        **fit_kwargs,
-    ):
-        """
-        Make the complete workflow for DFT vs. ML benchmarking.
-
-        Parameters
-        ----------
-        structure_list: List[Structure]
-            list of pymatgen structures
-        mp_ids : list.
-            list of materials project ids
-        phonon_displacement_maker : .BaseVaspMaker
-            Maker used to compute the forces for a supercell.
-        benchmark_structure: Structure.
-            Structure used for benchmarking.
-        mp_id: str
-            materials project ID corresponding to the benchmark structure
-        """
-        flows = []
-        datagen = {}
-        collect = []
-        isoatoms = get_iso_atom(structure_list)
-        flows.append(isoatoms)
-
-        for struc_i, structure in enumerate(structure_list):
-            autoplex_datagen = DFTDataGenerationFlow(
-                name="datagen",
-                phonon_displacement_maker=phonon_displacement_maker,
-                n_struct=self.n_struct,
-                displacements=self.displacements,
-                min_length=self.min_length,
-                symprec=self.symprec,
-                uc=self.uc,
-                supercell_matrix=self.supercell_matrix,
-            ).make(structure=structure, mp_id=mp_ids[struc_i])
-            flows.append(autoplex_datagen)
-            datagen.update({mp_ids[struc_i]: autoplex_datagen.output})
-
-        autoplex_fit = PhononDFTMLFitFlow().make(
-            species=isoatoms.output["species"],
-            isolated_atoms_energy=isoatoms.output["energies"],
-            fit_input=datagen,
-            **fit_kwargs,
-        )
-        flows.append(autoplex_fit)
-
-        autoplex_ml_phonon = get_phonon_ml_calculation_jobs(
-            structure=benchmark_structure,
-            min_length=self.min_length,
-            ml_dir=autoplex_fit.output,
-        )
-        flows.append(autoplex_ml_phonon)
-        if mp_id not in mp_ids:
-            dft_phonons = DFTPhononMaker(
-                symprec=self.symprec,
-                phonon_displacement_maker=phonon_displacement_maker,
-                born_maker=None,
-                min_length=self.min_length,
-            ).make(structure=benchmark_structure)
-            dft_phonons = update_user_incar_settings(
-                dft_phonons, {"NPAR": 4, "ISPIN": 1, "LAECHG": False, "ISMEAR": 0}
-            )
-            flows.append(dft_phonons)
-
-            dft_reference = dft_phonons.output
-        else:
-            dft_reference = datagen[mp_id]["phonon_data"][
-                "001"
-            ]  # flag take all phonon runs
-            # explanation for 001 = 0.01
-
-        autoplex_bm = PhononDFTMLBenchmarkFlow(name="testBM").make(
-            structure=benchmark_structure,
-            mp_id=mp_id,
-            ml_phonon_task_doc=autoplex_ml_phonon.output,
-            dft_phonon_task_doc=dft_reference,
-        )
-        flows.append(autoplex_bm)
-        collect.append(autoplex_bm.output)
-
-        collect_bm = write_benchmark_metrics(
-            benchmark_structure=benchmark_structure,
-            mp_id=mp_id,
-            rmse=collect,
-            displacements=self.displacements,
-        )
-        flows.append(collect_bm)
-
-        return Flow(flows)
+#
+# @dataclass
+# class CompleteDFTvsMLBenchmarkWorkflow(Maker):
+#     """
+#     Maker to calculate harmonic phonons with DFT, fit GAP and benchmark the results.
+#
+#     User has no data.
+#
+#     Parameters
+#     ----------
+#     name : str
+#         Name of the flows produced by this maker
+#     n_struct: int.
+#         The total number of randomly displaced structures to be generated.
+#     displacements: List[float]
+#         displacement distance for phonons
+#     symprec : float
+#         Symmetry precision to use in the
+#         reduction of symmetry to find the primitive/conventional cell
+#         (use_primitive_standard_structure, use_conventional_standard_structure)
+#         and to handle all symmetry-related tasks in phonopy
+#     uc: bool.
+#         If True, will generate randomly distorted structures (unitcells)
+#         and add static computation jobs to the flow
+#
+#     """
+#
+#     name: str = "complete_workflow"
+#     n_struct: int = 1
+#     displacements: list[float] = field(default_factory=lambda: [0.01])
+#     min_length: int = 20
+#     symprec: float = 1e-4
+#     uc: bool = False  # to get rattled unit cells
+#     supercell_matrix: Matrix3D | None = None
+#
+#     def make(
+#         self,
+#         structure_list: list[Structure],
+#         mp_ids,
+#         phonon_displacement_maker,
+#         benchmark_structure: Structure,  # structures
+#         mp_id,  # benchmark_mp_ids
+#         **fit_kwargs,
+#     ):
+#         """
+#         Make the complete workflow for DFT vs. ML benchmarking.
+#
+#         Parameters
+#         ----------
+#         structure_list: List[Structure]
+#             list of pymatgen structures
+#         mp_ids : list.
+#             list of materials project ids
+#         phonon_displacement_maker : .BaseVaspMaker
+#             Maker used to compute the forces for a supercell.
+#         benchmark_structure: Structure.
+#             Structure used for benchmarking.
+#         mp_id: str
+#             materials project ID corresponding to the benchmark structure
+#         """
+#         flows = []
+#         datagen = {}
+#         collect = []
+#         isoatoms = get_iso_atom(structure_list)
+#         flows.append(isoatoms)
+#
+#         for struc_i, structure in enumerate(structure_list):
+#             autoplex_datagen = DFTDataGenerationFlow(
+#                 name="datagen",
+#                 phonon_displacement_maker=phonon_displacement_maker,
+#                 n_struct=self.n_struct,
+#                 displacements=self.displacements,
+#                 min_length=self.min_length,
+#                 symprec=self.symprec,
+#                 uc=self.uc,
+#                 supercell_matrix=self.supercell_matrix,
+#             ).make(structure=structure, mp_id=mp_ids[struc_i])
+#             flows.append(autoplex_datagen)
+#             datagen.update({mp_ids[struc_i]: autoplex_datagen.output})
+#
+#         autoplex_fit = PhononDFTMLFitFlow().make(
+#             species=isoatoms.output["species"],
+#             isolated_atoms_energy=isoatoms.output["energies"],
+#             fit_input=datagen,
+#             **fit_kwargs,
+#         )
+#         flows.append(autoplex_fit)
+#
+#         autoplex_ml_phonon = get_phonon_ml_calculation_jobs(
+#             structure=benchmark_structure,
+#             min_length=self.min_length,
+#             ml_dir=autoplex_fit.output,
+#         )
+#         flows.append(autoplex_ml_phonon)
+#         if mp_id not in mp_ids:
+#             dft_phonons = DFTPhononMaker(
+#                 symprec=self.symprec,
+#                 phonon_displacement_maker=phonon_displacement_maker,
+#                 born_maker=None,
+#                 min_length=self.min_length,
+#             ).make(structure=benchmark_structure)
+#             dft_phonons = update_user_incar_settings(
+#                 dft_phonons, {"NPAR": 4, "ISPIN": 1, "LAECHG": False, "ISMEAR": 0}
+#             )
+#             flows.append(dft_phonons)
+#
+#             dft_reference = dft_phonons.output
+#         else:
+#             dft_reference = datagen[mp_id]["phonon_data"][
+#                 "001"
+#             ]  # flag take all phonon runs
+#             # explanation for 001 = 0.01
+#
+#         autoplex_bm = PhononDFTMLBenchmarkFlow(name="testBM").make(
+#             structure=benchmark_structure,
+#             mp_id=mp_id,
+#             ml_phonon_task_doc=autoplex_ml_phonon.output,
+#             dft_phonon_task_doc=dft_reference,
+#         )
+#         flows.append(autoplex_bm)
+#         collect.append(autoplex_bm.output)
+#
+#         collect_bm = write_benchmark_metrics(
+#             benchmark_structure=benchmark_structure,
+#             mp_id=mp_id,
+#             rmse=collect,
+#             displacements=self.displacements,
+#         )
+#         flows.append(collect_bm)
+#
+#         return Flow(flows)
 
 
 @dataclass
-class AddDataToDataset(
+class CompleteDFTvsMLBenchmarkWorkflow(
     Maker
 ):  # merge with complete wf and set another flag for adding data
     """
@@ -210,10 +210,10 @@ def make(
         self,
         structure_list: list[Structure],
         mp_ids,
-        xyz_file,
-        dft_reference: PhononBSDOSDoc | None,
-        benchmark_structure: Structure,
-        mp_id,
+        xyz_file: str | None = None,
+        dft_reference: PhononBSDOSDoc | None = None,
+        benchmark_structure: Structure | None = None,
+        mp_id: str | None = None,
         **fit_kwargs,
     ):
         """
@@ -239,8 +239,8 @@ def make(
         fit_input = {}
         collect = []
 
-        if xyz_file is None:
-            raise Exception("Error. Please provide an existing xyz file.")
+        # if xyz_file is None:
+        #    raise Exception("Error. Please provide an existing xyz file.")
 
         for i, structure in enumerate(structure_list):
             if self.add_dft_random_struct:
@@ -527,7 +527,7 @@ class PhononDFTMLBenchmarkFlow(Maker):
     def make(
         self,
         structure: Structure,
-        mp_id: str,
+        mp_id,
         ml_phonon_task_doc: PhononBSDOSDoc,
         dft_phonon_task_doc: PhononBSDOSDoc,
     ):