Skip to content

Commit

Permalink
Merge pull request #21 from JaGeo/combine_add_data_complete_flow
Browse files Browse the repository at this point in the history
Combine Complete Flow and add data flow
  • Loading branch information
JaGeo authored Jan 26, 2024
2 parents 7cea18a + 0ddfe6f commit 7bd29b9
Show file tree
Hide file tree
Showing 2 changed files with 185 additions and 168 deletions.
278 changes: 139 additions & 139 deletions autoplex/auto/flows.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,141 +38,141 @@
# Volker's idea: provide several default flows with different settings/setups
# TODO TaskDocs


@dataclass
class CompleteDFTvsMLBenchmarkWorkflow(Maker):
    """
    Maker chaining DFT phonon data generation, a GAP fit and a benchmark.

    Intended for the case in which the user has no data yet.

    Parameters
    ----------
    name : str
        Name of the flows produced by this maker.
    n_struct : int
        The total number of randomly displaced structures to be generated.
    displacements : list[float]
        Displacement distances for phonons.
    min_length : int
        Forwarded to the data generation flow, the ML phonon jobs and the
        DFT phonon maker. Presumably a minimum supercell length -- confirm
        against those makers.
    symprec : float
        Symmetry precision to use in the reduction of symmetry to find the
        primitive/conventional cell (use_primitive_standard_structure,
        use_conventional_standard_structure) and to handle all
        symmetry-related tasks in phonopy.
    uc : bool
        If True, will generate randomly distorted structures (unit cells)
        and add static computation jobs to the flow.
    supercell_matrix : Matrix3D | None
        Forwarded unchanged to the data generation flow.
    """

    name: str = "complete_workflow"
    n_struct: int = 1
    displacements: list[float] = field(default_factory=lambda: [0.01])
    min_length: int = 20
    symprec: float = 1e-4
    uc: bool = False  # to get rattled unit cells
    supercell_matrix: Matrix3D | None = None

    def make(
        self,
        structure_list: list[Structure],
        mp_ids,
        phonon_displacement_maker,
        benchmark_structure: Structure,  # structures
        mp_id,  # benchmark_mp_ids
        **fit_kwargs,
    ):
        """
        Assemble the complete DFT vs. ML benchmarking flow.

        Parameters
        ----------
        structure_list : list[Structure]
            List of pymatgen structures; one data generation flow is created
            per entry.
        mp_ids : list
            List of materials project ids, indexed in parallel with
            ``structure_list``.
        phonon_displacement_maker : .BaseVaspMaker
            Maker used to compute the forces for a supercell.
        benchmark_structure : Structure
            Structure used for benchmarking.
        mp_id : str
            Materials project ID corresponding to the benchmark structure.
        **fit_kwargs
            Extra keyword arguments passed through to the fit flow.

        Returns
        -------
        Flow
            A flow containing every job/flow created here, in creation order.
        """
        isolated_atoms = get_iso_atom(structure_list)
        jobs = [isolated_atoms]
        datagen = {}

        # One data generation flow per input structure, keyed by its mp id.
        for index, structure in enumerate(structure_list):
            datagen_maker = DFTDataGenerationFlow(
                name="datagen",
                phonon_displacement_maker=phonon_displacement_maker,
                n_struct=self.n_struct,
                displacements=self.displacements,
                min_length=self.min_length,
                symprec=self.symprec,
                uc=self.uc,
                supercell_matrix=self.supercell_matrix,
            )
            current_id = mp_ids[index]
            datagen_flow = datagen_maker.make(structure=structure, mp_id=current_id)
            jobs.append(datagen_flow)
            datagen[current_id] = datagen_flow.output

        fit_maker = PhononDFTMLFitFlow()
        fit_flow = fit_maker.make(
            species=isolated_atoms.output["species"],
            isolated_atoms_energy=isolated_atoms.output["energies"],
            fit_input=datagen,
            **fit_kwargs,
        )
        jobs.append(fit_flow)

        ml_phonon_jobs = get_phonon_ml_calculation_jobs(
            structure=benchmark_structure,
            min_length=self.min_length,
            ml_dir=fit_flow.output,
        )
        jobs.append(ml_phonon_jobs)

        if mp_id in mp_ids:
            # The benchmark structure was already part of the generated data,
            # so reuse its phonon run. "001" stands for the 0.01 displacement.
            # TODO: take all phonon runs instead of only this one.
            dft_reference = datagen[mp_id]["phonon_data"]["001"]
        else:
            # No reference available yet: add a dedicated DFT phonon run.
            reference_maker = DFTPhononMaker(
                symprec=self.symprec,
                phonon_displacement_maker=phonon_displacement_maker,
                born_maker=None,
                min_length=self.min_length,
            )
            reference_flow = reference_maker.make(structure=benchmark_structure)
            reference_flow = update_user_incar_settings(
                reference_flow,
                {"NPAR": 4, "ISPIN": 1, "LAECHG": False, "ISMEAR": 0},
            )
            jobs.append(reference_flow)
            dft_reference = reference_flow.output

        benchmark_maker = PhononDFTMLBenchmarkFlow(name="testBM")
        benchmark_flow = benchmark_maker.make(
            structure=benchmark_structure,
            mp_id=mp_id,
            ml_phonon_task_doc=ml_phonon_jobs.output,
            dft_phonon_task_doc=dft_reference,
        )
        jobs.append(benchmark_flow)
        rmse_entries = [benchmark_flow.output]

        metrics_job = write_benchmark_metrics(
            benchmark_structure=benchmark_structure,
            mp_id=mp_id,
            rmse=rmse_entries,
            displacements=self.displacements,
        )
        jobs.append(metrics_job)

        return Flow(jobs)
#
# @dataclass
# class CompleteDFTvsMLBenchmarkWorkflow(Maker):
# """
# Maker to calculate harmonic phonons with DFT, fit GAP and benchmark the results.
#
# User has no data.
#
# Parameters
# ----------
# name : str
# Name of the flows produced by this maker
# n_struct: int.
# The total number of randomly displaced structures to be generated.
# displacements: List[float]
# displacement distance for phonons
# symprec : float
# Symmetry precision to use in the
# reduction of symmetry to find the primitive/conventional cell
# (use_primitive_standard_structure, use_conventional_standard_structure)
# and to handle all symmetry-related tasks in phonopy
# uc: bool.
# If True, will generate randomly distorted structures (unitcells)
# and add static computation jobs to the flow
#
# """
#
# name: str = "complete_workflow"
# n_struct: int = 1
# displacements: list[float] = field(default_factory=lambda: [0.01])
# min_length: int = 20
# symprec: float = 1e-4
# uc: bool = False # to get rattled unit cells
# supercell_matrix: Matrix3D | None = None
#
# def make(
# self,
# structure_list: list[Structure],
# mp_ids,
# phonon_displacement_maker,
# benchmark_structure: Structure, # structures
# mp_id, # benchmark_mp_ids
# **fit_kwargs,
# ):
# """
# Make the complete workflow for DFT vs. ML benchmarking.
#
# Parameters
# ----------
# structure_list: List[Structure]
# list of pymatgen structures
# mp_ids : list.
# list of materials project ids
# phonon_displacement_maker : .BaseVaspMaker
# Maker used to compute the forces for a supercell.
# benchmark_structure: Structure.
# Structure used for benchmarking.
# mp_id: str
# materials project ID corresponding to the benchmark structure
# """
# flows = []
# datagen = {}
# collect = []
# isoatoms = get_iso_atom(structure_list)
# flows.append(isoatoms)
#
# for struc_i, structure in enumerate(structure_list):
# autoplex_datagen = DFTDataGenerationFlow(
# name="datagen",
# phonon_displacement_maker=phonon_displacement_maker,
# n_struct=self.n_struct,
# displacements=self.displacements,
# min_length=self.min_length,
# symprec=self.symprec,
# uc=self.uc,
# supercell_matrix=self.supercell_matrix,
# ).make(structure=structure, mp_id=mp_ids[struc_i])
# flows.append(autoplex_datagen)
# datagen.update({mp_ids[struc_i]: autoplex_datagen.output})
#
# autoplex_fit = PhononDFTMLFitFlow().make(
# species=isoatoms.output["species"],
# isolated_atoms_energy=isoatoms.output["energies"],
# fit_input=datagen,
# **fit_kwargs,
# )
# flows.append(autoplex_fit)
#
# autoplex_ml_phonon = get_phonon_ml_calculation_jobs(
# structure=benchmark_structure,
# min_length=self.min_length,
# ml_dir=autoplex_fit.output,
# )
# flows.append(autoplex_ml_phonon)
# if mp_id not in mp_ids:
# dft_phonons = DFTPhononMaker(
# symprec=self.symprec,
# phonon_displacement_maker=phonon_displacement_maker,
# born_maker=None,
# min_length=self.min_length,
# ).make(structure=benchmark_structure)
# dft_phonons = update_user_incar_settings(
# dft_phonons, {"NPAR": 4, "ISPIN": 1, "LAECHG": False, "ISMEAR": 0}
# )
# flows.append(dft_phonons)
#
# dft_reference = dft_phonons.output
# else:
# dft_reference = datagen[mp_id]["phonon_data"][
# "001"
# ] # flag take all phonon runs
# # explanation for 001 = 0.01
#
# autoplex_bm = PhononDFTMLBenchmarkFlow(name="testBM").make(
# structure=benchmark_structure,
# mp_id=mp_id,
# ml_phonon_task_doc=autoplex_ml_phonon.output,
# dft_phonon_task_doc=dft_reference,
# )
# flows.append(autoplex_bm)
# collect.append(autoplex_bm.output)
#
# collect_bm = write_benchmark_metrics(
# benchmark_structure=benchmark_structure,
# mp_id=mp_id,
# rmse=collect,
# displacements=self.displacements,
# )
# flows.append(collect_bm)
#
# return Flow(flows)


@dataclass
class AddDataToDataset(
class CompleteDFTvsMLBenchmarkWorkflow(
Maker
): # merge with complete wf and set another flag for adding data
"""
Expand Down Expand Up @@ -210,10 +210,10 @@ def make(
self,
structure_list: list[Structure],
mp_ids,
xyz_file,
dft_reference: PhononBSDOSDoc | None,
benchmark_structure: Structure,
mp_id,
xyz_file: str | None = None,
dft_reference: PhononBSDOSDoc | None = None,
benchmark_structure: Structure | None = None,
mp_id: str | None = None,
**fit_kwargs,
):
"""
Expand All @@ -239,8 +239,8 @@ def make(
fit_input = {}
collect = []

if xyz_file is None:
raise Exception("Error. Please provide an existing xyz file.")
# if xyz_file is None:
# raise Exception("Error. Please provide an existing xyz file.")

for i, structure in enumerate(structure_list):
if self.add_dft_random_struct:
Expand Down Expand Up @@ -527,7 +527,7 @@ class PhononDFTMLBenchmarkFlow(Maker):
def make(
self,
structure: Structure,
mp_id: str,
mp_id,
ml_phonon_task_doc: PhononBSDOSDoc,
dft_phonon_task_doc: PhononBSDOSDoc,
):
Expand Down
Loading

0 comments on commit 7bd29b9

Please sign in to comment.