diff --git a/__pycache__/generic.cpython-310.pyc b/__pycache__/generic.cpython-310.pyc deleted file mode 100644 index bac7896..0000000 Binary files a/__pycache__/generic.cpython-310.pyc and /dev/null differ diff --git a/__pycache__/parallel.cpython-310.pyc b/__pycache__/parallel.cpython-310.pyc deleted file mode 100644 index 06e27b5..0000000 Binary files a/__pycache__/parallel.cpython-310.pyc and /dev/null differ diff --git a/__pycache__/vasp.cpython-310.pyc b/__pycache__/vasp.cpython-310.pyc deleted file mode 100644 index 3ff20f0..0000000 Binary files a/__pycache__/vasp.cpython-310.pyc and /dev/null differ diff --git a/actual_usage/VaspReconvergeExample.py b/actual_usage/VaspReconvergeExample.py index a2b53ff..85511c3 100644 --- a/actual_usage/VaspReconvergeExample.py +++ b/actual_usage/VaspReconvergeExample.py @@ -1,4 +1,4 @@ -from utils.vasp.vasp_resubmitter import CalculationConverger +from vasp.resubmitter import CalculationConverger import os vasp_resubmitter = CalculationConverger(parent_dir=os.getcwd(), diff --git a/actual_usage/build_vasp_database.py b/actual_usage/build_vasp_database.py index 67e43ed..20db052 100644 --- a/actual_usage/build_vasp_database.py +++ b/actual_usage/build_vasp_database.py @@ -1,4 +1,4 @@ -from utils.vasp.vasp import DatabaseGenerator +from vasp.database import DatabaseGenerator import argparse import warnings from multiprocessing import cpu_count diff --git a/actual_usage/update_vasp_db.py b/actual_usage/update_vasp_db.py index 1b7aaea..e3c55a2 100644 --- a/actual_usage/update_vasp_db.py +++ b/actual_usage/update_vasp_db.py @@ -1,4 +1,4 @@ -from utils.vasp.vasp import DatabaseGenerator +from vasp.database import DatabaseGenerator import argparse import warnings from multiprocessing import cpu_count diff --git a/GNN_calculators/mace.py b/utils/GNN_calculators/mace.py similarity index 100% rename from GNN_calculators/mace.py rename to utils/GNN_calculators/mace.py diff --git a/StructureManipulator/cleave.py b/utils/StructureManipulator/cleave.py similarity index 100% rename from StructureManipulator/cleave.py rename to utils/StructureManipulator/cleave.py diff --git a/StructureManipulator/interstitial.py b/utils/StructureManipulator/interstitial.py similarity index 100% rename from StructureManipulator/interstitial.py rename to utils/StructureManipulator/interstitial.py diff --git a/WIP_notebooks/pyiron.log b/utils/WIP_notebooks/pyiron.log similarity index 100% rename from WIP_notebooks/pyiron.log rename to utils/WIP_notebooks/pyiron.log diff --git a/WIP_notebooks/test_kpoints.ipynb b/utils/WIP_notebooks/test_kpoints.ipynb similarity index 100% rename from WIP_notebooks/test_kpoints.ipynb rename to utils/WIP_notebooks/test_kpoints.ipynb diff --git a/WIP_notebooks/test_utils.ipynb b/utils/WIP_notebooks/test_utils.ipynb similarity index 100% rename from WIP_notebooks/test_utils.ipynb rename to utils/WIP_notebooks/test_utils.ipynb diff --git a/WIP_notebooks/test_vasp_convergence_check.ipynb b/utils/WIP_notebooks/test_vasp_convergence_check.ipynb similarity index 100% rename from WIP_notebooks/test_vasp_convergence_check.ipynb rename to utils/WIP_notebooks/test_vasp_convergence_check.ipynb diff --git a/ace_descriptor_utils.py b/utils/ace_descriptor_utils.py similarity index 100% rename from ace_descriptor_utils.py rename to utils/ace_descriptor_utils.py diff --git a/utils/actual_usage/VaspReconvergeExample.py b/utils/actual_usage/VaspReconvergeExample.py new file mode 100644 index 0000000..a2b53ff --- /dev/null +++ b/utils/actual_usage/VaspReconvergeExample.py @@ -0,0 +1,10 @@ +from utils.vasp.vasp_resubmitter import CalculationConverger +import os + +vasp_resubmitter = CalculationConverger(parent_dir=os.getcwd(), + script_template_dir="/home/hmai/CustodianJobfiles", + max_submissions = 1000, + submission_command = "sbatch", + username="hmai") + +vasp_resubmitter.reconverge_all() \ No newline at end of file diff --git a/actual_usage/build_and_show_db b/utils/actual_usage/build_and_show_db similarity index 100% rename from actual_usage/build_and_show_db rename to utils/actual_usage/build_and_show_db diff --git a/utils/actual_usage/build_vasp_database.py b/utils/actual_usage/build_vasp_database.py new file mode 100644 index 0000000..67e43ed --- /dev/null +++ b/utils/actual_usage/build_vasp_database.py @@ -0,0 +1,42 @@ +from utils.vasp.vasp import DatabaseGenerator +import argparse +import warnings +from multiprocessing import cpu_count + +def main(): + warnings.filterwarnings("ignore") + + # Initialize argument parser + parser = argparse.ArgumentParser(description='Find and compress directories based on specified criteria.') + parser.add_argument('directory', metavar='DIR', type=str, help='the directory to operate on') + parser.add_argument('--extract', action='store_true', help='Extract directories during database generation') + parser.add_argument('--max_dir_count', type=int, help='Maximum directory count for database generation') + parser.add_argument('--read_all_runs_in_dir', action='store_true', default=False, help='Read all runs in directory') + parser.add_argument('--read_error_runs_in_dir', action='store_true', default=False, help='Read directories with errors') + args = parser.parse_args() + + datagen = DatabaseGenerator(args.directory, + max_workers=cpu_count()) + + # Check if max_dir_count is provided as an argument + if args.max_dir_count is not None: + max_dir_count = args.max_dir_count + else: + max_dir_count = 2000 # Default value + + # Call the build_database function with the updated parameters + df = datagen.build_database(extract_directories=args.extract, + read_multiple_runs_in_dir=args.read_all_runs_in_dir, + read_error_dirs=args.read_error_runs_in_dir, + max_dir_count=max_dir_count, + tarball_extensions=(".tar.gz", ".tar.bz2"), + cleanup=False, + keep_filenames_after_cleanup=[], + keep_filename_patterns_after_cleanup=[], + filenames_to_qualify=["OUTCAR", "vasprun.xml"], + all_present=True, + df_filename=None, + df_compression=True) + +if __name__ == '__main__': + main() diff --git a/actual_usage/check_jobdir b/utils/actual_usage/check_jobdir similarity index 100% rename from actual_usage/check_jobdir rename to utils/actual_usage/check_jobdir diff --git a/actual_usage/compress_here b/utils/actual_usage/compress_here similarity index 84% rename from actual_usage/compress_here rename to utils/actual_usage/compress_here index edecfd8..0ba1edc 100755 --- a/actual_usage/compress_here +++ b/utils/actual_usage/compress_here @@ -11,6 +11,6 @@ folder_path="$1" path_to_utils="/scratch/pawsey0380/hmai/utils/actual_usage/" # Garching path_to_utils="/cmmc/u/hmai/personal_dev/utils/actual_usage" - +path_to_utils="/root/personal_python_utilities/utils/actual_usage" # Run the first Python script and append the output to py.output python "$path_to_utils/compression.py" "$folder_path" diff --git a/actual_usage/compression.py b/utils/actual_usage/compression.py similarity index 100% rename from actual_usage/compression.py rename to utils/actual_usage/compression.py diff --git a/actual_usage/develop_chargemol_class.ipynb b/utils/actual_usage/develop_chargemol_class.ipynb similarity index 100% rename from actual_usage/develop_chargemol_class.ipynb rename to utils/actual_usage/develop_chargemol_class.ipynb diff --git a/actual_usage/develop_new_vasp_db_generator.ipynb b/utils/actual_usage/develop_new_vasp_db_generator.ipynb similarity index 100% rename from actual_usage/develop_new_vasp_db_generator.ipynb rename to utils/actual_usage/develop_new_vasp_db_generator.ipynb diff --git a/actual_usage/develop_parse_DDEC6.ipynb b/utils/actual_usage/develop_parse_DDEC6.ipynb similarity index 100% rename from actual_usage/develop_parse_DDEC6.ipynb rename to utils/actual_usage/develop_parse_DDEC6.ipynb diff --git a/actual_usage/memory_check b/utils/actual_usage/memory_check similarity index 100% rename from actual_usage/memory_check rename to utils/actual_usage/memory_check diff --git a/actual_usage/qstat_slurm b/utils/actual_usage/qstat_slurm similarity index 100% rename from actual_usage/qstat_slurm rename to utils/actual_usage/qstat_slurm diff --git a/actual_usage/setonix_refresh_mamba b/utils/actual_usage/setonix_refresh_mamba similarity index 100% rename from actual_usage/setonix_refresh_mamba rename to utils/actual_usage/setonix_refresh_mamba diff --git a/actual_usage/slurm_list_jobdir b/utils/actual_usage/slurm_list_jobdir similarity index 100% rename from actual_usage/slurm_list_jobdir rename to utils/actual_usage/slurm_list_jobdir diff --git a/actual_usage/summarise_db b/utils/actual_usage/summarise_db similarity index 100% rename from actual_usage/summarise_db rename to utils/actual_usage/summarise_db diff --git a/actual_usage/summarise_vasp_database.py b/utils/actual_usage/summarise_vasp_database.py similarity index 100% rename from actual_usage/summarise_vasp_database.py rename to utils/actual_usage/summarise_vasp_database.py diff --git a/utils/actual_usage/update_0.pkl.gz b/utils/actual_usage/update_0.pkl.gz new file mode 100644 index 0000000..c5a2076 Binary files /dev/null and b/utils/actual_usage/update_0.pkl.gz differ diff --git a/utils/actual_usage/update_database.ipynb b/utils/actual_usage/update_database.ipynb new file mode 100644 index 0000000..629ef83 --- /dev/null +++ b/utils/actual_usage/update_database.ipynb @@ -0,0 +1,307 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/root/personal_python_utilities/utils/vasp/parser/outcar.py:227: SyntaxWarning: invalid escape sequence '\\A'\n", + " \"\"\"\n", + "/root/personal_python_utilities/utils/vasp/parser/outcar.py:257: SyntaxWarning: invalid escape sequence '\\A'\n", + " \"\"\"\n", + "/root/personal_python_utilities/utils/vasp/parser/outcar.py:284: SyntaxWarning: invalid escape sequence '\\A'\n", + " \"\"\"\n", + "/root/personal_python_utilities/utils/vasp/parser/outcar.py:312: SyntaxWarning: invalid escape sequence '\\A'\n", + " \"\"\"\n", + "/root/personal_python_utilities/utils/vasp/parser/outcar.py:1077: SyntaxWarning: invalid escape sequence '\\A'\n", + " \"\"\"\n", + "/root/personal_python_utilities/utils/vasp/parser/outcar.py:1118: SyntaxWarning: invalid escape sequence '\\A'\n", + " \"\"\"\n" + ] + } + ], + "source": [ + "from utils.vasp.vasp import DatabaseGenerator\n", + "import argparse\n", + "import warnings\n", + "from multiprocessing import cpu_count\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The total number of vasp directories that we are building the database out of is 90\n", + "# Processes: 90, Processors available: 16, CPUs used: 16\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''\n", + "Step 0: 47.602 seconds taken for 90 parse steps\n", + "Elapsed time: 74.146 seconds\n" + ] + } + ], + "source": [ + "directory = \"/root/Fe_Ptable_Project/128_Fe_Bulk_data/Fe-Ptable-Data/Bulk\"\n", + "datagen = DatabaseGenerator(directory,\n", + " max_workers=cpu_count())\n", + "# Call the build_database function with the updated parameters\n", + "df = datagen.build_database(extract_directories=False,\n", + " read_multiple_runs_in_dir=False,\n", + " read_error_dirs=False,\n", + " max_dir_count=1000,\n", + " tarball_extensions=(\".tar.gz\", \".tar.bz2\"),\n", + " cleanup=False,\n", + " keep_filenames_after_cleanup=[],\n", + " keep_filename_patterns_after_cleanup=[],\n", + " filenames_to_qualify=[\"OUTCAR\"],\n", + " df_filename=None,\n", + " df_compression=True,\n", + " )\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "df.to_pickle(os.path.join(os.getcwd(), f\"vasp_database.pkl.gz\"), compression=\"gzip\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import glob\n", + "import time\n", + "\n", + "from pymatgen.core import Structure\n", + "from pymatgen.io.vasp import Vasprun, Kpoints, Incar\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "import utils.generic as gen_tools\n", + "from utils.parallel import parallelise\n", + "from utils.vasp.parser.outcar import Outcar\n", + "from utils.vasp.vasp_database import parse_vasp_directory\n", + "\n", + "def update_failed_jobs_in_database(parent_dir, df_path=None, read_error_dirs=False, read_multiple_runs_in_dir=False, max_dir_count=None, df_compression=True, max_workers=16):\n", + " compression_option = 'gzip' if df_compression else None\n", + " compression_extension = '.gz' if df_compression else ''\n", + " \n", + " if df_path is None:\n", + " df_path = os.path.join(parent_dir, f\"vasp_database.pkl{compression_extension}\")\n", + " \n", + " if os.path.isdir(df_path):\n", + " potential_files = [\n", + " os.path.join(df_path, \"vasp_database.pkl.gz\"),\n", + " os.path.join(df_path, \"vasp_database.pkl\")\n", + " ]\n", + " output_path = os.path.join(df_path, f\"vasp_database.pkl{compression_extension}\")\n", + " else:\n", + " potential_files = [df_path]\n", + " output_path = df_path\n", + " \n", + " df = None\n", + " for file in potential_files:\n", + " try:\n", + " if file.endswith(\".gz\"):\n", + " df = pd.read_pickle(file, compression='gzip')\n", + " else:\n", + " df = pd.read_pickle(file, compression=None)\n", + " print(f\"Successfully read database from {file}\")\n", + " break\n", + " except (FileNotFoundError, pd.errors.UnrecognizedCompressionError):\n", + " print(f\"Failed to read database from {file}\")\n", + "\n", + " if df is None:\n", + " raise ValueError(\"Invalid path or filename - please check! Attempted paths: \" + \", \".join(potential_files))\n", + " \n", + " failed_dirs = df[df['convergence'] == False]['filepath'].tolist()\n", + " print(f\"Reparsing {len(failed_dirs)} directories where convergence is False\")\n", + "\n", + " if max_dir_count:\n", + " pkl_filenames = []\n", + " for i, chunks in enumerate(gen_tools.chunk_list(failed_dirs, max_dir_count)):\n", + " step_time = time.time()\n", + " failed_df = pd.concat(parallelise(parse_vasp_directory, \n", + " [(chunk,) for chunk in chunks],\n", + " max_workers=max_workers,\n", + " extract_error_dirs=read_error_dirs, \n", + " parse_all_in_dir=read_multiple_runs_in_dir))\n", + " db_filename = f\"update_{i}.pkl{compression_extension}\"\n", + " pkl_filenames.append(os.path.join(parent_dir, db_filename))\n", + " failed_df.to_pickle(os.path.join(parent_dir, db_filename), compression=compression_option)\n", + " step_taken_time = np.round(time.time() - step_time, 3)\n", + " print(f\"Step {i}: {step_taken_time} seconds taken for {len(chunks)} parse steps\")\n", + " \n", + " failed_df = pd.concat([pd.read_pickle(partial_df, compression=compression_option) for partial_df in pkl_filenames])\n", + " else:\n", + " failed_df = pd.concat(parallelise(parse_vasp_directory, \n", + " [(chunk,) for chunk in failed_dirs],\n", + " max_workers=max_workers,\n", + " extract_error_dirs=read_error_dirs, \n", + " parse_all_in_dir=read_multiple_runs_in_dir))\n", + "\n", + " # # Ensure no duplicate labels in index before update\n", + " # if failed_df.index.duplicated().any():\n", + " # failed_df = failed_df[~failed_df.index.duplicated()]\n", + "\n", + " # if df.index.duplicated().any():\n", + " # df = df[~df.index.duplicated()]\n", + "\n", + " # # Use a different method to merge the DataFrames\n", + " # df.update(failed_df, overwrite=True, filter_func=lambda x: not pd.isna(x))\n", + "\n", + " # df.to_pickle(output_path, compression=compression_option)\n", + " # print(f\"Updated dataframe saved to {output_path}\")\n", + " return failed_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "# Call the update_failed_jobs_in_database function with the updated parameters\n", + "df = datagen.update_failed_jobs_in_database(df_path=directory,\n", + " read_error_dirs=False,\n", + " read_multiple_runs_in_dir=False,\n", + " max_dir_count=max_dir_count,\n", + " df_compression=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "pyiron_workflow", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/actual_usage/update_failed_jobs_db b/utils/actual_usage/update_failed_jobs_db similarity index 100% rename from actual_usage/update_failed_jobs_db rename to utils/actual_usage/update_failed_jobs_db diff --git a/utils/actual_usage/update_vasp_db.py b/utils/actual_usage/update_vasp_db.py new file mode 100644 index 0000000..1b7aaea --- /dev/null +++ b/utils/actual_usage/update_vasp_db.py @@ -0,0 +1,34 @@ +from utils.vasp.vasp import DatabaseGenerator +import argparse +import warnings +from multiprocessing import cpu_count + +def main(): + warnings.filterwarnings("ignore") + + # Initialize argument parser + parser = argparse.ArgumentParser(description='Find and compress directories based on specified criteria.') + parser.add_argument('directory', metavar='DIR', type=str, help='the directory to operate on') + parser.add_argument('--max_dir_count', type=int, help='Maximum directory count for database generation') + parser.add_argument('--read_all_runs_in_dir', action='store_true', default=False, help='Read all runs in directory') + parser.add_argument('--read_error_runs_in_dir', action='store_true', default=False, help='Read directories with errors') + args = parser.parse_args() + + datagen = DatabaseGenerator(args.directory, + max_workers=cpu_count()) + + # Check if max_dir_count is provided as an argument + if args.max_dir_count is not None: + max_dir_count = args.max_dir_count + else: + max_dir_count = 2000 # Default value + + # Call the update_failed_jobs_in_database function with the updated parameters + df = datagen.update_failed_jobs_in_database(df_path=args.directory, + read_error_dirs=args.read_error_runs_in_dir, + read_multiple_runs_in_dir=args.read_all_runs_in_dir, + max_dir_count=max_dir_count, + df_compression=True) + +if __name__ == '__main__': + main() diff --git a/utils/actual_usage/vasp_database.pkl.gz b/utils/actual_usage/vasp_database.pkl.gz new file mode 100644 index 0000000..2b61a1c Binary files /dev/null and b/utils/actual_usage/vasp_database.pkl.gz differ diff --git a/analysis_functions.py b/utils/analysis_functions.py similarity index 100% rename from analysis_functions.py rename to utils/analysis_functions.py diff --git a/chargemol.py b/utils/chargemol.py similarity index 99% rename from chargemol.py rename to utils/chargemol.py index 2879f6b..5da8ba1 100644 --- a/chargemol.py +++ b/utils/chargemol.py @@ -213,6 +213,9 @@ def get_ANSBO_profile(self, axis=2, tolerance=0.1): def get_min_ANSBO(self, axis=2, tolerance=0.1): return min(get_ANSBO_all_cleavage_planes(self.struct, self.bond_matrix, axis=axis, tolerance=tolerance)) + + def analyse_ANSBO(self, axis=2, tolerance=0.1): + return analyse_ANSBO(self.directory, axis=axis, tolerance=tolerance) def analyse_ANSBO(directory, axis=2, tolerance=0.1): """ @@ -512,7 +515,7 @@ def get_ANSBO_all_cleavage_planes(structure, bond_matrix, axis = 2, tolerance = ANSBO_profile = [] for cp in cp_list: ANSBO_profile.append(get_ANSBO(structure, bond_matrix, cp)) - return ANSBO_profile + return cp_list, ANSBO_profile def plot_ANSBO_profile(structure, bond_matrix, diff --git a/custom_custodian_handlers.py b/utils/custom_custodian_handlers.py similarity index 100% rename from custom_custodian_handlers.py rename to utils/custom_custodian_handlers.py diff --git a/functions.py b/utils/functions.py similarity index 96% rename from functions.py rename to utils/functions.py index 112bb12..522e7fa 100644 --- a/functions.py +++ b/utils/functions.py @@ -7,7 +7,7 @@ from utils.jobfile import jobfile potcar_library_path = "/root/POTCAR_Library/GGA" -potcar_library_path = "/u/hmai/pyiron-resources-cmmc/vasp/potentials/potpaw_PBE" +#potcar_library_path = "/u/hmai/pyiron-resources-cmmc/vasp/potentials/potpaw_PBE" sites_to_study = {"S11-RA110-S3-32": [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22], "S3-RA110-S1-11": [20, 22, 24, 26, 28, 30, 32, 34, 36], diff --git a/generic.py b/utils/generic.py similarity index 100% rename from generic.py rename to utils/generic.py diff --git a/jobfile.py b/utils/jobfile.py similarity index 100% rename from jobfile.py rename to utils/jobfile.py diff --git a/jobscript_templates/CustodianScripts/BASE_Custodian_Setonix.sh b/utils/jobscript_templates/CustodianScripts/BASE_Custodian_Setonix.sh similarity index 100% rename from jobscript_templates/CustodianScripts/BASE_Custodian_Setonix.sh rename to utils/jobscript_templates/CustodianScripts/BASE_Custodian_Setonix.sh diff --git a/jobscript_templates/CustodianScripts/DRS_Custodian_Gadi.sh b/utils/jobscript_templates/CustodianScripts/DRS_Custodian_Gadi.sh similarity index 100% rename from jobscript_templates/CustodianScripts/DRS_Custodian_Gadi.sh rename to utils/jobscript_templates/CustodianScripts/DRS_Custodian_Gadi.sh diff --git a/jobscript_templates/CustodianScripts/DRS_Custodian_Gadi_GPU.sh b/utils/jobscript_templates/CustodianScripts/DRS_Custodian_Gadi_GPU.sh similarity index 100% rename from jobscript_templates/CustodianScripts/DRS_Custodian_Gadi_GPU.sh rename to utils/jobscript_templates/CustodianScripts/DRS_Custodian_Gadi_GPU.sh diff --git a/jobscript_templates/CustodianScripts/DRS_Custodian_Garching.sh b/utils/jobscript_templates/CustodianScripts/DRS_Custodian_Garching.sh similarity index 100% rename from jobscript_templates/CustodianScripts/DRS_Custodian_Garching.sh rename to utils/jobscript_templates/CustodianScripts/DRS_Custodian_Garching.sh diff --git a/jobscript_templates/CustodianScripts/DRS_Custodian_Setonix.sh b/utils/jobscript_templates/CustodianScripts/DRS_Custodian_Setonix.sh similarity index 100% rename from jobscript_templates/CustodianScripts/DRS_Custodian_Setonix.sh rename to utils/jobscript_templates/CustodianScripts/DRS_Custodian_Setonix.sh diff --git a/jobscript_templates/CustodianScripts/SDRS_Custodian_Garching.sh b/utils/jobscript_templates/CustodianScripts/SDRS_Custodian_Garching.sh similarity index 100% rename from jobscript_templates/CustodianScripts/SDRS_Custodian_Garching.sh rename to utils/jobscript_templates/CustodianScripts/SDRS_Custodian_Garching.sh diff --git a/jobscript_templates/CustodianScripts/SDRS_Custodian_Setonix.sh b/utils/jobscript_templates/CustodianScripts/SDRS_Custodian_Setonix.sh similarity index 100% rename from jobscript_templates/CustodianScripts/SDRS_Custodian_Setonix.sh rename to utils/jobscript_templates/CustodianScripts/SDRS_Custodian_Setonix.sh diff --git a/jobscript_templates/CustodianScripts/Static_Custodian_Garching.sh b/utils/jobscript_templates/CustodianScripts/Static_Custodian_Garching.sh similarity index 100% rename from jobscript_templates/CustodianScripts/Static_Custodian_Garching.sh rename to utils/jobscript_templates/CustodianScripts/Static_Custodian_Garching.sh diff --git a/jobscript_templates/CustodianScripts/template_BASE.py b/utils/jobscript_templates/CustodianScripts/template_BASE.py similarity index 100% rename from jobscript_templates/CustodianScripts/template_BASE.py rename to utils/jobscript_templates/CustodianScripts/template_BASE.py diff --git a/jobscript_templates/CustodianScripts/template_DRS.py b/utils/jobscript_templates/CustodianScripts/template_DRS.py similarity index 100% rename from jobscript_templates/CustodianScripts/template_DRS.py rename to utils/jobscript_templates/CustodianScripts/template_DRS.py diff --git a/jobscript_templates/CustodianScripts/template_SDRS.py b/utils/jobscript_templates/CustodianScripts/template_SDRS.py similarity index 100% rename from jobscript_templates/CustodianScripts/template_SDRS.py rename to utils/jobscript_templates/CustodianScripts/template_SDRS.py diff --git a/jobscript_templates/CustodianScripts/template_SDRS_KPOINTS.py b/utils/jobscript_templates/CustodianScripts/template_SDRS_KPOINTS.py similarity index 100% rename from jobscript_templates/CustodianScripts/template_SDRS_KPOINTS.py rename to utils/jobscript_templates/CustodianScripts/template_SDRS_KPOINTS.py diff --git a/jobscript_templates/CustodianScripts/template_Static.py b/utils/jobscript_templates/CustodianScripts/template_Static.py similarity index 100% rename from jobscript_templates/CustodianScripts/template_Static.py rename to utils/jobscript_templates/CustodianScripts/template_Static.py diff --git a/jobscript_templates/DRS_Custodian_Garching.sh b/utils/jobscript_templates/DRS_Custodian_Garching.sh similarity index 100% rename from jobscript_templates/DRS_Custodian_Garching.sh rename to utils/jobscript_templates/DRS_Custodian_Garching.sh diff --git a/jobscript_templates/DRS_Custodian_Setonix.sh b/utils/jobscript_templates/DRS_Custodian_Setonix.sh similarity index 100% rename from jobscript_templates/DRS_Custodian_Setonix.sh rename to utils/jobscript_templates/DRS_Custodian_Setonix.sh diff --git a/jobscript_templates/INCAR b/utils/jobscript_templates/INCAR similarity index 100% rename from jobscript_templates/INCAR rename to utils/jobscript_templates/INCAR diff --git a/jobscript_templates/SDRS_Custodian_Garching.sh b/utils/jobscript_templates/SDRS_Custodian_Garching.sh similarity index 100% rename from jobscript_templates/SDRS_Custodian_Garching.sh rename to utils/jobscript_templates/SDRS_Custodian_Garching.sh diff --git a/jobscript_templates/SDRS_Custodian_Setonix.sh b/utils/jobscript_templates/SDRS_Custodian_Setonix.sh similarity index 100% rename from jobscript_templates/SDRS_Custodian_Setonix.sh rename to utils/jobscript_templates/SDRS_Custodian_Setonix.sh diff --git a/jobscript_templates/cmti_large.sh b/utils/jobscript_templates/cmti_large.sh similarity index 100% rename from jobscript_templates/cmti_large.sh rename to utils/jobscript_templates/cmti_large.sh diff --git a/jobscript_templates/jobfile-Gadi-DoubleRelaxation-DDEC6 b/utils/jobscript_templates/jobfile-Gadi-DoubleRelaxation-DDEC6 similarity index 100% rename from jobscript_templates/jobfile-Gadi-DoubleRelaxation-DDEC6 rename to utils/jobscript_templates/jobfile-Gadi-DoubleRelaxation-DDEC6 diff --git a/jobscript_templates/jobfile-Gadi-DoubleRelaxation-DDEC6-GPU b/utils/jobscript_templates/jobfile-Gadi-DoubleRelaxation-DDEC6-GPU similarity index 100% rename from jobscript_templates/jobfile-Gadi-DoubleRelaxation-DDEC6-GPU rename to utils/jobscript_templates/jobfile-Gadi-DoubleRelaxation-DDEC6-GPU diff --git a/jobscript_templates/jobfile-Gadi-StaticImage-DDEC6 b/utils/jobscript_templates/jobfile-Gadi-StaticImage-DDEC6 similarity index 100% rename from jobscript_templates/jobfile-Gadi-StaticImage-DDEC6 rename to utils/jobscript_templates/jobfile-Gadi-StaticImage-DDEC6 diff --git a/jobscript_templates/jobfile-Garching-DoubleRelaxation-DDEC6 b/utils/jobscript_templates/jobfile-Garching-DoubleRelaxation-DDEC6 similarity index 100% rename from jobscript_templates/jobfile-Garching-DoubleRelaxation-DDEC6 rename to utils/jobscript_templates/jobfile-Garching-DoubleRelaxation-DDEC6 diff --git a/jobscript_templates/jobfile-Garching-Static-DoubleRel-Static-DDEC6 b/utils/jobscript_templates/jobfile-Garching-Static-DoubleRel-Static-DDEC6 similarity index 100% rename from jobscript_templates/jobfile-Garching-Static-DoubleRel-Static-DDEC6 rename to utils/jobscript_templates/jobfile-Garching-Static-DoubleRel-Static-DDEC6 diff --git a/jobscript_templates/jobfile-Garching-StaticImage-DDEC6 b/utils/jobscript_templates/jobfile-Garching-StaticImage-DDEC6 similarity index 100% rename from jobscript_templates/jobfile-Garching-StaticImage-DDEC6 rename to utils/jobscript_templates/jobfile-Garching-StaticImage-DDEC6 diff --git a/jobscript_templates/jobfile-Setonix-DoubleRelaxation-DDEC6 b/utils/jobscript_templates/jobfile-Setonix-DoubleRelaxation-DDEC6 similarity index 100% rename from jobscript_templates/jobfile-Setonix-DoubleRelaxation-DDEC6 rename to utils/jobscript_templates/jobfile-Setonix-DoubleRelaxation-DDEC6 diff --git a/jobscript_templates/jobfile-Setonix-DoubleRelaxation-DDEC6-KPOINTS b/utils/jobscript_templates/jobfile-Setonix-DoubleRelaxation-DDEC6-KPOINTS similarity index 100% rename from jobscript_templates/jobfile-Setonix-DoubleRelaxation-DDEC6-KPOINTS rename to utils/jobscript_templates/jobfile-Setonix-DoubleRelaxation-DDEC6-KPOINTS diff --git a/jobscript_templates/jobfile-Setonix-DoubleRelaxation-DDEC6-KSPACING_005 b/utils/jobscript_templates/jobfile-Setonix-DoubleRelaxation-DDEC6-KSPACING_005 similarity index 100% rename from jobscript_templates/jobfile-Setonix-DoubleRelaxation-DDEC6-KSPACING_005 rename to utils/jobscript_templates/jobfile-Setonix-DoubleRelaxation-DDEC6-KSPACING_005 diff --git a/jobscript_templates/jobfile-Setonix-Static-DoubleRel-Static-DDEC6 b/utils/jobscript_templates/jobfile-Setonix-Static-DoubleRel-Static-DDEC6 similarity index 100% rename from jobscript_templates/jobfile-Setonix-Static-DoubleRel-Static-DDEC6 rename to utils/jobscript_templates/jobfile-Setonix-Static-DoubleRel-Static-DDEC6 diff --git a/jobscript_templates/jobfile-Setonix-StaticImage-DDEC6 b/utils/jobscript_templates/jobfile-Setonix-StaticImage-DDEC6 similarity index 100% rename from jobscript_templates/jobfile-Setonix-StaticImage-DDEC6 rename to utils/jobscript_templates/jobfile-Setonix-StaticImage-DDEC6 diff --git a/jobscript_templates/jobfile-Setonix-base-custodian.sh b/utils/jobscript_templates/jobfile-Setonix-base-custodian.sh similarity index 65% rename from jobscript_templates/jobfile-Setonix-base-custodian.sh rename to utils/jobscript_templates/jobfile-Setonix-base-custodian.sh index 8c11957..a367cbf 100644 --- a/jobscript_templates/jobfile-Setonix-base-custodian.sh +++ b/utils/jobscript_templates/jobfile-Setonix-base-custodian.sh @@ -19,11 +19,25 @@ source /scratch/pawsey0380/hmai/mambaforge/bin/activate pymatgen echo 'import sys from custodian.custodian import Custodian -from custodian.vasp.handlers import VaspErrorHandler, UnconvergedErrorHandler, NonConvergingErrorHandler, PositiveEnergyErrorHandler from custodian.vasp.jobs import VaspJob - +from custodian.vasp.handlers import ( + VaspErrorHandler, + NonConvergingErrorHandler, + PositiveEnergyErrorHandler, + FrozenJobErrorHandler +) +from utils.custom_custodian_handlers import Han_CustomVaspErrorHandler + +output_filename = {VASPOUTPUTFILENAME} + +handlers = [ + VaspErrorHandler(output_filename=output_filename), + Han_CustomVaspErrorHandler(), + NonConvergingErrorHandler(), + PositiveEnergyErrorHandler(), + FrozenJobErrorHandler(output_filename=output_filename) +] output_filename = "vasp.log" -handlers = [VaspErrorHandler(output_filename=output_filename), UnconvergedErrorHandler(), NonConvergingErrorHandler(), PositiveEnergyErrorHandler()] jobs = [VaspJob(sys.argv[1:], output_file=output_filename, suffix = "")] diff --git a/jobscript_templates/jobfile-conv-Gadi b/utils/jobscript_templates/jobfile-conv-Gadi similarity index 100% rename from jobscript_templates/jobfile-conv-Gadi rename to utils/jobscript_templates/jobfile-conv-Gadi diff --git a/jobscript_templates/jobfile-conv-Magnus b/utils/jobscript_templates/jobfile-conv-Magnus similarity index 100% rename from jobscript_templates/jobfile-conv-Magnus rename to utils/jobscript_templates/jobfile-conv-Magnus diff --git a/jobscript_templates/jobfile-conv-Setonix b/utils/jobscript_templates/jobfile-conv-Setonix similarity index 100% rename from jobscript_templates/jobfile-conv-Setonix rename to utils/jobscript_templates/jobfile-conv-Setonix diff --git a/jobscript_templates/jobfile_Garching b/utils/jobscript_templates/jobfile_Garching similarity index 100% rename from jobscript_templates/jobfile_Garching rename to utils/jobscript_templates/jobfile_Garching diff --git a/jobscript_templates/jobfile_Garching_vasp b/utils/jobscript_templates/jobfile_Garching_vasp similarity index 100% rename from jobscript_templates/jobfile_Garching_vasp rename to utils/jobscript_templates/jobfile_Garching_vasp diff --git a/parallel.py b/utils/parallel.py similarity index 100% rename from parallel.py rename to utils/parallel.py diff --git a/periodic_table.csv b/utils/periodic_table.csv similarity index 100% rename from periodic_table.csv rename to utils/periodic_table.csv diff --git a/periodic_table.py b/utils/periodic_table.py similarity index 100% rename from periodic_table.py rename to utils/periodic_table.py diff --git a/plotters/grid_plots.py b/utils/plotters/grid_plots.py similarity index 100% rename from plotters/grid_plots.py rename to utils/plotters/grid_plots.py diff --git a/plotters/structure_plots.py b/utils/plotters/structure_plots.py similarity index 100% rename from plotters/structure_plots.py rename to utils/plotters/structure_plots.py diff --git a/structure_featuriser.py b/utils/structure_featuriser.py similarity index 100% rename from structure_featuriser.py rename to utils/structure_featuriser.py diff --git a/training_data_nequip.py b/utils/training_data_nequip.py similarity index 100% rename from training_data_nequip.py rename to utils/training_data_nequip.py diff --git a/utils/vasp/__pycache__/vasp.cpython-312.pyc b/utils/vasp/__pycache__/vasp.cpython-312.pyc new file mode 100644 index 0000000..345271c Binary files /dev/null and b/utils/vasp/__pycache__/vasp.cpython-312.pyc differ diff --git a/utils/vasp/__pycache__/vasp_database.cpython-312.pyc b/utils/vasp/__pycache__/vasp_database.cpython-312.pyc new file mode 100644 index 0000000..0510a36 Binary files /dev/null and b/utils/vasp/__pycache__/vasp_database.cpython-312.pyc differ diff --git a/vasp/vasp.py b/utils/vasp/database.py similarity index 99% rename from vasp/vasp.py rename to utils/vasp/database.py index c532e39..ac725f3 100644 --- a/vasp/vasp.py +++ b/utils/vasp/database.py @@ -11,7 +11,7 @@ import utils.generic as gen_tools from utils.parallel import parallelise from utils.vasp.parser.outcar import Outcar -from utils.vasp.vasp_database import parse_vasp_directory +from vasp.parser.output import parse_vasp_directory def find_vasp_directories(parent_dir, filenames=["vasp.log", "INCAR", "POTCAR", "CONTCAR", "KPOINTS", "OUTCAR", "vasprun.xml"], diff --git a/vasp/vasp_job.py b/utils/vasp/job.py similarity index 100% rename from vasp/vasp_job.py rename to utils/vasp/job.py diff --git a/utils/vasp/parser/__pycache__/outcar.cpython-312.pyc b/utils/vasp/parser/__pycache__/outcar.cpython-312.pyc new file mode 100644 index 0000000..e8f97d7 Binary files /dev/null and b/utils/vasp/parser/__pycache__/outcar.cpython-312.pyc differ diff --git a/vasp/parser/outcar.py b/utils/vasp/parser/outcar.py similarity index 100% rename from vasp/parser/outcar.py rename to utils/vasp/parser/outcar.py diff --git a/vasp/vasp_database.py b/utils/vasp/parser/output.py similarity index 100% rename from vasp/vasp_database.py rename to utils/vasp/parser/output.py diff --git a/vasp/vasp_resubmitter.py b/utils/vasp/resubmitter.py similarity index 100% rename from vasp/vasp_resubmitter.py rename to utils/vasp/resubmitter.py diff --git a/utils/vasp/test_custodian_finaljobfuckingme.ipynb b/utils/vasp/test_custodian_finaljobfuckingme.ipynb new file mode 100644 index 0000000..e054fe8 --- /dev/null +++ b/utils/vasp/test_custodian_finaljobfuckingme.ipynb @@ -0,0 +1,56 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "\n", + "from custodian.custodian import Custodian\n", + "from custodian.vasp.handlers import VaspErrorHandler, UnconvergedErrorHandler, NonConvergingErrorHandler, PositiveEnergyErrorHandler\n", + "from custodian.vasp.jobs import VaspJob\n", + "\n", + "output_filename = \"vasp.log\"\n", + "handlers = [VaspErrorHandler(output_filename=output_filename), UnconvergedErrorHandler(), NonConvergingErrorHandler(), PositiveEnergyErrorHandler()]\n", + "jobs = [VaspJob(sys.argv[1:], output_file=output_filename, suffix = \".relax_1\", final=False, settings_override=[{\"dict\": \"INCAR\", \"action\": {\"_set\": {\"KSPACING\": 0.5}}}]),\n", + " VaspJob(sys.argv[1:], output_file=output_filename, suffix = \".relax_2\", final=False,\n", + " settings_override = [{\"file\": \"CONTCAR\", \"action\": {\"_file_copy\": {\"dest\": \"POSCAR\"}}},\n", + " {\"dict\": \"INCAR\", \"action\": {\"_set\": {\"KSPACING\": 0.5, \"EDIFF\": 1E-5, \"EDIFFG\": 1E-4}}}], copy_magmom=True),\n", + " VaspJob(sys.argv[1:], output_file=output_filename, suffix = \"\",\n", + " settings_override = [{\"dict\": \"INCAR\", \"action\": {\"_set\": {\"NSW\": 0, \"LAECHG\": True, \"LCHARGE\": True, \"NELM\": 240, \"EDIFF\": 1E-5}}},\n", + " {\"file\": \"CONTCAR\", \"action\": {\"_file_copy\": {\"dest\": \"POSCAR\"}}}])]\n", + "c = Custodian(handlers, jobs, max_errors=10)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "pyiron_workflow", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/utils/vasp/vasp_potential_training_database.py b/utils/vasp/vasp_potential_training_database.py new file mode 100644 index 0000000..e69de29 diff --git a/vasp/resubmitter.py b/vasp/resubmitter.py new file mode 100644 index 0000000..104827f --- /dev/null +++ b/vasp/resubmitter.py @@ -0,0 +1,164 @@ +import os +import shutil +import tarfile +import subprocess +import pandas as pd + +from vasp.database import find_vasp_directories, check_convergence +from utils.generic import get_latest_file_iteration +from utils.jobfile import jobfile + +def get_slurm_jobs_working_directories(username="hmai"): + command = f"squeue -u {username} -o \"%i %Z\"" + result = subprocess.run(command, shell=True, capture_output=True, text=True) + output_lines = result.stdout.strip().split("\n")[1:] # Remove the header line + + # Parse the output lines into a list of tuples (job_id, working_directory) + data = [line.split() for line in output_lines] + + # Create a Pandas DataFrame from the data + df = pd.DataFrame(data, columns=["Job ID", "Working Directory"]) + + return df + +class CalculationConverger(): + + def __init__(self, parent_dir, script_template_dir, max_submissions=1000, submission_command="sbatch", username="hmai"): + self.parent_dir = parent_dir + self.max_submissions = max_submissions + self.submission_command = submission_command + self.vasp_dirs = find_vasp_directories(parent_dir, filenames=["INCAR", "POTCAR"], all_present=True, extract_tarballs=False) + self.script_template_dir = script_template_dir + self.user = username + + def submit_to_queue(self, dirpath, script_name): + os.system(f"cd {dirpath} && {self.submission_command} {script_name}") + + def reconverge_all(self, calc_type="DRS", HPC="Setonix", VASP_version="5.4.4", CPU=128, walltime=24, cpu_per_node=128, from_dataframe_path=None): + non_converged = self.load_non_converged_paths(from_dataframe_path) + running_jobs_df = get_slurm_jobs_working_directories(self.user) + running_queued_job_directories = running_jobs_df["Working Directory"].to_list() + + dirs_to_search_next_time, leftover_calcs_exceeding_queue_limit = [], [] + + dirs_to_apply_reconverge = set(non_converged or self.vasp_dirs) - set(running_queued_job_directories) + + for i, dir in enumerate(dirs_to_apply_reconverge): + if not check_convergence(dir): + if i + len(running_queued_job_directories) > self.max_submissions: + leftover_calcs_exceeding_queue_limit.append(dir) + else: + self.reconverge(dir, calc_type, HPC, VASP_version, CPU, walltime, cpu_per_node) + dirs_to_search_next_time.append(dir) + else: + print(f"CONVERGED: {dir}") + + self.update_resubmit_log(dirs_to_search_next_time + running_queued_job_directories + leftover_calcs_exceeding_queue_limit) + return dirs_to_search_next_time + + def load_non_converged_paths(self, from_dataframe_path): + if from_dataframe_path: + df = pd.read_pickle(from_dataframe_path) + return [path.rstrip(os.sep + "OUTCAR") if path.endswith(os.sep + "OUTCAR") else path for path in df['filepath'].tolist()] + return self.reconverge_from_log_file() + + def update_resubmit_log(self, dirs_to_search_next_time): + with open(os.path.join(self.parent_dir, "resubmit.log"), "w") as log_file: + for dir_path in dirs_to_search_next_time: + log_file.write(dir_path + "\n") + + def reconverge(self, dirpath, calc_type="SDRS", HPC="Setonix", VASP_version="5.4.4", CPU=128, walltime=24, cpu_per_node=128): + self.handle_error_run_files(dirpath) + reconverge_methods = { + "static": self.reconverge_static, + "SDRS": self.reconverge_SDRS, + "DRS": self.reconverge_DRS, + "base": self.reconverge_base + } + reconverge_method = reconverge_methods.get(calc_type, self.reconverge_base) + reconverge_method(dirpath, HPC, VASP_version, CPU, walltime, cpu_per_node) + + def handle_error_run_files(self, dirpath): + error_tar_files_exist = any("error" in f and "tar" in f for f in os.listdir(dirpath)) + if error_tar_files_exist: + latest_error_run_index = self.find_latest_error_run_index(dirpath) + error_run_folder_path = os.path.join(dirpath, f"error_run_{latest_error_run_index + 1}") + os.makedirs(error_run_folder_path) + self.move_files_to_error_run_folder(dirpath, error_run_folder_path) + + def move_files_to_error_run_folder(self, dirpath, error_run_folder_path): + for f in os.listdir(dirpath): + if ("error" in f and "tar" in f) or f.endswith(".sh"): + shutil.move(os.path.join(dirpath, f), os.path.join(error_run_folder_path, f)) + + for og_file in ["INCAR.orig", "POSCAR.orig", "KPOINTS.orig", "custodian.json"]: + if os.path.exists(os.path.join(dirpath, og_file)): + shutil.move(os.path.join(dirpath, og_file), os.path.join(error_run_folder_path, og_file)) + + for current_run in ["INCAR", "POSCAR", "POTCAR", "OUTCAR", "vasprun.xml", "vasp.log"]: + if os.path.exists(os.path.join(dirpath, current_run)): + shutil.copy(os.path.join(dirpath, current_run), os.path.join(error_run_folder_path, current_run)) + + def find_latest_error_run_index(self, dirpath): + error_run_indices = [0] + for f in os.listdir(dirpath): + if f.startswith("error_run_"): + try: + n = int(f.split("error_run_")[-1]) + error_run_indices.append(n) + except ValueError as e: + print(f"Exception occurred at {dirpath}: {e}") + return max(error_run_indices) + + def generate_custodian_string(self, template_filename, user_inputs): + template_path = os.path.join(self.script_template_dir, template_filename) + return jobfile._replace_fields(template_path, user_inputs) + + def reconverge_base(self, dirpath, HPC, VASP_version, CPU, walltime, cpu_per_node): + self.reconverge_generic(dirpath, "template_BASE.py", HPC, VASP_version, CPU, walltime, cpu_per_node) + + def reconverge_static(self, dirpath, HPC, VASP_version, CPU, walltime, cpu_per_node): + self.reconverge_generic(dirpath, "template_Static.py", HPC, VASP_version, CPU, walltime, cpu_per_node) + + def reconverge_DRS(self, dirpath, HPC, VASP_version, CPU, walltime, cpu_per_node): + stages_left = self.get_stages_left(dirpath, ["relax_1", "relax_2"], 3) + self.reconverge_generic(dirpath, "template_DRS.py", HPC, VASP_version, CPU, walltime, cpu_per_node, {"{STAGES_LEFT}": str(stages_left)}) + + def reconverge_SDRS(self, dirpath, HPC, VASP_version, CPU, walltime, cpu_per_node): + stages_left = self.get_stages_left(dirpath, ["static_1", "relax_1", "relax_2"], 4) + self.reconverge_generic(dirpath, "template_SDRS.py", HPC, VASP_version, CPU, walltime, cpu_per_node, {"{STAGES_LEFT}": str(stages_left)}) + + def get_stages_left(self, dirpath, stage_markers, default_stages_left): + for i, marker in enumerate(reversed(stage_markers)): + if any(f.endswith(f".{marker}") for f in os.listdir(dirpath)): + return i + 1 + return default_stages_left + + def reconverge_generic(self, dirpath, template_filename, HPC, VASP_version, CPU, walltime, cpu_per_node, extra_inputs=None): + user_inputs = { + '{VASPOUTPUTFILENAME}': '"vasp.log"', + '{MAXCUSTODIANERRORS}': "20" + } + if extra_inputs: + user_inputs.update(extra_inputs) + + custodian_string = self.generate_custodian_string(template_filename, user_inputs) + script_name = os.path.join(self.script_template_dir, f"{template_filename.split('_')[0]}_Custodian_{HPC}.sh") + job = jobfile(file_path=script_name, HPC=HPC, VASP_version=VASP_version, CPU=CPU, walltime=walltime, cpu_per_node=cpu_per_node, generic_insert_field=["{CUSTODIANSTRING}"], generic_insert=[custodian_string]) + target_script_name = f"{os.path.basename(dirpath)}.sh" + job.to_file(job_name=target_script_name, output_path=dirpath) + self.submit_to_queue(dirpath, target_script_name) + + def reconverge_from_log_file(self): + resubmit_log_file = os.path.join(self.parent_dir, "resubmit.log") + if os.path.isfile(resubmit_log_file): + with open(resubmit_log_file, "r") as log_file: + non_converged_dirs = [line.strip() for line in log_file.readlines()] + + largest_n = get_latest_file_iteration(self.parent_dir, "resubmit.log_") + os.rename(resubmit_log_file, os.path.join(self.parent_dir, f"resubmit.log_{largest_n + 1}")) + + return non_converged_dirs + else: + print("No resubmit log file found. Nothing to resubmit from old logs.") + return [] diff --git a/vasp/vasp_potential_training_database.py b/vasp/vasp_potential_training_database.py new file mode 100644 index 0000000..a4b6498 --- /dev/null +++ b/vasp/vasp_potential_training_database.py @@ -0,0 +1 @@ +def \ No newline at end of file