Commit c0caa08

black formatting + fixed tests

Han Lin Mai committed Jun 14, 2024
1 parent ce488d5 commit c0caa08
Showing 35 changed files with 4,171 additions and 2,219 deletions.
14 changes: 8 additions & 6 deletions actual_usage/VaspReconvergeExample.py
@@ -1,10 +1,12 @@
 from utils.vasp.vasp_resubmitter import CalculationConverger
 import os
 
-vasp_resubmitter = CalculationConverger(parent_dir=os.getcwd(),
-                                        script_template_dir="/home/hmai/CustodianJobfiles",
-                                        max_submissions = 1000,
-                                        submission_command = "sbatch",
-                                        username="hmai")
+vasp_resubmitter = CalculationConverger(
+    parent_dir=os.getcwd(),
+    script_template_dir="/home/hmai/CustodianJobfiles",
+    max_submissions=1000,
+    submission_command="sbatch",
+    username="hmai",
+)
 
-vasp_resubmitter.reconverge_all()
+vasp_resubmitter.reconverge_all()
73 changes: 49 additions & 24 deletions actual_usage/build_vasp_database.py
@@ -3,40 +3,65 @@
 import warnings
 from multiprocessing import cpu_count
 
+
 def main():
     warnings.filterwarnings("ignore")
 
     # Initialize argument parser
-    parser = argparse.ArgumentParser(description='Find and compress directories based on specified criteria.')
-    parser.add_argument('directory', metavar='DIR', type=str, help='the directory to operate on')
-    parser.add_argument('--extract', action='store_true', help='Extract directories during database generation')
-    parser.add_argument('--max_dir_count', type=int, help='Maximum directory count for database generation')
-    parser.add_argument('--read_all_runs_in_dir', action='store_true', default=False, help='Read all runs in directory')
-    parser.add_argument('--read_error_runs_in_dir', action='store_true', default=False, help='Read directories with errors')
+    parser = argparse.ArgumentParser(
+        description="Find and compress directories based on specified criteria."
+    )
+    parser.add_argument(
+        "directory", metavar="DIR", type=str, help="the directory to operate on"
+    )
+    parser.add_argument(
+        "--extract",
+        action="store_true",
+        help="Extract directories during database generation",
+    )
+    parser.add_argument(
+        "--max_dir_count",
+        type=int,
+        help="Maximum directory count for database generation",
+    )
+    parser.add_argument(
+        "--read_all_runs_in_dir",
+        action="store_true",
+        default=False,
+        help="Read all runs in directory",
+    )
+    parser.add_argument(
+        "--read_error_runs_in_dir",
+        action="store_true",
+        default=False,
+        help="Read directories with errors",
+    )
     args = parser.parse_args()
 
-    datagen = DatabaseGenerator(args.directory,
-                                max_workers=cpu_count())
-
+    datagen = DatabaseGenerator(args.directory, max_workers=cpu_count())
 
     # Check if max_dir_count is provided as an argument
     if args.max_dir_count is not None:
         max_dir_count = args.max_dir_count
     else:
         max_dir_count = 2000  # Default value
 
     # Call the build_database function with the updated parameters
-    df = datagen.build_database(extract_directories=args.extract,
-                                read_multiple_runs_in_dir=args.read_all_runs_in_dir,
-                                read_error_dirs=args.read_error_runs_in_dir,
-                                max_dir_count=max_dir_count,
-                                tarball_extensions=(".tar.gz", ".tar.bz2"),
-                                cleanup=False,
-                                keep_filenames_after_cleanup=[],
-                                keep_filename_patterns_after_cleanup=[],
-                                filenames_to_qualify=["OUTCAR"],#, "vasprun.xml"],
-                                all_present=True,
-                                df_filename=None,
-                                df_compression=True)
-
-if __name__ == '__main__':
+    df = datagen.build_database(
+        extract_directories=args.extract,
+        read_multiple_runs_in_dir=args.read_all_runs_in_dir,
+        read_error_dirs=args.read_error_runs_in_dir,
+        max_dir_count=max_dir_count,
+        tarball_extensions=(".tar.gz", ".tar.bz2"),
+        cleanup=False,
+        keep_filenames_after_cleanup=[],
+        keep_filename_patterns_after_cleanup=[],
+        filenames_to_qualify=["OUTCAR"],  # , "vasprun.xml"],
+        all_present=True,
+        df_filename=None,
+        df_compression=True,
+    )
+
+
+if __name__ == "__main__":
     main()
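Note: a hedged sketch of exercising this entry point end to end. The flags mirror the argparse definitions above; the path is a placeholder, and the vasp_database.pkl.gz output name is inferred from summarise_vasp_database.py later in this commit rather than from this file.

# Hypothetical invocation:
#   python build_vasp_database.py /scratch/vasp_runs --extract --max_dir_count 500
import pandas as pd

# build_database was called with df_compression=True, so the resulting pickle
# is assumed to be gzipped and named vasp_database.pkl.gz.
df = pd.read_pickle("vasp_database.pkl.gz", compression="gzip")
print(df["convergence"].value_counts())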
14 changes: 10 additions & 4 deletions actual_usage/compression.py
@@ -2,9 +2,14 @@
 from utils.generic import find_and_compress_directories_parallel
 import os
 
+
 def main():
-    parser = argparse.ArgumentParser(description='Find and compress directories based on specified criteria.')
-    parser.add_argument('directory', metavar='DIR', type=str, help='the directory to operate on')
+    parser = argparse.ArgumentParser(
+        description="Find and compress directories based on specified criteria."
+    )
+    parser.add_argument(
+        "directory", metavar="DIR", type=str, help="the directory to operate on"
+    )
     args = parser.parse_args()
 
     find_and_compress_directories_parallel(
@@ -16,8 +21,9 @@ def main():
         files=[],
         file_patterns=[],
         print_msg=True,
-        inside_dir=True
+        inside_dir=True,
     )
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     main()
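Note: the script defines a single positional DIR argument, so a hypothetical invocation is simply python compression.py /scratch/finished_runs, with the path a placeholder.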
44 changes: 34 additions & 10 deletions actual_usage/summarise_vasp_database.py
@@ -2,20 +2,23 @@
 import argparse
 import pandas as pd
 
+
 def analyze_vasp_database(folder_path, output_compression=False):
     # Initialize paths for both potential database files
-    database_file_pkl = os.path.join(folder_path, 'vasp_database.pkl')
-    database_file_gz = os.path.join(folder_path, 'vasp_database.pkl.gz')
+    database_file_pkl = os.path.join(folder_path, "vasp_database.pkl")
+    database_file_gz = os.path.join(folder_path, "vasp_database.pkl.gz")
 
     # Determine which file exists and set the appropriate path and compression option
     if os.path.exists(database_file_gz):
         database_file = database_file_gz
-        compression_option = 'gzip'
+        compression_option = "gzip"
     elif os.path.exists(database_file_pkl):
         database_file = database_file_pkl
         compression_option = None
     else:
-        print("Error: neither 'vasp_database.pkl' nor 'vasp_database.pkl.gz' found in the specified folder.")
+        print(
+            "Error: neither 'vasp_database.pkl' nor 'vasp_database.pkl.gz' found in the specified folder."
+        )
         return
 
     # Load the database into a DataFrame with or without compression
@@ -29,20 +32,41 @@ def analyze_vasp_database(folder_path, output_compression=False):
     converged_jobs = df[df["convergence"] == True]
 
     # Determine compression option for output based on the user input
-    output_compression_option = 'gzip' if output_compression else None
+    output_compression_option = "gzip" if output_compression else None
 
     # Write the failed_jobs and converged_jobs DataFrames to separate pickle files with optional compression
-    failed_jobs.to_pickle(os.path.join(folder_path, 'failed_jobs.pkl.gz' if output_compression else 'failed_jobs.pkl'), compression=output_compression_option)
-    converged_jobs.to_pickle(os.path.join(folder_path, 'converged_jobs.pkl.gz' if output_compression else 'converged_jobs.pkl'), compression=output_compression_option)
+    failed_jobs.to_pickle(
+        os.path.join(
+            folder_path,
+            "failed_jobs.pkl.gz" if output_compression else "failed_jobs.pkl",
+        ),
+        compression=output_compression_option,
+    )
+    converged_jobs.to_pickle(
+        os.path.join(
+            folder_path,
+            "converged_jobs.pkl.gz" if output_compression else "converged_jobs.pkl",
+        ),
+        compression=output_compression_option,
+    )
 
     # Print the counts
     print(f"The number of failed jobs is: {len(failed_jobs)}")
    print(f"The number of successful jobs is: {len(converged_jobs)}")
     print(f"The total number of jobs is: {len(df)}")
 
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Analyze VASP database")
-    parser.add_argument("folder_path", type=str, help="Folder path containing 'vasp_database.pkl' or 'vasp_database.pkl.gz'")
-    parser.add_argument("--output_compression", action="store_true", help="Enable gzip compression for output pkl files")
+    parser.add_argument(
+        "folder_path",
+        type=str,
+        help="Folder path containing 'vasp_database.pkl' or 'vasp_database.pkl.gz'",
+    )
+    parser.add_argument(
+        "--output_compression",
+        action="store_true",
+        help="Enable gzip compression for output pkl files",
+    )
     args = parser.parse_args()
-    analyze_vasp_database(args.folder_path, args.output_compression)
+    analyze_vasp_database(args.folder_path, args.output_compression)
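Note: a hedged sketch of reading back the split pickles written above; the filenames and gzip option follow the to_pickle calls in this diff.

import pandas as pd

# With --output_compression the outputs are gzipped; without it, drop the
# .gz suffix and pass compression=None.
failed = pd.read_pickle("failed_jobs.pkl.gz", compression="gzip")
converged = pd.read_pickle("converged_jobs.pkl.gz", compression="gzip")
print(len(failed), "failed;", len(converged), "converged")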
51 changes: 36 additions & 15 deletions actual_usage/update_vasp_db.py
@@ -3,32 +3,53 @@
 import warnings
 from multiprocessing import cpu_count
 
+
 def main():
     warnings.filterwarnings("ignore")
 
     # Initialize argument parser
-    parser = argparse.ArgumentParser(description='Find and compress directories based on specified criteria.')
-    parser.add_argument('directory', metavar='DIR', type=str, help='the directory to operate on')
-    parser.add_argument('--max_dir_count', type=int, help='Maximum directory count for database generation')
-    parser.add_argument('--read_all_runs_in_dir', action='store_true', default=False, help='Read all runs in directory')
-    parser.add_argument('--read_error_runs_in_dir', action='store_true', default=False, help='Read directories with errors')
+    parser = argparse.ArgumentParser(
+        description="Find and compress directories based on specified criteria."
+    )
+    parser.add_argument(
+        "directory", metavar="DIR", type=str, help="the directory to operate on"
+    )
+    parser.add_argument(
+        "--max_dir_count",
+        type=int,
+        help="Maximum directory count for database generation",
+    )
+    parser.add_argument(
+        "--read_all_runs_in_dir",
+        action="store_true",
+        default=False,
+        help="Read all runs in directory",
+    )
+    parser.add_argument(
+        "--read_error_runs_in_dir",
+        action="store_true",
+        default=False,
+        help="Read directories with errors",
+    )
     args = parser.parse_args()
 
-    datagen = DatabaseGenerator(args.directory,
-                                max_workers=cpu_count())
-
+    datagen = DatabaseGenerator(args.directory, max_workers=cpu_count())
 
     # Check if max_dir_count is provided as an argument
     if args.max_dir_count is not None:
         max_dir_count = args.max_dir_count
     else:
         max_dir_count = 2000  # Default value
 
     # Call the update_failed_jobs_in_database function with the updated parameters
-    df = datagen.update_failed_jobs_in_database(df_path=args.directory,
-                                                read_error_dirs=args.read_error_runs_in_dir,
-                                                read_multiple_runs_in_dir=args.read_all_runs_in_dir,
-                                                max_dir_count=max_dir_count,
-                                                df_compression=True)
+    df = datagen.update_failed_jobs_in_database(
+        df_path=args.directory,
+        read_error_dirs=args.read_error_runs_in_dir,
+        read_multiple_runs_in_dir=args.read_all_runs_in_dir,
+        max_dir_count=max_dir_count,
+        df_compression=True,
+    )
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
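Note: a hypothetical refresh run over an existing database, using only the flags defined above (the path is a placeholder): python update_vasp_db.py /scratch/vasp_runs --read_error_runs_in_dir --max_dir_count 1000.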
34 changes: 17 additions & 17 deletions setup.py
@@ -1,25 +1,25 @@
 from setuptools import setup, find_packages
 
 setup(
-    name='utils',
-    version='0.1',
-    packages=find_packages(where='utils'),
-    package_dir={'': 'utils'},
+    name="utils",
+    version="0.1",
+    packages=find_packages(where="utils"),
+    package_dir={"": "utils"},
     install_requires=[
-        'pandas',
-        'numpy',
-        'pymatgen',
+        "pandas",
+        "numpy",
+        "pymatgen",
     ],
     scripts=[
-        'actual_usage/check_jobdir',
-        'actual_usage/memory_check',
-        'actual_usage/slurm_list_jobdir',
-        'actual_usage/build_and_show_db',
-        'actual_usage/compress_here',
-        'actual_usage/qstat_slurm',
-        'actual_usage/summarise_db',
-        'actual_usage/setonix_refresh_mamba',
-        'actual_usage/setonix_refresh_mamba',
-        'actual_usage/update_failed_jobs_db',
+        "actual_usage/check_jobdir",
+        "actual_usage/memory_check",
+        "actual_usage/slurm_list_jobdir",
+        "actual_usage/build_and_show_db",
+        "actual_usage/compress_here",
+        "actual_usage/qstat_slurm",
+        "actual_usage/summarise_db",
+        "actual_usage/setonix_refresh_mamba",
+        "actual_usage/setonix_refresh_mamba",
+        "actual_usage/update_failed_jobs_db",
     ],
 )
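Note: with this setup.py, a standard editable install (pip install -e .) would register the utils package and place the scripts= entries on the user's PATH; that workflow is assumed here, not shown in the commit.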