Commit c0caa08

black formatting + fixed tests

Han Lin Mai committed Jun 14, 2024
1 parent ce488d5 commit c0caa08
Showing 35 changed files with 4,171 additions and 2,219 deletions.
14 changes: 8 additions & 6 deletions actual_usage/VaspReconvergeExample.py
@@ -1,10 +1,12 @@
 from utils.vasp.vasp_resubmitter import CalculationConverger
 import os
 
-vasp_resubmitter = CalculationConverger(parent_dir=os.getcwd(),
-                                        script_template_dir="/home/hmai/CustodianJobfiles",
-                                        max_submissions = 1000,
-                                        submission_command = "sbatch",
-                                        username="hmai")
+vasp_resubmitter = CalculationConverger(
+    parent_dir=os.getcwd(),
+    script_template_dir="/home/hmai/CustodianJobfiles",
+    max_submissions=1000,
+    submission_command="sbatch",
+    username="hmai",
+)
 
-vasp_resubmitter.reconverge_all()
+vasp_resubmitter.reconverge_all()
73 changes: 49 additions & 24 deletions actual_usage/build_vasp_database.py
@@ -3,40 +3,65 @@
 import warnings
 from multiprocessing import cpu_count
 
+
 def main():
     warnings.filterwarnings("ignore")
 
     # Initialize argument parser
-    parser = argparse.ArgumentParser(description='Find and compress directories based on specified criteria.')
-    parser.add_argument('directory', metavar='DIR', type=str, help='the directory to operate on')
-    parser.add_argument('--extract', action='store_true', help='Extract directories during database generation')
-    parser.add_argument('--max_dir_count', type=int, help='Maximum directory count for database generation')
-    parser.add_argument('--read_all_runs_in_dir', action='store_true', default=False, help='Read all runs in directory')
-    parser.add_argument('--read_error_runs_in_dir', action='store_true', default=False, help='Read directories with errors')
+    parser = argparse.ArgumentParser(
+        description="Find and compress directories based on specified criteria."
+    )
+    parser.add_argument(
+        "directory", metavar="DIR", type=str, help="the directory to operate on"
+    )
+    parser.add_argument(
+        "--extract",
+        action="store_true",
+        help="Extract directories during database generation",
+    )
+    parser.add_argument(
+        "--max_dir_count",
+        type=int,
+        help="Maximum directory count for database generation",
+    )
+    parser.add_argument(
+        "--read_all_runs_in_dir",
+        action="store_true",
+        default=False,
+        help="Read all runs in directory",
+    )
+    parser.add_argument(
+        "--read_error_runs_in_dir",
+        action="store_true",
+        default=False,
+        help="Read directories with errors",
+    )
     args = parser.parse_args()
 
-    datagen = DatabaseGenerator(args.directory,
-                                max_workers=cpu_count())
-
+    datagen = DatabaseGenerator(args.directory, max_workers=cpu_count())
 
     # Check if max_dir_count is provided as an argument
     if args.max_dir_count is not None:
         max_dir_count = args.max_dir_count
     else:
         max_dir_count = 2000  # Default value
 
     # Call the build_database function with the updated parameters
-    df = datagen.build_database(extract_directories=args.extract,
-                                read_multiple_runs_in_dir=args.read_all_runs_in_dir,
-                                read_error_dirs=args.read_error_runs_in_dir,
-                                max_dir_count=max_dir_count,
-                                tarball_extensions=(".tar.gz", ".tar.bz2"),
-                                cleanup=False,
-                                keep_filenames_after_cleanup=[],
-                                keep_filename_patterns_after_cleanup=[],
-                                filenames_to_qualify=["OUTCAR"],#, "vasprun.xml"],
-                                all_present=True,
-                                df_filename=None,
-                                df_compression=True)
-
-if __name__ == '__main__':
+    df = datagen.build_database(
+        extract_directories=args.extract,
+        read_multiple_runs_in_dir=args.read_all_runs_in_dir,
+        read_error_dirs=args.read_error_runs_in_dir,
+        max_dir_count=max_dir_count,
+        tarball_extensions=(".tar.gz", ".tar.bz2"),
+        cleanup=False,
+        keep_filenames_after_cleanup=[],
+        keep_filename_patterns_after_cleanup=[],
+        filenames_to_qualify=["OUTCAR"],  # , "vasprun.xml"],
+        all_present=True,
+        df_filename=None,
+        df_compression=True,
+    )
+
+
+if __name__ == "__main__":
     main()
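Note: a hedged sketch of exercising this entry point end to end. The flags mirror the argparse definitions above; the path is a placeholder, and the vasp_database.pkl.gz output name is inferred from summarise_vasp_database.py later in this commit rather than from this file.

# Hypothetical invocation:
#   python build_vasp_database.py /scratch/vasp_runs --extract --max_dir_count 500
import pandas as pd

# build_database was called with df_compression=True, so the resulting pickle
# is assumed to be gzipped and named vasp_database.pkl.gz.
df = pd.read_pickle("vasp_database.pkl.gz", compression="gzip")
print(df["convergence"].value_counts())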
14 changes: 10 additions & 4 deletions actual_usage/compression.py
@@ -2,9 +2,14 @@
 from utils.generic import find_and_compress_directories_parallel
 import os
 
+
 def main():
-    parser = argparse.ArgumentParser(description='Find and compress directories based on specified criteria.')
-    parser.add_argument('directory', metavar='DIR', type=str, help='the directory to operate on')
+    parser = argparse.ArgumentParser(
+        description="Find and compress directories based on specified criteria."
+    )
+    parser.add_argument(
+        "directory", metavar="DIR", type=str, help="the directory to operate on"
+    )
     args = parser.parse_args()
 
     find_and_compress_directories_parallel(
@@ -16,8 +21,9 @@ def main():
         files=[],
         file_patterns=[],
         print_msg=True,
-        inside_dir=True
+        inside_dir=True,
     )
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     main()
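Note: the script defines a single positional DIR argument, so a hypothetical invocation is simply python compression.py /scratch/finished_runs, with the path a placeholder.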
44 changes: 34 additions & 10 deletions actual_usage/summarise_vasp_database.py
@@ -2,20 +2,23 @@
 import argparse
 import pandas as pd
 
+
 def analyze_vasp_database(folder_path, output_compression=False):
     # Initialize paths for both potential database files
-    database_file_pkl = os.path.join(folder_path, 'vasp_database.pkl')
-    database_file_gz = os.path.join(folder_path, 'vasp_database.pkl.gz')
+    database_file_pkl = os.path.join(folder_path, "vasp_database.pkl")
+    database_file_gz = os.path.join(folder_path, "vasp_database.pkl.gz")
 
     # Determine which file exists and set the appropriate path and compression option
     if os.path.exists(database_file_gz):
         database_file = database_file_gz
-        compression_option = 'gzip'
+        compression_option = "gzip"
     elif os.path.exists(database_file_pkl):
         database_file = database_file_pkl
         compression_option = None
     else:
-        print("Error: neither 'vasp_database.pkl' nor 'vasp_database.pkl.gz' found in the specified folder.")
+        print(
+            "Error: neither 'vasp_database.pkl' nor 'vasp_database.pkl.gz' found in the specified folder."
+        )
         return
 
     # Load the database into a DataFrame with or without compression
@@ -29,20 +32,41 @@ def analyze_vasp_database(folder_path, output_compression=False):
     converged_jobs = df[df["convergence"] == True]
 
     # Determine compression option for output based on the user input
-    output_compression_option = 'gzip' if output_compression else None
+    output_compression_option = "gzip" if output_compression else None
 
     # Write the failed_jobs and converged_jobs DataFrames to separate pickle files with optional compression
-    failed_jobs.to_pickle(os.path.join(folder_path, 'failed_jobs.pkl.gz' if output_compression else 'failed_jobs.pkl'), compression=output_compression_option)
-    converged_jobs.to_pickle(os.path.join(folder_path, 'converged_jobs.pkl.gz' if output_compression else 'converged_jobs.pkl'), compression=output_compression_option)
+    failed_jobs.to_pickle(
+        os.path.join(
+            folder_path,
+            "failed_jobs.pkl.gz" if output_compression else "failed_jobs.pkl",
+        ),
+        compression=output_compression_option,
+    )
+    converged_jobs.to_pickle(
+        os.path.join(
+            folder_path,
+            "converged_jobs.pkl.gz" if output_compression else "converged_jobs.pkl",
+        ),
+        compression=output_compression_option,
+    )
 
     # Print the counts
     print(f"The number of failed jobs is: {len(failed_jobs)}")
    print(f"The number of successful jobs is: {len(converged_jobs)}")
     print(f"The total number of jobs is: {len(df)}")
 
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Analyze VASP database")
-    parser.add_argument("folder_path", type=str, help="Folder path containing 'vasp_database.pkl' or 'vasp_database.pkl.gz'")
-    parser.add_argument("--output_compression", action="store_true", help="Enable gzip compression for output pkl files")
+    parser.add_argument(
+        "folder_path",
+        type=str,
+        help="Folder path containing 'vasp_database.pkl' or 'vasp_database.pkl.gz'",
+    )
+    parser.add_argument(
+        "--output_compression",
+        action="store_true",
+        help="Enable gzip compression for output pkl files",
+    )
     args = parser.parse_args()
-    analyze_vasp_database(args.folder_path, args.output_compression)
+    analyze_vasp_database(args.folder_path, args.output_compression)
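Note: a hedged sketch of reading back the split pickles written above; the filenames and gzip option follow the to_pickle calls in this diff.

import pandas as pd

# With --output_compression the outputs are gzipped; without it, drop the
# .gz suffix and pass compression=None.
failed = pd.read_pickle("failed_jobs.pkl.gz", compression="gzip")
converged = pd.read_pickle("converged_jobs.pkl.gz", compression="gzip")
print(len(failed), "failed;", len(converged), "converged")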
51 changes: 36 additions & 15 deletions actual_usage/update_vasp_db.py
@@ -3,32 +3,53 @@
 import warnings
 from multiprocessing import cpu_count
 
+
 def main():
     warnings.filterwarnings("ignore")
 
     # Initialize argument parser
-    parser = argparse.ArgumentParser(description='Find and compress directories based on specified criteria.')
-    parser.add_argument('directory', metavar='DIR', type=str, help='the directory to operate on')
-    parser.add_argument('--max_dir_count', type=int, help='Maximum directory count for database generation')
-    parser.add_argument('--read_all_runs_in_dir', action='store_true', default=False, help='Read all runs in directory')
-    parser.add_argument('--read_error_runs_in_dir', action='store_true', default=False, help='Read directories with errors')
+    parser = argparse.ArgumentParser(
+        description="Find and compress directories based on specified criteria."
+    )
+    parser.add_argument(
+        "directory", metavar="DIR", type=str, help="the directory to operate on"
+    )
+    parser.add_argument(
+        "--max_dir_count",
+        type=int,
+        help="Maximum directory count for database generation",
+    )
+    parser.add_argument(
+        "--read_all_runs_in_dir",
+        action="store_true",
+        default=False,
+        help="Read all runs in directory",
+    )
+    parser.add_argument(
+        "--read_error_runs_in_dir",
+        action="store_true",
+        default=False,
+        help="Read directories with errors",
+    )
     args = parser.parse_args()
 
-    datagen = DatabaseGenerator(args.directory,
-                                max_workers=cpu_count())
-
+    datagen = DatabaseGenerator(args.directory, max_workers=cpu_count())
 
     # Check if max_dir_count is provided as an argument
     if args.max_dir_count is not None:
         max_dir_count = args.max_dir_count
     else:
         max_dir_count = 2000  # Default value
 
     # Call the update_failed_jobs_in_database function with the updated parameters
-    df = datagen.update_failed_jobs_in_database(df_path=args.directory,
-                                                read_error_dirs=args.read_error_runs_in_dir,
-                                                read_multiple_runs_in_dir=args.read_all_runs_in_dir,
-                                                max_dir_count=max_dir_count,
-                                                df_compression=True)
+    df = datagen.update_failed_jobs_in_database(
+        df_path=args.directory,
+        read_error_dirs=args.read_error_runs_in_dir,
+        read_multiple_runs_in_dir=args.read_all_runs_in_dir,
+        max_dir_count=max_dir_count,
+        df_compression=True,
+    )
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
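Note: a hypothetical refresh run over an existing database, using only the flags defined above (the path is a placeholder): python update_vasp_db.py /scratch/vasp_runs --read_error_runs_in_dir --max_dir_count 1000.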
34 changes: 17 additions & 17 deletions setup.py
@@ -1,25 +1,25 @@
 from setuptools import setup, find_packages
 
 setup(
-    name='utils',
-    version='0.1',
-    packages=find_packages(where='utils'),
-    package_dir={'': 'utils'},
+    name="utils",
+    version="0.1",
+    packages=find_packages(where="utils"),
+    package_dir={"": "utils"},
     install_requires=[
-        'pandas',
-        'numpy',
-        'pymatgen',
+        "pandas",
+        "numpy",
+        "pymatgen",
     ],
     scripts=[
-        'actual_usage/check_jobdir',
-        'actual_usage/memory_check',
-        'actual_usage/slurm_list_jobdir',
-        'actual_usage/build_and_show_db',
-        'actual_usage/compress_here',
-        'actual_usage/qstat_slurm',
-        'actual_usage/summarise_db',
-        'actual_usage/setonix_refresh_mamba',
-        'actual_usage/setonix_refresh_mamba',
-        'actual_usage/update_failed_jobs_db',
+        "actual_usage/check_jobdir",
+        "actual_usage/memory_check",
+        "actual_usage/slurm_list_jobdir",
+        "actual_usage/build_and_show_db",
+        "actual_usage/compress_here",
+        "actual_usage/qstat_slurm",
+        "actual_usage/summarise_db",
+        "actual_usage/setonix_refresh_mamba",
+        "actual_usage/setonix_refresh_mamba",
+        "actual_usage/update_failed_jobs_db",
     ],
 )
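Note: with this setup.py, a standard editable install (pip install -e .) would register the utils package and place the scripts= entries on the user's PATH; that workflow is assumed here, not shown in the commit.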