From c0caa08b562aa8293317208e6acc4e46b82e0811 Mon Sep 17 00:00:00 2001 From: Han Lin Mai Date: Fri, 14 Jun 2024 23:04:26 +0200 Subject: [PATCH] black formatting + fixed tests --- actual_usage/VaspReconvergeExample.py | 14 +- actual_usage/build_vasp_database.py | 73 +- actual_usage/compression.py | 14 +- actual_usage/summarise_vasp_database.py | 44 +- actual_usage/update_vasp_db.py | 51 +- setup.py | 34 +- tests/test_generic.py | 370 ++++-- tests/test_vasp.py | 14 +- utils/GNN_calculators/mace.py | 69 +- utils/StructureManipulator/cleave.py | 190 ++- utils/StructureManipulator/interstitial.py | 155 ++- utils/ace_descriptor_utils.py | 298 ++--- utils/analysis_functions.py | 1033 +++++++++++------ utils/chargemol.py | 575 +++++---- utils/custom_custodian_handlers.py | 32 +- utils/functions.py | 425 ++++--- utils/generic.py | 316 +++-- utils/jobfile.py | 95 +- .../CustodianScripts/template_BASE.py | 12 +- .../CustodianScripts/template_DRS.py | 87 +- .../CustodianScripts/template_SDRS.py | 122 +- .../CustodianScripts/template_SDRS_KPOINTS.py | 121 +- .../CustodianScripts/template_Static.py | 34 +- utils/parallel.py | 18 +- utils/periodic_table.py | 486 +++++--- utils/plotters/grid_plots.py | 66 +- utils/plotters/structure_plots.py | 108 +- utils/structure_featuriser.py | 121 +- utils/training_data_nequip.py | 41 +- utils/vasp/database.py | 661 +++++++---- utils/vasp/job.py | 189 +-- utils/vasp/parser/outcar.py | 76 +- utils/vasp/parser/output.py | 240 ++-- utils/vasp/resubmitter.py | 205 +++- .../vasp/vasp_potential_training_database.py | 1 - 35 files changed, 4171 insertions(+), 2219 deletions(-) diff --git a/actual_usage/VaspReconvergeExample.py b/actual_usage/VaspReconvergeExample.py index a2b53ff..9367903 100644 --- a/actual_usage/VaspReconvergeExample.py +++ b/actual_usage/VaspReconvergeExample.py @@ -1,10 +1,12 @@ from utils.vasp.vasp_resubmitter import CalculationConverger import os -vasp_resubmitter = CalculationConverger(parent_dir=os.getcwd(), - script_template_dir="/home/hmai/CustodianJobfiles", - max_submissions = 1000, - submission_command = "sbatch", - username="hmai") +vasp_resubmitter = CalculationConverger( + parent_dir=os.getcwd(), + script_template_dir="/home/hmai/CustodianJobfiles", + max_submissions=1000, + submission_command="sbatch", + username="hmai", +) -vasp_resubmitter.reconverge_all() \ No newline at end of file +vasp_resubmitter.reconverge_all() diff --git a/actual_usage/build_vasp_database.py b/actual_usage/build_vasp_database.py index c666fc8..76511d8 100644 --- a/actual_usage/build_vasp_database.py +++ b/actual_usage/build_vasp_database.py @@ -3,21 +3,43 @@ import warnings from multiprocessing import cpu_count + def main(): warnings.filterwarnings("ignore") - + # Initialize argument parser - parser = argparse.ArgumentParser(description='Find and compress directories based on specified criteria.') - parser.add_argument('directory', metavar='DIR', type=str, help='the directory to operate on') - parser.add_argument('--extract', action='store_true', help='Extract directories during database generation') - parser.add_argument('--max_dir_count', type=int, help='Maximum directory count for database generation') - parser.add_argument('--read_all_runs_in_dir', action='store_true', default=False, help='Read all runs in directory') - parser.add_argument('--read_error_runs_in_dir', action='store_true', default=False, help='Read directories with errors') + parser = argparse.ArgumentParser( + description="Find and compress directories based on specified criteria." 
+ ) + parser.add_argument( + "directory", metavar="DIR", type=str, help="the directory to operate on" + ) + parser.add_argument( + "--extract", + action="store_true", + help="Extract directories during database generation", + ) + parser.add_argument( + "--max_dir_count", + type=int, + help="Maximum directory count for database generation", + ) + parser.add_argument( + "--read_all_runs_in_dir", + action="store_true", + default=False, + help="Read all runs in directory", + ) + parser.add_argument( + "--read_error_runs_in_dir", + action="store_true", + default=False, + help="Read directories with errors", + ) args = parser.parse_args() - datagen = DatabaseGenerator(args.directory, - max_workers=cpu_count()) - + datagen = DatabaseGenerator(args.directory, max_workers=cpu_count()) + # Check if max_dir_count is provided as an argument if args.max_dir_count is not None: max_dir_count = args.max_dir_count @@ -25,18 +47,21 @@ def main(): max_dir_count = 2000 # Default value # Call the build_database function with the updated parameters - df = datagen.build_database(extract_directories=args.extract, - read_multiple_runs_in_dir=args.read_all_runs_in_dir, - read_error_dirs=args.read_error_runs_in_dir, - max_dir_count=max_dir_count, - tarball_extensions=(".tar.gz", ".tar.bz2"), - cleanup=False, - keep_filenames_after_cleanup=[], - keep_filename_patterns_after_cleanup=[], - filenames_to_qualify=["OUTCAR"],#, "vasprun.xml"], - all_present=True, - df_filename=None, - df_compression=True) - -if __name__ == '__main__': + df = datagen.build_database( + extract_directories=args.extract, + read_multiple_runs_in_dir=args.read_all_runs_in_dir, + read_error_dirs=args.read_error_runs_in_dir, + max_dir_count=max_dir_count, + tarball_extensions=(".tar.gz", ".tar.bz2"), + cleanup=False, + keep_filenames_after_cleanup=[], + keep_filename_patterns_after_cleanup=[], + filenames_to_qualify=["OUTCAR"], # , "vasprun.xml"], + all_present=True, + df_filename=None, + df_compression=True, + ) + + +if __name__ == "__main__": main() diff --git a/actual_usage/compression.py b/actual_usage/compression.py index 0f1399d..9c8f64d 100644 --- a/actual_usage/compression.py +++ b/actual_usage/compression.py @@ -2,9 +2,14 @@ from utils.generic import find_and_compress_directories_parallel import os + def main(): - parser = argparse.ArgumentParser(description='Find and compress directories based on specified criteria.') - parser.add_argument('directory', metavar='DIR', type=str, help='the directory to operate on') + parser = argparse.ArgumentParser( + description="Find and compress directories based on specified criteria." 
+ ) + parser.add_argument( + "directory", metavar="DIR", type=str, help="the directory to operate on" + ) args = parser.parse_args() find_and_compress_directories_parallel( @@ -16,8 +21,9 @@ def main(): files=[], file_patterns=[], print_msg=True, - inside_dir=True + inside_dir=True, ) -if __name__ == '__main__': + +if __name__ == "__main__": main() diff --git a/actual_usage/summarise_vasp_database.py b/actual_usage/summarise_vasp_database.py index 4f4090b..10acbb7 100644 --- a/actual_usage/summarise_vasp_database.py +++ b/actual_usage/summarise_vasp_database.py @@ -2,20 +2,23 @@ import argparse import pandas as pd + def analyze_vasp_database(folder_path, output_compression=False): # Initialize paths for both potential database files - database_file_pkl = os.path.join(folder_path, 'vasp_database.pkl') - database_file_gz = os.path.join(folder_path, 'vasp_database.pkl.gz') + database_file_pkl = os.path.join(folder_path, "vasp_database.pkl") + database_file_gz = os.path.join(folder_path, "vasp_database.pkl.gz") # Determine which file exists and set the appropriate path and compression option if os.path.exists(database_file_gz): database_file = database_file_gz - compression_option = 'gzip' + compression_option = "gzip" elif os.path.exists(database_file_pkl): database_file = database_file_pkl compression_option = None else: - print("Error: neither 'vasp_database.pkl' nor 'vasp_database.pkl.gz' found in the specified folder.") + print( + "Error: neither 'vasp_database.pkl' nor 'vasp_database.pkl.gz' found in the specified folder." + ) return # Load the database into a DataFrame with or without compression @@ -29,20 +32,41 @@ def analyze_vasp_database(folder_path, output_compression=False): converged_jobs = df[df["convergence"] == True] # Determine compression option for output based on the user input - output_compression_option = 'gzip' if output_compression else None + output_compression_option = "gzip" if output_compression else None # Write the failed_jobs and converged_jobs DataFrames to separate pickle files with optional compression - failed_jobs.to_pickle(os.path.join(folder_path, 'failed_jobs.pkl.gz' if output_compression else 'failed_jobs.pkl'), compression=output_compression_option) - converged_jobs.to_pickle(os.path.join(folder_path, 'converged_jobs.pkl.gz' if output_compression else 'converged_jobs.pkl'), compression=output_compression_option) + failed_jobs.to_pickle( + os.path.join( + folder_path, + "failed_jobs.pkl.gz" if output_compression else "failed_jobs.pkl", + ), + compression=output_compression_option, + ) + converged_jobs.to_pickle( + os.path.join( + folder_path, + "converged_jobs.pkl.gz" if output_compression else "converged_jobs.pkl", + ), + compression=output_compression_option, + ) # Print the counts print(f"The number of failed jobs is: {len(failed_jobs)}") print(f"The number of successful jobs is: {len(converged_jobs)}") print(f"The total number of jobs is: {len(df)}") + if __name__ == "__main__": parser = argparse.ArgumentParser(description="Analyze VASP database") - parser.add_argument("folder_path", type=str, help="Folder path containing 'vasp_database.pkl' or 'vasp_database.pkl.gz'") - parser.add_argument("--output_compression", action="store_true", help="Enable gzip compression for output pkl files") + parser.add_argument( + "folder_path", + type=str, + help="Folder path containing 'vasp_database.pkl' or 'vasp_database.pkl.gz'", + ) + parser.add_argument( + "--output_compression", + action="store_true", + help="Enable gzip compression for output pkl files", + ) 
args = parser.parse_args() - analyze_vasp_database(args.folder_path, args.output_compression) \ No newline at end of file + analyze_vasp_database(args.folder_path, args.output_compression) diff --git a/actual_usage/update_vasp_db.py b/actual_usage/update_vasp_db.py index 1b7aaea..10334b3 100644 --- a/actual_usage/update_vasp_db.py +++ b/actual_usage/update_vasp_db.py @@ -3,20 +3,38 @@ import warnings from multiprocessing import cpu_count + def main(): warnings.filterwarnings("ignore") - + # Initialize argument parser - parser = argparse.ArgumentParser(description='Find and compress directories based on specified criteria.') - parser.add_argument('directory', metavar='DIR', type=str, help='the directory to operate on') - parser.add_argument('--max_dir_count', type=int, help='Maximum directory count for database generation') - parser.add_argument('--read_all_runs_in_dir', action='store_true', default=False, help='Read all runs in directory') - parser.add_argument('--read_error_runs_in_dir', action='store_true', default=False, help='Read directories with errors') + parser = argparse.ArgumentParser( + description="Find and compress directories based on specified criteria." + ) + parser.add_argument( + "directory", metavar="DIR", type=str, help="the directory to operate on" + ) + parser.add_argument( + "--max_dir_count", + type=int, + help="Maximum directory count for database generation", + ) + parser.add_argument( + "--read_all_runs_in_dir", + action="store_true", + default=False, + help="Read all runs in directory", + ) + parser.add_argument( + "--read_error_runs_in_dir", + action="store_true", + default=False, + help="Read directories with errors", + ) args = parser.parse_args() - datagen = DatabaseGenerator(args.directory, - max_workers=cpu_count()) - + datagen = DatabaseGenerator(args.directory, max_workers=cpu_count()) + # Check if max_dir_count is provided as an argument if args.max_dir_count is not None: max_dir_count = args.max_dir_count @@ -24,11 +42,14 @@ def main(): max_dir_count = 2000 # Default value # Call the update_failed_jobs_in_database function with the updated parameters - df = datagen.update_failed_jobs_in_database(df_path=args.directory, - read_error_dirs=args.read_error_runs_in_dir, - read_multiple_runs_in_dir=args.read_all_runs_in_dir, - max_dir_count=max_dir_count, - df_compression=True) + df = datagen.update_failed_jobs_in_database( + df_path=args.directory, + read_error_dirs=args.read_error_runs_in_dir, + read_multiple_runs_in_dir=args.read_all_runs_in_dir, + max_dir_count=max_dir_count, + df_compression=True, + ) + -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/setup.py b/setup.py index e14f33d..7b443fa 100644 --- a/setup.py +++ b/setup.py @@ -1,25 +1,25 @@ from setuptools import setup, find_packages setup( - name='utils', - version='0.1', - packages=find_packages(where='utils'), - package_dir={'': 'utils'}, + name="utils", + version="0.1", + packages=find_packages(where="utils"), + package_dir={"": "utils"}, install_requires=[ - 'pandas', - 'numpy', - 'pymatgen', + "pandas", + "numpy", + "pymatgen", ], scripts=[ - 'actual_usage/check_jobdir', - 'actual_usage/memory_check', - 'actual_usage/slurm_list_jobdir', - 'actual_usage/build_and_show_db', - 'actual_usage/compress_here', - 'actual_usage/qstat_slurm', - 'actual_usage/summarise_db', - 'actual_usage/setonix_refresh_mamba', - 'actual_usage/setonix_refresh_mamba', - 'actual_usage/update_failed_jobs_db', + "actual_usage/check_jobdir", + "actual_usage/memory_check", + 
"actual_usage/slurm_list_jobdir", + "actual_usage/build_and_show_db", + "actual_usage/compress_here", + "actual_usage/qstat_slurm", + "actual_usage/summarise_db", + "actual_usage/setonix_refresh_mamba", + "actual_usage/setonix_refresh_mamba", + "actual_usage/update_failed_jobs_db", ], ) diff --git a/tests/test_generic.py b/tests/test_generic.py index adb1b8a..0a71e9a 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -7,21 +7,24 @@ import filecmp # Import the function to be tested -from utils.generic import (chunk_list, - search_line_in_file, - parse_lines, - find_directories_with_files, - extract_tarball, - find_and_extract_tarballs_parallel, - extract_files_from_tarball, - extract_files_from_tarballs_parallel, - find_and_extract_files_from_tarballs_parallel, - compress_directory, - compress_directory_parallel, - cleanup_dir, - compress_and_cleanup, - find_and_compress_directories_parallel, - is_line_in_file) +from utils.generic import ( + chunk_list, + search_line_in_file, + parse_lines, + find_directories_with_files, + extract_tarball, + find_and_extract_tarballs_parallel, + extract_files_from_tarball, + extract_files_from_tarballs_parallel, + find_and_extract_files_from_tarballs_parallel, + compress_directory, + compress_directory_parallel, + cleanup_dir, + compress_and_cleanup, + find_and_compress_directories_parallel, + is_line_in_file, +) + class TestChunkList(unittest.TestCase): def test_chunk_list(self): @@ -48,21 +51,30 @@ def test_chunk_list(self): # Test with a large list and large n value lst5 = list(range(1, 1001)) result5 = chunk_list(lst5, 100) - expected_result5 = [list(range(1, 101)), list(range(101, 201)), list(range(201, 301)), - list(range(301, 401)), list(range(401, 501)), list(range(501, 601)), - list(range(601, 701)), list(range(701, 801)), list(range(801, 901)), - list(range(901, 1001))] + expected_result5 = [ + list(range(1, 101)), + list(range(101, 201)), + list(range(201, 301)), + list(range(301, 401)), + list(range(401, 501)), + list(range(501, 601)), + list(range(601, 701)), + list(range(701, 801)), + list(range(801, 901)), + list(range(901, 1001)), + ] self.assertEqual(result5, expected_result5) + class TestSearchLineInFile(unittest.TestCase): def setUp(self): # Create a temporary file and write some contents for testing self.temp_dir = tempfile.mkdtemp() - self.temp_file = os.path.join(self.temp_dir, 'test_file.txt') - with open(self.temp_file, 'w') as file: - file.write('This is the first line.\n') - file.write('This is the second line.\n') - file.write('This is the third line.\n') + self.temp_file = os.path.join(self.temp_dir, "test_file.txt") + with open(self.temp_file, "w") as file: + file.write("This is the first line.\n") + file.write("This is the second line.\n") + file.write("This is the third line.\n") def tearDown(self): # Remove the temporary directory and files after the test @@ -70,28 +82,31 @@ def tearDown(self): def test_search_line_in_file(self): # Test searching for an existing line in the file - result = search_line_in_file(self.temp_file, 'second line') + result = search_line_in_file(self.temp_file, "second line") self.assertTrue(result) # Test searching for a non-existing line in the file - result = search_line_in_file(self.temp_file, 'fourth line') + result = search_line_in_file(self.temp_file, "fourth line") self.assertFalse(result) def test_search_line_in_file_with_depth(self): # Test searching for a line with a specified depth - result = search_line_in_file(self.temp_file, 'first line', search_depth=2, reverse=True) + 
result = search_line_in_file( + self.temp_file, "first line", search_depth=2, reverse=True + ) self.assertFalse(result) def test_search_line_in_file_reverse(self): # Test searching for a line in reverse order - result = search_line_in_file(self.temp_file, 'third line', reverse=True) + result = search_line_in_file(self.temp_file, "third line", reverse=True) self.assertTrue(result) def test_search_line_in_file_file_not_found(self): # Test handling of file not found scenario - result = search_line_in_file('non_existing_file.txt', 'line') + result = search_line_in_file("non_existing_file.txt", "line") self.assertFalse(result) + class TestParseLines(unittest.TestCase): def test_parse_lines(self): flist = [ @@ -100,14 +115,14 @@ def test_parse_lines(self): "1.0 2.0 3.0\n", "4.0 5.0 6.0\n", "Trigger End line\n", - "Footer line\n" + "Footer line\n", ] trigger_start = "Trigger Start" trigger_end = "Trigger End" result = parse_lines(flist, trigger_start, trigger_end) expected = [["1.0 2.0 3.0\n", "4.0 5.0 6.0\n"]] - + np.testing.assert_array_equal(result, expected) def test_parse_lines_triggers_but_no_data(self): @@ -115,43 +130,36 @@ def test_parse_lines_triggers_but_no_data(self): "Header line\n", "Trigger Start line\n", "Trigger End line\n", - "Footer line\n" + "Footer line\n", ] trigger_start = "Trigger Start" trigger_end = "Trigger End" result = parse_lines(flist, trigger_start, trigger_end) expected = [[]] - + np.testing.assert_array_equal(result, expected) - + def test_parse_lines_no_data(self): - flist = [ - "Header line\n", - "Footer line\n" - ] + flist = ["Header line\n", "Footer line\n"] trigger_start = "Trigger Start" trigger_end = "Trigger End" result = parse_lines(flist, trigger_start, trigger_end) expected = [] - + np.testing.assert_array_equal(result, expected) - + def test_parse_lines_no_endtrigger(self): - flist = [ - "Header line\n", - "Trigger Start", - "1.0 2.0 3.0\n" - ] + flist = ["Header line\n", "Trigger Start", "1.0 2.0 3.0\n"] trigger_start = "Trigger Start" trigger_end = "Trigger End" result = parse_lines(flist, trigger_start, trigger_end) expected = [["1.0 2.0 3.0\n"]] - + np.testing.assert_array_equal(result, expected) - + def test_parse_lines_multiple_blocks(self): flist = [ "Header line\n", @@ -164,19 +172,22 @@ def test_parse_lines_multiple_blocks(self): "7.0 8.0 9.0\n", "10.0 11.0 12.0\n", "Trigger End line\n", - "Footer line\n" + "Footer line\n", ] trigger_start = "Trigger Start" trigger_end = "Trigger End" result = parse_lines(flist, trigger_start, trigger_end) - expected = [["1.0 2.0 3.0\n", "4.0 5.0 6.0\n"], - ["7.0 8.0 9.0\n", "10.0 11.0 12.0\n"]] - + expected = [ + ["1.0 2.0 3.0\n", "4.0 5.0 6.0\n"], + ["7.0 8.0 9.0\n", "10.0 11.0 12.0\n"], + ] + np.testing.assert_array_equal(result, expected) - + + class TestFindDirectoriesWithFiles(unittest.TestCase): - + def setUp(self): self.temp_dir = tempfile.mkdtemp() self.create_test_files() @@ -194,54 +205,71 @@ def create_test_files(self): dir2 = os.path.join(self.temp_dir, "dir2") dir3 = os.path.join(self.temp_dir, "dir3") dir4 = os.path.join(self.temp_dir, "dir4") - + os.makedirs(dir1) os.makedirs(dir2) os.makedirs(dir3) os.makedirs(dir4) - - with open(os.path.join(dir1, "file1.txt"), 'w') as file: + + with open(os.path.join(dir1, "file1.txt"), "w") as file: file.write("This is file 1 in dir 1") - - with open(os.path.join(dir1, "file2.txt"), 'w') as file: + + with open(os.path.join(dir1, "file2.txt"), "w") as file: file.write("This is file 2 in dir 1") - - with open(os.path.join(dir2, "file2.txt"), 'w') as file: + 
+ with open(os.path.join(dir2, "file2.txt"), "w") as file: file.write("This is file 2") - with open(os.path.join(dir3, "file3.txt"), 'w') as file: + with open(os.path.join(dir3, "file3.txt"), "w") as file: file.write("This is file 3") with open(os.path.join(dir4, "file9.txt"), "w") as file: file.write("This is file 9") def test_find_all_files_present(self): - result = find_directories_with_files(self.temp_dir, ["file1.txt", "file2.txt", "file3.txt"], all_present=False) - expected = [os.path.join(self.temp_dir, "dir1"), os.path.join(self.temp_dir, "dir2"), os.path.join(self.temp_dir, "dir3")] + result = find_directories_with_files( + self.temp_dir, ["file1.txt", "file2.txt", "file3.txt"], all_present=False + ) + expected = [ + os.path.join(self.temp_dir, "dir1"), + os.path.join(self.temp_dir, "dir2"), + os.path.join(self.temp_dir, "dir3"), + ] self.assertTrue(set(result) == set(expected)) def test_find_some_files_present(self): - result = find_directories_with_files(self.temp_dir, ["file1.txt", "file2.txt"], all_present=True) + result = find_directories_with_files( + self.temp_dir, ["file1.txt", "file2.txt"], all_present=True + ) expected = [os.path.join(self.temp_dir, "dir1")] self.assertTrue(set(result) == set(expected)) def test_find_any_files_present(self): - result = find_directories_with_files(self.temp_dir, ["file2.txt", "file3.txt"], all_present=False) - expected = [os.path.join(self.temp_dir, "dir1"), os.path.join(self.temp_dir, "dir2"), os.path.join(self.temp_dir, "dir3")] + result = find_directories_with_files( + self.temp_dir, ["file2.txt", "file3.txt"], all_present=False + ) + expected = [ + os.path.join(self.temp_dir, "dir1"), + os.path.join(self.temp_dir, "dir2"), + os.path.join(self.temp_dir, "dir3"), + ] self.assertTrue(set(result) == set(expected)) def test_find_no_files_present(self): - result = find_directories_with_files(self.temp_dir, ["file4.txt", "file5.txt"], all_present=True) + result = find_directories_with_files( + self.temp_dir, ["file4.txt", "file5.txt"], all_present=True + ) expected = [] self.assertTrue(set(result) == set(expected)) + class TestExtractTarball(unittest.TestCase): - + def setUp(self): self.temp_dir = tempfile.mkdtemp() - self.temp_file = os.path.join(self.temp_dir, 'test_file.txt') - with open(self.temp_file, 'w') as file: - file.write('This is the first line.\n') + self.temp_file = os.path.join(self.temp_dir, "test_file.txt") + with open(self.temp_file, "w") as file: + file.write("This is the first line.\n") self.create_test_tarball() def tearDown(self): @@ -264,13 +292,14 @@ def test_extract_tarball(self): extracted_file_path = os.path.join(extraction_path, "dir1", "test_file.txt") self.assertTrue(os.path.exists(extracted_file_path)) + class TestFindAndExtractTarballsParallel(unittest.TestCase): - + def setUp(self): self.temp_dir = tempfile.mkdtemp() - self.temp_file = os.path.join(self.temp_dir, 'test_file.txt') - with open(self.temp_file, 'w') as file: - file.write('This is the first line.\n') + self.temp_file = os.path.join(self.temp_dir, "test_file.txt") + with open(self.temp_file, "w") as file: + file.write("This is the first line.\n") self.create_test_tarballs() def tearDown(self): @@ -301,22 +330,23 @@ def create_test_tarballs(self): test_tarball_path3 = os.path.join(dir2, "test3.tar.bz2") with tarfile.open(test_tarball_path3, "w:bz2") as tar: tar.add(self.temp_file, arcname="test_file3.txt") - + def test_find_and_extract_tarballs_parallel(self): parent_dir = self.temp_dir tarball_extension = ".tar.gz" 
find_and_extract_tarballs_parallel(parent_dir, tarball_extension) - + extracted_file_path1 = os.path.join(self.temp_dir, "dir1", "test_file1.txt") self.assertTrue(os.path.exists(extracted_file_path1)) extracted_file_path2 = os.path.join(self.temp_dir, "dir2", "test_file2.txt") self.assertTrue(os.path.exists(extracted_file_path2)) - + extracted_file_path3 = os.path.join(self.temp_dir, "dir2", "test_file3.txt") self.assertFalse(os.path.exists(extracted_file_path3)) - + + class TestExtractFilesFromTarball(unittest.TestCase): def setUp(self): self.temp_dir = tempfile.mkdtemp() @@ -334,10 +364,10 @@ def tearDown(self): def create_test_tarball(self): self.test_tarball_path = os.path.join(self.temp_dir, "test.tar.gz") test_file_path = os.path.join(os.path.dirname(__file__), "test_file.txt") - + with open(test_file_path, "w") as file: file.write("This is the content of the test file.") - + with tarfile.open(self.test_tarball_path, "w:gz") as tar: tar.add(test_file_path, arcname="dir1/test_file.txt") @@ -346,18 +376,23 @@ def test_extract_files_from_tarball(self): filenames = ["test_file.txt"] suffix = None - extracted_filepaths = extract_files_from_tarball(tarball_filepath, filenames, suffix) + extracted_filepaths = extract_files_from_tarball( + tarball_filepath, filenames, suffix + ) extracted_file_path = os.path.join(self.temp_dir, "dir1", "test_file.txt") self.assertTrue(os.path.exists(extracted_file_path)) self.assertListEqual(extracted_filepaths, [extracted_file_path]) - + + class TestExtractFilesFromTarballsParallel(unittest.TestCase): def setUp(self): self.temp_dir = tempfile.mkdtemp() self.create_test_tarballs() - self.tarball_paths = [os.path.join(self.temp_dir, "dir1", "test1.tar.gz"), - os.path.join(self.temp_dir, "dir2", "test2.tar.gz")] + self.tarball_paths = [ + os.path.join(self.temp_dir, "dir1", "test1.tar.gz"), + os.path.join(self.temp_dir, "dir2", "test2.tar.gz"), + ] def tearDown(self): for root, dirs, files in os.walk(self.temp_dir, topdown=False): @@ -375,7 +410,7 @@ def create_test_tarballs(self): # Create test tarball 1 (.tar.gz) test_tarball_path1 = os.path.join(dir1, "test1.tar.gz") - with open(os.path.join(dir1, "file1.txt"), 'w') as file: + with open(os.path.join(dir1, "file1.txt"), "w") as file: file.write("This is file 1") with tarfile.open(test_tarball_path1, "w:gz") as tar: @@ -383,7 +418,7 @@ def create_test_tarballs(self): # Create test tarball 2 (.tar.gz) test_tarball_path2 = os.path.join(dir2, "test2.tar.gz") - with open(os.path.join(dir2, "file2.txt"), 'w') as file: + with open(os.path.join(dir2, "file2.txt"), "w") as file: file.write("This is file 2") with tarfile.open(test_tarball_path2, "w:gz") as tar: @@ -396,7 +431,9 @@ def test_extract_files_from_tarballs_parallel(self): ] suffix = False - extract_files_from_tarballs_parallel(self.tarball_paths, filenames, suffix=suffix) + extract_files_from_tarballs_parallel( + self.tarball_paths, filenames, suffix=suffix + ) extracted_file_path1 = os.path.join(self.temp_dir, "dir1", "file1.txt") self.assertTrue(os.path.exists(extracted_file_path1)) @@ -413,7 +450,9 @@ def test_extract_files_with_leading_dot(self): extract_files_from_tarballs_parallel(self.tarball_paths, filenames) for i, filename in enumerate(filenames): - extracted_filepath = os.path.join(os.path.dirname(self.tarball_paths[i]), filename[2:]) + extracted_filepath = os.path.join( + os.path.dirname(self.tarball_paths[i]), filename[2:] + ) self.assertTrue(os.path.exists(extracted_filepath)) def test_extract_files_no_suffix(self): @@ -425,9 +464,12 @@ def 
test_extract_files_no_suffix(self): extract_files_from_tarballs_parallel(self.tarball_paths, filenames) for i, filename in enumerate(filenames): - extracted_filepath = os.path.join(os.path.dirname(self.tarball_paths[i]), filename) + extracted_filepath = os.path.join( + os.path.dirname(self.tarball_paths[i]), filename + ) self.assertTrue(os.path.exists(extracted_filepath)) + class TestFindAndExtractFilesFromTarballsParallel(unittest.TestCase): def setUp(self): self.temp_dir = tempfile.mkdtemp() @@ -449,7 +491,7 @@ def create_test_tarballs(self): # Create test tarball 1 (.tar.gz) test_tarball_path1 = os.path.join(dir1, "test1.tar.gz") - with open(os.path.join(dir1, "file1.txt"), 'w') as file: + with open(os.path.join(dir1, "file1.txt"), "w") as file: file.write("This is file 1") with tarfile.open(test_tarball_path1, "w:gz") as tar: @@ -457,7 +499,7 @@ def create_test_tarballs(self): # Create test tarball 2 (.tar.gz) test_tarball_path2 = os.path.join(dir2, "test2.tar.gz") - with open(os.path.join(dir2, "file2.txt"), 'w') as file: + with open(os.path.join(dir2, "file2.txt"), "w") as file: file.write("This is file 2") with tarfile.open(test_tarball_path2, "w:gz") as tar: @@ -469,14 +511,17 @@ def test_find_and_extract_files_from_tarballs_parallel(self): filenames = ["file1.txt", "file2.txt"] suffix = False - find_and_extract_files_from_tarballs_parallel(parent_dir, extension, filenames, suffix) + find_and_extract_files_from_tarballs_parallel( + parent_dir, extension, filenames, suffix + ) extracted_file_path1 = os.path.join(self.temp_dir, "dir1", "file1.txt") self.assertTrue(os.path.exists(extracted_file_path1)) extracted_file_path2 = os.path.join(self.temp_dir, "dir2", "file2.txt") self.assertTrue(os.path.exists(extracted_file_path2)) - + + class TestCompressDirectory(unittest.TestCase): def setUp(self): self.temp_dir = tempfile.mkdtemp() @@ -497,11 +542,11 @@ def create_test_directory(self): # Create test files in the directory file1_path = os.path.join(dir_path, "file1.txt") - with open(file1_path, 'w') as file: + with open(file1_path, "w") as file: file.write("This is file 1") file2_path = os.path.join(dir_path, "file2.txt") - with open(file2_path, 'w') as file: + with open(file2_path, "w") as file: file.write("This is file 2") def test_compress_directory(self): @@ -511,7 +556,13 @@ def test_compress_directory(self): print_message = False inside_dir = True - compress_directory(directory_path, exclude_files, exclude_file_patterns, print_message, inside_dir) + compress_directory( + directory_path, + exclude_files, + exclude_file_patterns, + print_message, + inside_dir, + ) compressed_file_path = os.path.join(self.temp_dir, "test_dir/test_dir.tar.gz") self.assertTrue(os.path.exists(compressed_file_path)) @@ -520,7 +571,8 @@ def test_compress_directory(self): file_names = tar.getnames() self.assertTrue(any(name.endswith("file1.txt") for name in file_names)) self.assertFalse(any(name.endswith("file2.txt") for name in file_names)) - + + class TestCompressDirectoryParallel(unittest.TestCase): def setUp(self): self.temp_dir = tempfile.mkdtemp() @@ -552,14 +604,20 @@ def create_test_directories(self): def test_compress_directory_parallel(self): directory_paths = [ os.path.join(self.temp_dir, "dir1"), - os.path.join(self.temp_dir, "dir2") + os.path.join(self.temp_dir, "dir2"), ] exclude_files = [["file1.txt"], ["file2.txt"]] exclude_file_patterns = [] print_message = [False] inside_dir = [True] - compress_directory_parallel(directory_paths, exclude_files, exclude_file_patterns, print_message, 
inside_dir) + compress_directory_parallel( + directory_paths, + exclude_files, + exclude_file_patterns, + print_message, + inside_dir, + ) compressed_file_path1 = os.path.join(self.temp_dir, "dir1/dir1.tar.gz") self.assertTrue(os.path.exists(compressed_file_path1)) @@ -577,6 +635,7 @@ def test_compress_directory_parallel(self): self.assertFalse(any(name.endswith("file1.txt") for name in file_names)) self.assertTrue(any(name.endswith("file2.txt") for name in file_names)) + class TestCleanupDir(unittest.TestCase): def setUp(self): self.temp_dir = tempfile.mkdtemp() @@ -608,7 +667,7 @@ def test_cleanup_dir_keep(self): keep = True files = ["file1.txt"] file_patterns = [] - + self.create_test_files() cleanup_dir(directory_path, keep, files, file_patterns) @@ -637,7 +696,8 @@ def test_cleanup_dir_remove(self): file3_path = os.path.join(self.temp_dir, "file3.txt") self.assertTrue(os.path.exists(file3_path)) - + + class TestCompressAndCleanup(unittest.TestCase): def setUp(self): self.temp_dir = tempfile.mkdtemp() @@ -674,8 +734,16 @@ def test_compress_and_cleanup_keep(self): print_msg = False inside_dir = True - compress_and_cleanup(directory_path, exclude_files_from_tarball, exclude_filepatterns_from_tarball, - keep_after, files, file_patterns, print_msg, inside_dir) + compress_and_cleanup( + directory_path, + exclude_files_from_tarball, + exclude_filepatterns_from_tarball, + keep_after, + files, + file_patterns, + print_msg, + inside_dir, + ) file1_path = os.path.join(self.temp_dir, "file1.txt") self.assertTrue(os.path.exists(file1_path)) @@ -686,7 +754,9 @@ def test_compress_and_cleanup_keep(self): file3_path = os.path.join(self.temp_dir, "file3.txt") self.assertFalse(os.path.exists(file3_path)) - compressed_file_path = os.path.join(self.temp_dir, os.path.basename(self.temp_dir) + ".tar.gz") + compressed_file_path = os.path.join( + self.temp_dir, os.path.basename(self.temp_dir) + ".tar.gz" + ) self.assertTrue(os.path.exists(compressed_file_path)) def test_compress_and_cleanup_remove(self): @@ -699,8 +769,16 @@ def test_compress_and_cleanup_remove(self): print_msg = False inside_dir = True - compress_and_cleanup(directory_path, exclude_files_from_tarball, exclude_filepatterns_from_tarball, - keep_after, files, file_patterns, print_msg, inside_dir) + compress_and_cleanup( + directory_path, + exclude_files_from_tarball, + exclude_filepatterns_from_tarball, + keep_after, + files, + file_patterns, + print_msg, + inside_dir, + ) file1_path = os.path.join(self.temp_dir, "file1.txt") self.assertFalse(os.path.exists(file1_path)) @@ -711,12 +789,15 @@ def test_compress_and_cleanup_remove(self): file3_path = os.path.join(self.temp_dir, "file3.txt") self.assertTrue(os.path.exists(file3_path)) - compressed_file_path = os.path.join(self.temp_dir, os.path.basename(self.temp_dir) + ".tar.gz") + compressed_file_path = os.path.join( + self.temp_dir, os.path.basename(self.temp_dir) + ".tar.gz" + ) self.assertTrue(os.path.exists(compressed_file_path)) # compressed_dir_path = os.path.join(self.temp_dir, "test_dir") # self.assertFalse(os.path.exists(compressed_dir_path)) + class TestCompressDirectoryParallel(unittest.TestCase): def setUp(self): self.temp_dir = tempfile.mkdtemp() @@ -746,13 +827,22 @@ def create_test_directories(self): file.write("This is file 2") def test_compress_directory_parallel(self): - directory_paths = [os.path.join(self.temp_dir, "dir1"), os.path.join(self.temp_dir, "dir2")] + directory_paths = [ + os.path.join(self.temp_dir, "dir1"), + os.path.join(self.temp_dir, "dir2"), + ] exclude_files 
= [] exclude_file_patterns = [] print_message = False inside_dir = True - compress_directory_parallel(directory_paths, exclude_files, exclude_file_patterns, print_message, inside_dir) + compress_directory_parallel( + directory_paths, + exclude_files, + exclude_file_patterns, + print_message, + inside_dir, + ) compressed_file_path1 = os.path.join(self.temp_dir, "dir1/dir1.tar.gz") self.assertTrue(os.path.exists(compressed_file_path1)) @@ -772,8 +862,17 @@ def test_compress_directory_parallel(self): extracted_file_path2 = os.path.join(self.temp_dir, "dir2", "file2.txt") self.assertTrue(os.path.exists(extracted_file_path2)) - self.assertTrue(filecmp.cmp(extracted_file_path1, os.path.join(self.temp_dir, "dir1", "file1.txt"))) - self.assertTrue(filecmp.cmp(extracted_file_path2, os.path.join(self.temp_dir, "dir2", "file2.txt"))) + self.assertTrue( + filecmp.cmp( + extracted_file_path1, os.path.join(self.temp_dir, "dir1", "file1.txt") + ) + ) + self.assertTrue( + filecmp.cmp( + extracted_file_path2, os.path.join(self.temp_dir, "dir2", "file2.txt") + ) + ) + class TestFindAndCompressDirectoriesParallel(unittest.TestCase): def setUp(self): @@ -822,10 +921,19 @@ def test_find_and_compress_directories_parallel(self): print_msg = False inside_dir = True all_present = False - - find_and_compress_directories_parallel(parent_dir, valid_dir_if_filenames, all_present, exclude_files_from_tarball, - exclude_filepatterns_from_tarball, keep_after, files, file_patterns, - print_msg, inside_dir) + + find_and_compress_directories_parallel( + parent_dir, + valid_dir_if_filenames, + all_present, + exclude_files_from_tarball, + exclude_filepatterns_from_tarball, + keep_after, + files, + file_patterns, + print_msg, + inside_dir, + ) compressed_file_path1 = os.path.join(self.temp_dir, "dir1/dir1.tar.gz") self.assertTrue(os.path.exists(compressed_file_path1)) @@ -847,17 +955,26 @@ def test_find_and_compress_directories_parallel(self): extracted_file_path2 = os.path.join(self.temp_dir, "dir2", "file2.txt") self.assertTrue(os.path.exists(extracted_file_path2)) - + self.assertFalse(os.path.exists(compressed_file_path3)) - self.assertTrue(filecmp.cmp(extracted_file_path1, os.path.join(self.temp_dir, "dir1", "file1.txt"))) - self.assertTrue(filecmp.cmp(extracted_file_path1, os.path.join(self.temp_dir, "dir1", "file1.txt"))) - + self.assertTrue( + filecmp.cmp( + extracted_file_path1, os.path.join(self.temp_dir, "dir1", "file1.txt") + ) + ) + self.assertTrue( + filecmp.cmp( + extracted_file_path1, os.path.join(self.temp_dir, "dir1", "file1.txt") + ) + ) + + class TestIsLineInFile(unittest.TestCase): def test_exact_match_line_present(self): # Create a temporary file with some lines - with tempfile.NamedTemporaryFile(mode='w', delete=False) as temp_file: + with tempfile.NamedTemporaryFile(mode="w", delete=False) as temp_file: temp_file.write("Line 1\n") temp_file.write("Line 2\n") temp_file.write("Line 3\n") @@ -880,7 +997,7 @@ def test_exact_match_line_not_present(self): def test_partial_match_line_present(self): # Create a temporary file with some lines - with tempfile.NamedTemporaryFile(mode='w', delete=False) as temp_file: + with tempfile.NamedTemporaryFile(mode="w", delete=False) as temp_file: temp_file.write("Hello, world!\n") temp_file.write("Goodbye, world!\n") @@ -893,7 +1010,7 @@ def test_partial_match_line_present(self): def test_partial_match_line_not_present(self): # Create a temporary file with some lines - with tempfile.NamedTemporaryFile(mode='w', delete=False) as temp_file: + with 
tempfile.NamedTemporaryFile(mode="w", delete=False) as temp_file: temp_file.write("Hello, world!\n") temp_file.write("Goodbye, world!\n") @@ -903,6 +1020,7 @@ def test_partial_match_line_not_present(self): result = is_line_in_file(filepath, line_to_search, exact_match) self.assertFalse(result) - -if __name__ == '__main__': - unittest.main() \ No newline at end of file + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_vasp.py b/tests/test_vasp.py index a1fdb76..07aff0c 100644 --- a/tests/test_vasp.py +++ b/tests/test_vasp.py @@ -3,7 +3,8 @@ import os import shutil -from utils.vasp.database import (find_vasp_directories) +from utils.vasp.database import find_vasp_directories + class TestFindVaspDirectories(unittest.TestCase): def setUp(self): @@ -57,14 +58,16 @@ def test_find_vasp_directories(self): all_present = False extract_tarballs = True - directories = find_vasp_directories(parent_dir, filenames, all_present, extract_tarballs) + directories = find_vasp_directories( + parent_dir, filenames, all_present, extract_tarballs + ) self.assertEqual(len(directories), 2) expected_dirs = ["dir2", "dir3"] for dir_name in expected_dirs: self.assertIn(dir_name, [os.path.basename(dir) for dir in directories]) - + def test_find_vasp_directories_negative(self): # Create a temporary empty directory to test the negative case empty_dir = tempfile.mkdtemp() @@ -75,10 +78,13 @@ def test_find_vasp_directories_negative(self): all_present = True extract_tarballs = True - directories = find_vasp_directories(parent_dir, filenames, all_present, extract_tarballs) + directories = find_vasp_directories( + parent_dir, filenames, all_present, extract_tarballs + ) # Assert that the function returns an empty list as there are no directories that meet the criteria self.assertEqual(len(directories), 0) + if __name__ == "__main__": unittest.main() diff --git a/utils/GNN_calculators/mace.py b/utils/GNN_calculators/mace.py index 6d1918a..f53b6fc 100644 --- a/utils/GNN_calculators/mace.py +++ b/utils/GNN_calculators/mace.py @@ -17,6 +17,7 @@ df = pd.read_pickle("unrel_df.pkl") + def calc_static_CHGNET(structure, chgnet=None): try: if chgnet is None: @@ -27,10 +28,13 @@ def calc_static_CHGNET(structure, chgnet=None): forces = chgnet_pred["f"] magmoms = chgnet_pred["m"] except Exception as e: - print(f"CHGNET evaluation failed with exception: {e} \n Probably the element you are trying does not exist in their dataset") + print( + f"CHGNET evaluation failed with exception: {e} \n Probably the element you are trying does not exist in their dataset" + ) return np.nan, np.nan, np.nan return toten, forces, magmoms + def calc_static_M3GNET(structure, m3gnet=None): try: if m3gnet is None: @@ -40,27 +44,38 @@ def calc_static_M3GNET(structure, m3gnet=None): toten = atoms.get_potential_energy() forces = atoms.get_forces() except Exception as e: - print(f"M3GNET evaluation failed with exception: {e} \n Probably the element you are trying does not exist in their dataset") + print( + f"M3GNET evaluation failed with exception: {e} \n Probably the element you are trying does not exist in their dataset" + ) return np.nan, np.nan, np.nan return toten, forces, np.nan -def calc_static_MACE(structure, MACE="/g/data/v43/Han/mace/mace/calculators/foundations_models/2023-08-14-mace-universal.model", device="cpu", default_dtype="float32"): + +def calc_static_MACE( + structure, + MACE="/g/data/v43/Han/mace/mace/calculators/foundations_models/2023-08-14-mace-universal.model", + device="cpu", + default_dtype="float32", +): try: 
MACE_calculator = MACECalculator( - model_paths=MACE, - device=device, - default_dtype=default_dtype, - ) + model_paths=MACE, + device=device, + default_dtype=default_dtype, + ) atoms = AseAtomsAdaptor().get_atoms(structure) atoms.set_calculator(MACE_calculator) toten = atoms.get_potential_energy() forces = atoms.get_forces() except Exception as e: - print(f"MACE evaluation failed with exception: {e} \n Probably the element you are trying does not exist in their dataset") + print( + f"MACE evaluation failed with exception: {e} \n Probably the element you are trying does not exist in their dataset" + ) return np.nan, np.nan, np.nan return toten, forces, np.nan -def calc_static_GNN(structure, model_type = None, model = None): + +def calc_static_GNN(structure, model_type=None, model=None): if model_type == "mace": toten, forces, magmoms = calc_static_MACE(structure) elif model_type == "m3gnet": @@ -68,11 +83,15 @@ def calc_static_GNN(structure, model_type = None, model = None): elif model_type == "chgnet": toten, forces, magmoms = calc_static_CHGNET(structure, chgnet=model) else: - warnings.warn(f"Specified model {model} is not a valid calculator, returning np.nan") + warnings.warn( + f"Specified model {model} is not a valid calculator, returning np.nan" + ) toten = np.nan forces = np.nan magmoms = np.nan return toten, forces, magmoms + + for model_type in ["chgnet"]: pureGB_toten_lst = [] pureslab_toten_lst = [] @@ -105,16 +124,24 @@ def calc_static_GNN(structure, model_type = None, model = None): # Wrap the outer loop with tqdm to add a progress bar for idx, row in tqdm(df.iterrows(), total=len(df), desc=f"Model: {model_type}"): - i+=1 - #if i > 5: + i += 1 + # if i > 5: # break pureGB_start = time.time() # Call the calc_static_GNN function - pureGB_toten, pureGB_f, pureGB_m = calc_static_GNN(row.struct_pureGB, model_type=model_type, model=model) - pureslab_toten, pureslab_f, pureslab_m = calc_static_GNN(row.struct_pureSLAB, model_type=model_type, model=model) - segGB_toten, segGB_f, segGB_m = calc_static_GNN(row.struct_segGB, model_type=model_type, model=model) - solslab_toten, solslab_f, solslab_m = calc_static_GNN(row.struct_solSLAB, model_type=model_type, model=model) + pureGB_toten, pureGB_f, pureGB_m = calc_static_GNN( + row.struct_pureGB, model_type=model_type, model=model + ) + pureslab_toten, pureslab_f, pureslab_m = calc_static_GNN( + row.struct_pureSLAB, model_type=model_type, model=model + ) + segGB_toten, segGB_f, segGB_m = calc_static_GNN( + row.struct_segGB, model_type=model_type, model=model + ) + solslab_toten, solslab_f, solslab_m = calc_static_GNN( + row.struct_solSLAB, model_type=model_type, model=model + ) # Append values to the corresponding lists pureGB_toten_lst.append(pureGB_toten) @@ -133,11 +160,11 @@ def calc_static_GNN(structure, model_type = None, model = None): solslab_m_lst.append(solslab_m) # Calculate the energy of segregation at each step and append to the list - eseg = ( - segGB_toten - pureGB_toten - (solslab_toten - pureslab_toten) - ) + eseg = segGB_toten - pureGB_toten - (solslab_toten - pureslab_toten) eseg_lst.append(eseg) - print(f"{row.job_name}: Eseg = {eseg_lst[-1]}, DFT = {row.E_seg_DFT}, error_Eseg = {eseg_lst[-1] - row.E_seg_DFT}") + print( + f"{row.job_name}: Eseg = {eseg_lst[-1]}, DFT = {row.E_seg_DFT}, error_Eseg = {eseg_lst[-1] - row.E_seg_DFT}" + ) print(f"Row processing time: {time.time() - pureGB_start:.4f} seconds") model_elapsed_time = time.time() - start_time @@ -160,7 +187,7 @@ def calc_static_GNN(structure, model_type = None, 
model = None): df[f"segGB_m_{model_type}"] = segGB_m_lst df[f"solslab_m_{model_type}"] = solslab_m_lst - #df.to_pickle(f"df_{model_type}.pkl") + # df.to_pickle(f"df_{model_type}.pkl") # Attach energy of segregation to the DataFrame with the corresponding suffix df[f"eseg_{model_type}"] = eseg_lst diff --git a/utils/StructureManipulator/cleave.py b/utils/StructureManipulator/cleave.py index 6cc46b5..bb75e5d 100644 --- a/utils/StructureManipulator/cleave.py +++ b/utils/StructureManipulator/cleave.py @@ -11,6 +11,7 @@ # RIPPED FROM MPINTERFACES + def center_slab(structure): """ Centers the atoms in a slab structure around 0.5 @@ -27,6 +28,7 @@ def center_slab(structure): structure.translate_sites(range(len(structure.sites)), translation) return structure + def get_rotation_matrix(axis, theta): """ Find the rotation matrix associated with counterclockwise rotation @@ -43,16 +45,21 @@ def get_rotation_matrix(axis, theta): axis = np.array(list(axis)) axis = axis / np.linalg.norm(axis) - axis *= -np.sin(theta/2.0) - a = np.cos(theta/2.0) + axis *= -np.sin(theta / 2.0) + a = np.cos(theta / 2.0) b, c, d = tuple(axis.tolist()) - aa, bb, cc, dd = a*a, b*b, c*c, d*d - bc, ad, ac, ab, bd, cd = b*c, a*d, a*c, a*b, b*d, c*d - return np.array([[aa+bb-cc-dd, 2*(bc+ad), 2*(bd-ac)], - [2*(bc-ad), aa+cc-bb-dd, 2*(cd+ab)], - [2*(bd+ac), 2*(cd-ab), aa+dd-bb-cc]]) - -def align_axis(structure, axis='c', direction=(0, 0, 1)): + aa, bb, cc, dd = a * a, b * b, c * c, d * d + bc, ad, ac, ab, bd, cd = b * c, a * d, a * c, a * b, b * d, c * d + return np.array( + [ + [aa + bb - cc - dd, 2 * (bc + ad), 2 * (bd - ac)], + [2 * (bc - ad), aa + cc - bb - dd, 2 * (cd + ab)], + [2 * (bd + ac), 2 * (cd - ab), aa + dd - bb - cc], + ] + ) + + +def align_axis(structure, axis="c", direction=(0, 0, 1)): """ Rotates a structure so that the specified axis is along the [001] direction. This is useful for adding vacuum, and @@ -66,23 +73,23 @@ def align_axis(structure, axis='c', direction=(0, 0, 1)): structure. Rotated to align axis along direction. """ - if axis == 'a': + if axis == "a": axis = structure.lattice._matrix[0] - elif axis == 'b': + elif axis == "b": axis = structure.lattice._matrix[1] - elif axis == 'c': + elif axis == "c": axis = structure.lattice._matrix[2] proj_axis = np.cross(axis, direction) - if not(proj_axis[0] == 0 and proj_axis[1] == 0): - theta = ( - np.arccos(np.dot(axis, direction) - / (np.linalg.norm(axis) * np.linalg.norm(direction))) + if not (proj_axis[0] == 0 and proj_axis[1] == 0): + theta = np.arccos( + np.dot(axis, direction) / (np.linalg.norm(axis) * np.linalg.norm(direction)) ) R = get_rotation_matrix(proj_axis, theta) rotation = SymmOp.from_rotation_and_translation(rotation_matrix=R) structure.apply_operation(rotation) return structure + def add_vacuum(structure, vacuum): """ Adds padding to a slab or 2D material. 
@@ -103,21 +110,28 @@ def add_vacuum(structure, vacuum): structure = Structure(lattice_C, species, coords, coords_are_cartesian=True) return center_slab(structure) + def cleave_sites(structure, cleave_line_coord, vacuum_size): - site_list = []; site_list2 = [] + site_list = [] + site_list2 = [] for idx, sites in enumerate(structure): if sites.frac_coords[-1] > cleave_line_coord: - #print(idx) + # print(idx) site_list.append(idx) else: - #print(idx) + # print(idx) site_list2.append(idx) - transformation_shift_up = transform.TranslateSitesTransformation(site_list,(0,0,vacuum_size/2),vector_in_frac_coords=False) - transformation_shift_down = transform.TranslateSitesTransformation(site_list2,(0,0,-vacuum_size/2),vector_in_frac_coords=False) + transformation_shift_up = transform.TranslateSitesTransformation( + site_list, (0, 0, vacuum_size / 2), vector_in_frac_coords=False + ) + transformation_shift_down = transform.TranslateSitesTransformation( + site_list2, (0, 0, -vacuum_size / 2), vector_in_frac_coords=False + ) cleaved_cell = transformation_shift_up.apply_transformation(structure) cleaved_cell = transformation_shift_down.apply_transformation(cleaved_cell) return cleaved_cell + def get_unique_values_in_nth_value(arr_list, n, tolerance): """ Returns unique values in the n-th element of sublists in arr_list within a specified tolerance. @@ -142,6 +156,7 @@ def get_unique_values_in_nth_value(arr_list, n, tolerance): unique_values.append(value) return np.sort(unique_values) + def compute_average_pairs(lst): """ Computes the average of consecutive pairs in the given list. @@ -158,6 +173,7 @@ def compute_average_pairs(lst): averages.append(average) return averages + def get_non_host_ele_idx(structure, host_elements): """ Returns the indices of non-host elements in the structure. @@ -169,10 +185,17 @@ def get_non_host_ele_idx(structure, host_elements): Returns: - list: Indices of non-host elements in the structure. """ - non_host_indices = [i for i, site in enumerate(structure) if site.species_string not in host_elements] + non_host_indices = [ + i + for i, site in enumerate(structure) + if site.species_string not in host_elements + ] return non_host_indices -def get_min_max_cp_coords_solute(structure, host_elements, axis, threshold=5, fractional=True): + +def get_min_max_cp_coords_solute( + structure, host_elements, axis, threshold=5, fractional=True +): """ Returns the minimum and maximum coordinates of solute elements along the specified axis. @@ -192,15 +215,18 @@ def get_min_max_cp_coords_solute(structure, host_elements, axis, threshold=5, fr for site_idx in non_host_indices: coord = structure[site_idx].frac_coords[axis] if max_coord is None or coord > max_coord: - max_coord = (coord + threshold/structure.lattice.abc[axis]) + max_coord = coord + threshold / structure.lattice.abc[axis] if min_coord is None or coord < min_coord: - min_coord = (coord - threshold/structure.lattice.abc[axis]) + min_coord = coord - threshold / structure.lattice.abc[axis] if not fractional: max_coord = max_coord * structure.lattice.abc[axis] min_coord = min_coord * structure.lattice.abc[axis] return [min_coord, max_coord] -def get_cp_coords_solute(structure, host_elements, axis, threshold=5, tolerance=0.01, fractional=True): + +def get_cp_coords_solute( + structure, host_elements, axis, threshold=5, tolerance=0.01, fractional=True +): """ Returns viable coordinates for solute elements within a specified range along the specified axis. 
@@ -215,11 +241,17 @@ def get_cp_coords_solute(structure, host_elements, axis, threshold=5, tolerance= Returns: - list: List of viable coordinates for solute elements. """ - min_max = get_min_max_cp_coords_solute(structure, host_elements, axis, fractional=fractional, threshold=threshold) + min_max = get_min_max_cp_coords_solute( + structure, host_elements, axis, fractional=fractional, threshold=threshold + ) if fractional: - atomic_layers = get_unique_values_in_nth_value(structure.frac_coords, -1, tolerance=tolerance/structure.lattice.abc[axis]) + atomic_layers = get_unique_values_in_nth_value( + structure.frac_coords, -1, tolerance=tolerance / structure.lattice.abc[axis] + ) else: - atomic_layers = get_unique_values_in_nth_value(structure.cart_coords, -1, tolerance=tolerance) + atomic_layers = get_unique_values_in_nth_value( + structure.cart_coords, -1, tolerance=tolerance + ) cp_list = compute_average_pairs(atomic_layers) min_cp_thres = min_max[0] max_cp_thres = min_max[1] @@ -227,14 +259,17 @@ def get_cp_coords_solute(structure, host_elements, axis, threshold=5, tolerance= cp_viable = [cp for cp in cp_list if min_cp_thres <= cp <= max_cp_thres] return cp_viable -def cleave_structure(structure, cleave_line_coord, cleave_vacuum_length, axis, fractional=True): + +def cleave_structure( + structure, cleave_line_coord, cleave_vacuum_length, axis, fractional=True +): """ - Cleaves the structure along a specified coordinate line. - Assumes vacuum is already present! + Cleaves the structure along a specified coordinate line. + Assumes vacuum is already present! If not, please: - - structure = add_vacuum(structure) - + + structure = add_vacuum(structure) + before this! Parameters: @@ -247,47 +282,76 @@ def cleave_structure(structure, cleave_line_coord, cleave_vacuum_length, axis, f Returns: - pymatgen.Structure: Cleaved structure. 
""" - site_list = []; site_list2 = [] + site_list = [] + site_list2 = [] for idx, sites in enumerate(structure): if fractional: - if sites.frac_coords[axis] > cleave_line_coord: + if sites.frac_coords[axis] > cleave_line_coord: site_list.append(idx) else: site_list2.append(idx) else: - if sites.coords[axis] > cleave_line_coord: + if sites.coords[axis] > cleave_line_coord: site_list.append(idx) else: site_list2.append(idx) shift = [0, 0, 0] - shift[axis] = cleave_vacuum_length/2 + shift[axis] = cleave_vacuum_length / 2 shift2 = shift.copy() - shift2[axis] = -cleave_vacuum_length/2 - transformation_shift_up = transform.TranslateSitesTransformation(site_list,tuple(shift),vector_in_frac_coords=False) - transformation_shift_down = transform.TranslateSitesTransformation(site_list2,tuple(shift2),vector_in_frac_coords=False) + shift2[axis] = -cleave_vacuum_length / 2 + transformation_shift_up = transform.TranslateSitesTransformation( + site_list, tuple(shift), vector_in_frac_coords=False + ) + transformation_shift_down = transform.TranslateSitesTransformation( + site_list2, tuple(shift2), vector_in_frac_coords=False + ) cleaved_struct = transformation_shift_up.apply_transformation(structure) cleaved_struct = transformation_shift_down.apply_transformation(cleaved_struct) return cleaved_struct -def cleave_structure_around_solutes(structure, - host_elements, - axis=2, - cleave_vacuum_length=6, - sol_dist_threshold=5, - tolerance=0.01, - add_vacuum_block_length=None): + +def cleave_structure_around_solutes( + structure, + host_elements, + axis=2, + cleave_vacuum_length=6, + sol_dist_threshold=5, + tolerance=0.01, + add_vacuum_block_length=None, +): if add_vacuum_block_length is not None: - structure = add_vacuum(structure,vacuum=add_vacuum_block_length) - cp_coords = get_cp_coords_solute(structure, host_elements=host_elements, axis=axis, threshold=sol_dist_threshold, tolerance=tolerance) + structure = add_vacuum(structure, vacuum=add_vacuum_block_length) + cp_coords = get_cp_coords_solute( + structure, + host_elements=host_elements, + axis=axis, + threshold=sol_dist_threshold, + tolerance=tolerance, + ) cleaved_struct_list = [] for cp in cp_coords: - cleaved_struct = cleave_structure(structure,cleave_line_coord=cp,cleave_vacuum_length=cleave_vacuum_length, axis=axis) + cleaved_struct = cleave_structure( + structure, + cleave_line_coord=cp, + cleave_vacuum_length=cleave_vacuum_length, + axis=axis, + ) cleaved_struct_list.append(cleaved_struct) return cleaved_struct_list -def cleave_structures_around_site(structure, site_index, axis=2, cleave_vacuum_length=6, site_dist_threshold=5, tolerance=0.01, add_vacuum_block_length=None, fractional=True): + +def cleave_structures_around_site( + structure, + site_index, + axis=2, + cleave_vacuum_length=6, + site_dist_threshold=5, + tolerance=0.01, + add_vacuum_block_length=None, + fractional=True, +): """ - Cleaves a structure around a specified site. Assumes vacuum is already present! + Cleaves a structure around a specified site. Assumes vacuum is already present! If not, add vacuum before this. 
Parameters: @@ -306,11 +370,15 @@ def cleave_structures_around_site(structure, site_index, axis=2, cleave_vacuum_l if add_vacuum_block_length is not None: structure = add_vacuum(structure, vacuum=add_vacuum_block_length) - site_coord = structure[site_index].frac_coords[axis] if fractional else structure[site_index].coords[axis] + site_coord = ( + structure[site_index].frac_coords[axis] + if fractional + else structure[site_index].coords[axis] + ) # Determine the range of coordinates around the specified site - min_coord = site_coord - site_dist_threshold/structure.lattice.abc[axis] - max_coord = site_coord + site_dist_threshold/structure.lattice.abc[axis] + min_coord = site_coord - site_dist_threshold / structure.lattice.abc[axis] + max_coord = site_coord + site_dist_threshold / structure.lattice.abc[axis] # Get unique values in the specified axis within the tolerance coords = structure.frac_coords if fractional else structure.cart_coords @@ -322,7 +390,13 @@ def cleave_structures_around_site(structure, site_index, axis=2, cleave_vacuum_l cleaved_struct_list = [] for cp in cp_viable: - cleaved_struct = cleave_structure(structure, cleave_line_coord=cp, cleave_vacuum_length=cleave_vacuum_length, axis=axis, fractional=fractional) + cleaved_struct = cleave_structure( + structure, + cleave_line_coord=cp, + cleave_vacuum_length=cleave_vacuum_length, + axis=axis, + fractional=fractional, + ) cleaved_struct_list.append(cleaved_struct) return cleaved_struct_list diff --git a/utils/StructureManipulator/interstitial.py b/utils/StructureManipulator/interstitial.py index 4b3d6ba..4a42fd0 100644 --- a/utils/StructureManipulator/interstitial.py +++ b/utils/StructureManipulator/interstitial.py @@ -23,17 +23,20 @@ import scipy.optimize as optimization import json + # Reload packages when they change (mostly for custom modules) -from IPython.lib.deepreload import reload -%load_ext autoreload -%autoreload 2 +# from IPython.lib.deepreload import reload +# %load_ext autoreload +# %autoreload 2 import warnings + warnings.filterwarnings("ignore") + # pyscal version <= 2.10.15 def get_all_vertices(sys): - ''' + """ Calculate all Voronoi vertices Parameters @@ -44,16 +47,17 @@ def get_all_vertices(sys): ------- all_vertices_raw: list of floats list of all Voronoi vertices - ''' - sys.find_neighbors(method='voronoi') + """ + sys.find_neighbors(method="voronoi") all_vertices_raw = [] for atom in sys.iter_atoms(): for v in atom.vertex_positions: all_vertices_raw.append(v) return all_vertices_raw + def get_octahedral_positions(sys_in, alat): - ''' + """ Get all octahedral vertex positions Parameters @@ -67,33 +71,34 @@ def get_octahedral_positions(sys_in, alat): ------- octahedral_at: list of floats position of octahedral voids - ''' + """ octahedral_at = [] real_pos = np.array([np.array(atox.pos) for atox in sys_in.iter_atoms()]) atoms = sys_in.get_all_atoms() box = sys_in.box count = 0 for i in range(len(atoms)): - for j in range(i+1, len(atoms)): + for j in range(i + 1, len(atoms)): dist = sys_in.get_distance(atoms[i], atoms[j]) - if np.abs(dist-alat) < 1E-2: + if np.abs(dist - alat) < 1e-2: count += 1 - npos = (np.array(atoms[i].pos)+np.array(atoms[j].pos))/2 + npos = (np.array(atoms[i].pos) + np.array(atoms[j].pos)) / 2 if 0 <= npos[0] <= box[0][0]: if 0 <= npos[1] <= box[1][1]: if 0 <= npos[2] <= box[2][2]: - #print(np.abs(np.sum(npos-real_pos))) - #print(npos) + # print(np.abs(np.sum(npos-real_pos))) + # print(npos) found = False for rpos in real_pos: - if np.sum(np.abs(npos-rpos)) < 1E-5: + if 
np.sum(np.abs(npos - rpos)) < 1e-5: found = True if not found: octahedral_at.append(npos) return octahedral_at + def add_sphereatoms(sys, all_vertices, max_type): - ''' + """ Add ghost atoms at vertex positions Parameters @@ -111,16 +116,17 @@ def add_sphereatoms(sys, all_vertices, max_type): ------- sys: pyscal System object - ''' + """ new_atoms = [] for vertex in all_vertices: - atom=pc.Atom(pos=vertex, type=max_type+1) + atom = pc.Atom(pos=vertex, type=max_type + 1) new_atoms.append(atom) sys.add_atoms(new_atoms) return sys + def get_ra(sys, natoms, pf): - ''' + """ Calculate radius ra Parameters @@ -137,15 +143,16 @@ def get_ra(sys, natoms, pf): ------- ra: float Calculated ra - ''' + """ box = sys.box vol = np.dot(np.cross(box[0], box[1]), box[2]) - volatom = vol/natoms - ra = ((pf*volatom)/((4/3)*np.pi))**(1/3) + volatom = vol / natoms + ra = ((pf * volatom) / ((4 / 3) * np.pi)) ** (1 / 3) return ra + def get_rvv(sys, max_type, ra): - ''' + """ Calculate rvv for each atom Parameters @@ -157,42 +164,47 @@ def get_rvv(sys, max_type, ra): ra: float calculated ra value - ''' + """ rlist = [] atoms = sys.atoms for atom in atoms: - if atom.type == max_type+1: - #collect ”real” neighbors - nns = [x for x in atom.neighbors if atoms[x].type<=max_type] - #get the distances + if atom.type == max_type + 1: + # collect ”real” neighbors + nns = [x for x in atom.neighbors if atoms[x].type <= max_type] + # get the distances dists = [sys.get_distance(atom, atoms[n]) for n in nns] - #get minimum distance + # get minimum distance Rvv = min(dists) - rvv = (Rvv-ra)/ra + rvv = (Rvv - ra) / ra atom.cutoff = rvv rlist.append(rvv) - return rlist,atoms + return rlist, atoms + -def get_interstitial_structure(input_file, output_file = "poscar.vasp", alat = 2.84, pf = 0.68): +def get_interstitial_structure( + input_file, output_file="poscar.vasp", alat=2.84, pf=0.68 +): # Input parameters - ''' + """ pf = 0.68 # Packing factor of the input crystal lattice alat = 2.84 # Lattice constant in Angstroms example usage: iGB_struct = get_interstitial_structure("tempGB.vasp", output_file = "GB.vasp", alat = 2.84, pf = 0.68) struct_list, struct_all_studied_sites = get_int_struct_list(GB_struct_list[i], midpoint=midpoints[i]) - ''' + """ # Read input from CONTCAR file sys_in = pc.System() sys_out = pc.System() - sys_in.read_inputfile(input_file, format='poscar') - sys_out.read_inputfile(input_file, format='poscar') + sys_in.read_inputfile(input_file, format="poscar") + sys_out.read_inputfile(input_file, format="poscar") # Find all Voronoi vertices and obtain unique ones with a precision of 2 decimal points all_vertices_raw = get_all_vertices(sys_in) - all_vertices = np.unique((np.array(all_vertices_raw)*100).astype(int)/100, axis=0) + all_vertices = np.unique( + (np.array(all_vertices_raw) * 100).astype(int) / 100, axis=0 + ) # Get all octahedral positions octahedral_at = get_octahedral_positions(sys_in, alat) @@ -202,24 +214,24 @@ def get_interstitial_structure(input_file, output_file = "poscar.vasp", alat = 2 natoms = sys_in.natoms max_type = len(conc.keys()) - #Combine vertices and octahedral sites + # Combine vertices and octahedral sites combined_list = np.concatenate((all_vertices, octahedral_at)) - #add ghost atoms at vertex positions - sys_out = add_sphereatoms(sys_out,combined_list,max_type) + # add ghost atoms at vertex positions + sys_out = add_sphereatoms(sys_out, combined_list, max_type) - #calculate ra + # calculate ra ra = get_ra(sys_out, natoms, pf) - #Ghost atoms are used in pyscal to compensate for the 
small number of total real atoms - #The remap_atoms method removes these ghost atoms, including: - #(i) remapping atoms back to the simulation box, - #(ii) remove the pyscal inbuilt ghost atoms, given by atom.id > total atoms, - #(iii) remove atoms that are too close to each other - the distance tolerance can be set using ‘dtol‘ + # Ghost atoms are used in pyscal to compensate for the small number of total real atoms + # The remap_atoms method removes these ghost atoms, including: + # (i) remapping atoms back to the simulation box, + # (ii) remove the pyscal inbuilt ghost atoms, given by atom.id > total atoms, + # (iii) remove atoms that are too close to each other - the distance tolerance can be set using ‘dtol‘ nx = sys_out.remap_atoms(dtol=0.4, remove_images=False) - sys_out.to_file(output_file, format = 'poscar') - + sys_out.to_file(output_file, format="poscar") + struct = Structure.from_file(filename=output_file) # Define a mapping from the current species to the desired species species_mapping = {Element("H"): Element("Fe"), Element("He"): Element("H")} @@ -230,42 +242,57 @@ def get_interstitial_structure(input_file, output_file = "poscar.vasp", alat = 2 return struct # Get radius of VV sphere, rvv # Calculate neighbors again + + # sys_out.find_neighbors(method="cutoff",cutoff=alat) # # Output void ratios (rvv/ra) and count # rlist,atoms = get_rvv(sys_out,max_type,ra) # void_ratios, void_count = np.unique(np.round(rlist, decimals=1), return_counts=True) -def get_int_struct_list(structure, zlims = [], host_elements=["Fe"], within_GB_distance=3, midpoint = 0.50945): - - int_id = [i for i, site in enumerate(structure) if site.species_string not in host_elements] - GB_id = [i for i, site in enumerate(structure) if site.species_string in host_elements] - + +def get_int_struct_list( + structure, zlims=[], host_elements=["Fe"], within_GB_distance=3, midpoint=0.50945 +): + + int_id = [ + i + for i, site in enumerate(structure) + if site.species_string not in host_elements + ] + GB_id = [ + i for i, site in enumerate(structure) if site.species_string in host_elements + ] + GB_struct = structure.copy() GB_struct.remove_sites(int_id) - + z_frac_coords = [site.frac_coords[-1] for site in GB_struct] zlims = [min(z_frac_coords), max(z_frac_coords)] - + only_intsites_struct = structure.copy() only_intsites_struct.remove_sites(GB_id) - only_intsites_struct.merge_sites(tol = 0.35, mode = "a") + only_intsites_struct.merge_sites(tol=0.35, mode="a") only_intsites_struct.sort(lambda x: x.frac_coords[-1]) - - int_fcoords = [site.frac_coords for site in only_intsites_struct if site.species_string == "H"] - # Get the ones we are interested in computing + int_fcoords = [ + site.frac_coords for site in only_intsites_struct if site.species_string == "H" + ] + + # Get the ones we are interested in computing struct_list = [] struct_all_studied_sites = GB_struct.copy() - + for int_sites in int_fcoords: compute_struct = GB_struct.copy() - if int_sites[-1] > min(zlims)\ - and int_sites[-1] < max(zlims)\ - and abs(int_sites[-1] - midpoint) * structure.lattice.c < within_GB_distance: - # and int_sites[-1] <= midpoint+0.01: + if ( + int_sites[-1] > min(zlims) + and int_sites[-1] < max(zlims) + and abs(int_sites[-1] - midpoint) * structure.lattice.c < within_GB_distance + ): + # and int_sites[-1] <= midpoint+0.01: compute_struct.append("H", int_sites) struct_list.append(compute_struct) struct_all_studied_sites.append("H", int_sites) - + return struct_list, struct_all_studied_sites diff --git 
a/utils/ace_descriptor_utils.py b/utils/ace_descriptor_utils.py index 08ea53a..b7061cc 100644 --- a/utils/ace_descriptor_utils.py +++ b/utils/ace_descriptor_utils.py @@ -1,10 +1,7 @@ import string from random import sample import math -from itertools import ( - starmap, - combinations -) +from itertools import starmap, combinations import numpy as np import scipy.stats as ss @@ -24,46 +21,54 @@ from pyace.atomicenvironment import aseatoms_to_atomicenvironment from tqdm.auto import tqdm -def make_ace(rmax, number_of_functions=250, element='H', **kwargs): + +def make_ace(rmax, number_of_functions=250, element="H", **kwargs): pot_conf = { - 'deltaSplineBins': 0.001, - 'elements': [element], - 'embeddings': {'ALL': {'drho_core_cut': 250, - 'fs_parameters': [1, 1], - 'ndensity': 1, - 'npot': 'FinnisSinclair', - 'rho_core_cut': 200000}, - }, - 'bonds': { - 'ALL': {'NameOfCutoffFunction': 'cos', - 'core-repulsion': [10000.0, 5.0], - 'dcut': 0.01, - 'radbase': 'ChebPow', - # 'nradbase': 10, - 'radparameters': [2.0], - 'rcut': 1.1 * rmax}, + "deltaSplineBins": 0.001, + "elements": [element], + "embeddings": { + "ALL": { + "drho_core_cut": 250, + "fs_parameters": [1, 1], + "ndensity": 1, + "npot": "FinnisSinclair", + "rho_core_cut": 200000, + }, + }, + "bonds": { + "ALL": { + "NameOfCutoffFunction": "cos", + "core-repulsion": [10000.0, 5.0], + "dcut": 0.01, + "radbase": "ChebPow", + # 'nradbase': 10, + "radparameters": [2.0], + "rcut": 1.1 * rmax, + }, + }, + "functions": { + "number_of_functions_per_element": number_of_functions, + "UNARY": + # simple default from Yury + { + "nradmax_by_orders": [15, 6, 4, 3, 2, 2], + "lmax_by_orders": [0, 3, 3, 2, 2, 1], + }, + # {'nradmax_by_orders': [ 10, 5, 3, 2, ], + # 'lmax_by_orders': [ 0 , 3, 3, 1, ]} }, - 'functions': { - 'number_of_functions_per_element': number_of_functions, - 'UNARY': - # simple default from Yury - { 'nradmax_by_orders': [ 15, 6, 4, 3, 2, 2 ], - 'lmax_by_orders': [ 0 , 3, 3, 2, 2, 1 ]} - # {'nradmax_by_orders': [ 10, 5, 3, 2, ], - # 'lmax_by_orders': [ 0 , 3, 3, 1, ]} - } } calc = pyace.PyACECalculator( - pyace.create_multispecies_basis_config(pot_conf), - **kwargs + pyace.create_multispecies_basis_config(pot_conf), **kwargs ) return calc + def get_ace_descr(calc, structure, max_params=None, copy=True, overwrite_type=True): if copy: structure = structure.copy() if overwrite_type: - structure[:] = 'H' + structure[:] = "H" structure.calc = calc structure.get_potential_energy() @@ -79,12 +84,10 @@ def get_ace_descr(calc, structure, max_params=None, copy=True, overwrite_type=Tr # axis=1 # ) if max_params is not None and max_params < min(descr.shape): - descr = PCA( - whiten=True, - n_components=max_params - ).fit_transform(descr) + descr = PCA(whiten=True, n_components=max_params).fit_transform(descr) return descr + def suggest_sites(structure, num_sites, mask=None): if mask is None: mask = np.ones(len(structure), dtype=bool) @@ -107,7 +110,7 @@ def suggest_sites(structure, num_sites, mask=None): # pick the largest peak as the mode mode = x[p[si.peak_prominences(k.pdf(x), p)[0].argmax()]] # sort all atoms by their deviation from the mode (ie. 
bulk atoms) - SA = np.argsort(abs(pca-mode)) + SA = np.argsort(abs(pca - mode)) # the mask needs to be sorted in the same way, then we pick num_sites # atoms that are furthest from the mode sites = SA[mask[SA]][-num_sites:] @@ -116,6 +119,7 @@ def suggest_sites(structure, num_sites, mask=None): else: raise ValueError("Lazy developer error!") + def plot_sites(structure, sites): I = np.zeros(len(structure)) for i in sites: @@ -123,8 +127,10 @@ def plot_sites(structure, sites): return structure.plot3d(scalar_field=I) + ### SPACE routines + def space(calc, structure, pure_descr, indices, per_atom=False): """ Calculate the SPACE descriptors from a given unary ACE model. @@ -143,8 +149,12 @@ def space(calc, structure, pure_descr, indices, per_atom=False): """ single = pure_descr[indices].reshape(len(indices), -1) - solutes = ASEAtoms(['H']*len(indices), structure.positions[indices], - cell=structure.cell, pbc=structure.pbc) + solutes = ASEAtoms( + ["H"] * len(indices), + structure.positions[indices], + cell=structure.cell, + pbc=structure.pbc, + ) calc.ace.compute(aseatoms_to_atomicenvironment(solutes)) projections = np.asarray(calc.ace.projections) inter = projections @@ -154,98 +164,102 @@ def space(calc, structure, pure_descr, indices, per_atom=False): else: return full + def calc_space_descriptors( - structure, segregations, calc, pure_descr=None, per_atom=False, - tqdm_enabled=True + structure, segregations, calc, pure_descr=None, per_atom=False, tqdm_enabled=True ): if pure_descr is None: - pure_descr = get_ace_descr( - calc, - structure, - max_params=None - ) + pure_descr = get_ace_descr(calc, structure, max_params=None) descr_shape = (pure_descr.shape[1] + len(calc.basis.basis_coeffs),) # Check For structure descriptor array - info = segregations.has_array('descriptors') - if info and info['shape'] != descr_shape: - del segregations._per_chunk_arrays['descriptors'] - info = segregations.has_array('descriptors') + info = segregations.has_array("descriptors") + if info and info["shape"] != descr_shape: + del segregations._per_chunk_arrays["descriptors"] + info = segregations.has_array("descriptors") if not info: - segregations.add_array('descriptors', - shape=descr_shape, - per='chunk', - fill=np.nan + segregations.add_array( + "descriptors", shape=descr_shape, per="chunk", fill=np.nan ) # Check For atom descriptor array if per_atom: - info = segregations.has_array('atomic_descriptors') - if info and info['shape'] != descr_shape: - del segregations._per_chunk_arrays['atomic_descriptors'] - info = segregations.has_array('atomic_descriptors') + info = segregations.has_array("atomic_descriptors") + if info and info["shape"] != descr_shape: + del segregations._per_chunk_arrays["atomic_descriptors"] + info = segregations.has_array("atomic_descriptors") if not info: - segregations.add_array('atomic_descriptors', - shape=descr_shape, - per='element', - fill=np.nan + segregations.add_array( + "atomic_descriptors", shape=descr_shape, per="element", fill=np.nan ) - for i in tqdm(range(len(segregations)), - desc='SPACE', disable=not tqdm_enabled): - if np.isnan(segregations['descriptors', i]).any(): - descr = space(calc, structure, pure_descr, segregations['indices', i], - per_atom=per_atom) + for i in tqdm(range(len(segregations)), desc="SPACE", disable=not tqdm_enabled): + if np.isnan(segregations["descriptors", i]).any(): + descr = space( + calc, + structure, + pure_descr, + segregations["indices", i], + per_atom=per_atom, + ) if not per_atom: - segregations['descriptors', i] = descr + 
segregations["descriptors", i] = descr else: - segregations['atomic_descriptors', i] = descr - segregations['descriptors', i] = descr.sum(axis=0) + segregations["atomic_descriptors", i] = descr + segregations["descriptors", i] = descr.sum(axis=0) + def reduce_sites( - structure, - segregations, - ace, - cluster_threshold=1e-4, cluster=True, check_cluster=True + structure, + segregations, + ace, + cluster_threshold=1e-4, + cluster=True, + check_cluster=True, ): """ Find and filter equivalent segregation patterns with ACE. """ calc_space_descriptors(structure, segregations, ace) - descr = segregations['descriptors'] + descr = segregations["descriptors"] if cluster: _, unique, inverse, counts = np.unique( - DBSCAN(min_samples=1, eps=cluster_threshold).fit_predict( - StandardScaler().fit_transform(descr) - ), - return_index=True, return_inverse=True, return_counts=True + DBSCAN(min_samples=1, eps=cluster_threshold).fit_predict( + StandardScaler().fit_transform(descr) + ), + return_index=True, + return_inverse=True, + return_counts=True, ) inverse = unique[inverse] if check_cluster: for R in unique: - D = descr[inverse==R] - assert np.abs(D-D[0]).mean() < 1e-4 + D = descr[inverse == R] + assert np.abs(D - D[0]).mean() < 1e-4 # assert np.allclose(D, D[0], atol=1e-5) else: _, unique, inverse, counts = np.unique( - descr.round(7), # get rid of floating point noise - # StandardScaler().fit_transform(descr).round( - # -int(np.ceil(np.log10(cluster_threshold))) - # ), - axis=0, - return_index=True, return_inverse=True, return_counts=True + descr.round(7), # get rid of floating point noise + # StandardScaler().fit_transform(descr).round( + # -int(np.ceil(np.log10(cluster_threshold))) + # ), + axis=0, + return_index=True, + return_inverse=True, + return_counts=True, ) inverse = unique[inverse] return unique, inverse, counts -def fit_space(df, D, E='excess', LM=Ridge, plot=True): - df = df.query('n_sites>0') - df['index'] = df['index'].astype(int) - SI, I = np.unique( df['index'], return_index=True ) + +def fit_space(df, D, E="excess", LM=Ridge, plot=True): + df = df.query("n_sites>0") + df["index"] = df["index"].astype(int) + SI, I = np.unique(df["index"], return_index=True) Dr = D[SI] Er = df[E].iloc[I] lm = LM(fit_intercept=False) @@ -255,19 +269,22 @@ def fit_space(df, D, E='excess', LM=Ridge, plot=True): if len(Ep) < 500: plt.scatter(Er, Ep) else: - plt.hexbin(Er, Ep, bins='log') + plt.hexbin(Er, Ep, bins="log") plt.gca().set_aspect(1) - plt.plot([Er.min()]*2, [Er.max()]*2, 'r-') - rmse = np.sqrt( np.mean((Er - Ep)**2) ) + plt.plot([Er.min()] * 2, [Er.max()] * 2, "r-") + rmse = np.sqrt(np.mean((Er - Ep) ** 2)) return lm, rmse, np.abs(Er - Ep).max() + ### Sampling routines + def random_combination(pool, r): n = len(pool) indices = sorted(sample(range(n), r)) return tuple(pool[i] for i in indices) + def n_random_combinations(iterable, r, n): pool = tuple(iterable) if n >= math.comb(len(pool), r): @@ -276,104 +293,101 @@ def n_random_combinations(iterable, r, n): for _ in range(n): yield random_combination(pool, r) + def make_individual_segregation(seg, name, indices, **kwargs): seg.add_chunk( - len(indices), - identifier=name, - indices=indices, - n_sites=len(indices), - **kwargs + len(indices), identifier=name, indices=indices, n_sites=len(indices), **kwargs ) -def add_segregations(seg, all_sites, max_sites, cache=None, tqdm_enabled=True, **kwargs): + +def add_segregations( + seg, all_sites, max_sites, cache=None, tqdm_enabled=True, **kwargs +): num_sites = len(all_sites) # distribute n_sites 
evenly, but take into account that we have added # n=1 & n=full already by default - max_per_n_sites = { - i: max_sites // (num_sites - 2) - for i in range(2, num_sites) - } - for i in range(2, num_sites//2 + 1): + max_per_n_sites = {i: max_sites // (num_sites - 2) for i in range(2, num_sites)} + for i in range(2, num_sites // 2 + 1): # can't add more structures than permutationally possible nmax = math.comb(num_sites, i) navg = max_per_n_sites[i] if navg > nmax: max_per_n_sites[i] = nmax max_per_n_sites[num_sites - i] = nmax - for j in range(i+1, num_sites): - max_per_n_sites[j] += 2*(navg - nmax)//(num_sites-i) + for j in range(i + 1, num_sites): + max_per_n_sites[j] += 2 * (navg - nmax) // (num_sites - i) if cache is None: if len(seg) > 0: - cache = set(seg['identifier']) + cache = set(seg["identifier"]) else: cache = set() - for o in tqdm(range(2, num_sites), desc='Order', disable=not tqdm_enabled): - for names, indices in starmap(zip, - n_random_combinations(all_sites.items(), o, max_per_n_sites[i]) + for o in tqdm(range(2, num_sites), desc="Order", disable=not tqdm_enabled): + for names, indices in starmap( + zip, n_random_combinations(all_sites.items(), o, max_per_n_sites[i]) ): - sites = '|'.join(names) + sites = "|".join(names) if sites not in cache: - make_individual_segregation( - seg, sites, indices, **kwargs - ) + make_individual_segregation(seg, sites, indices, **kwargs) cache.add(sites) return cache + ### Analysis routines -def get_excess_energies(df, E='[E]N', cname='coverage'): +def get_excess_energies(df, E="[E]N", cname="coverage"): c = df[cname] / df[cname].max() cmin = df[cname].min() cmax = df[cname].max() - e0 = df.query(f'{cname}==@cmin')[E].min() - e1 = df.query(f'{cname}==@cmax')[E].min() - df['excess'] = df[E] - (1-c)*e0 - e1 * c + e0 = df.query(f"{cname}==@cmin")[E].min() + e1 = df.query(f"{cname}==@cmax")[E].min() + df["excess"] = df[E] - (1 - c) * e0 - e1 * c - ch = ConvexHull(df[[cname, 'excess']].to_numpy()) - df['stable'] = False - df['stable'].iloc[ - df.iloc[ np.unique(ch.simplices) - ].query('excess<=0').index] = True + ch = ConvexHull(df[[cname, "excess"]].to_numpy()) + df["stable"] = False + df["stable"].iloc[df.iloc[np.unique(ch.simplices)].query("excess<=0").index] = True # S = df.query('stable').sites # makes sure that degenerate sites of the ones found by CH are also # marked stable, not needed after we move this to analyze # df.stable.iloc[df.query('original.isin(@S)').index] = True - chex = sint.interp1d(*df.query('stable')[[cname, 'excess']].to_numpy().T) - df['energy_above_hull'] = df.excess - df[cname].map(chex) + chex = sint.interp1d(*df.query("stable")[[cname, "excess"]].to_numpy().T) + df["energy_above_hull"] = df.excess - df[cname].map(chex) # better version of the paragraph above - df.loc[df.energy_above_hull==0].stable = True + df.loc[df.energy_above_hull == 0].stable = True return df -def plot_excess_energies(df, cname='n_sites'): - sns.violinplot( - data=df, - x=cname, y='excess', - cut=0 - ) +def plot_excess_energies(df, cname="n_sites"): + + sns.violinplot(data=df, x=cname, y="excess", cut=0) sns.lineplot( - data=df.query('stable'), marker='o', color='k', - x=cname, y='excess', zorder=1, + data=df.query("stable"), + marker="o", + color="k", + x=cname, + y="excess", + zorder=1, ) - return df -def plot_energies_above_hull(df, temperature_units=False, cname='n_sites'): + +def plot_energies_above_hull(df, temperature_units=False, cname="n_sites"): E = df.energy_above_hull.to_numpy() if temperature_units: E /= 8.6e-5 sns.scatterplot( - 
data=df, alpha=.5, - x=cname, y=E, - hue='stable', #size='degeneracy' + data=df, + alpha=0.5, + x=cname, + y=E, + hue="stable", # size='degeneracy' ) - return df \ No newline at end of file + return df diff --git a/utils/analysis_functions.py b/utils/analysis_functions.py index a8dc244..ccedcb7 100644 --- a/utils/analysis_functions.py +++ b/utils/analysis_functions.py @@ -52,47 +52,70 @@ job_path_standard_Gadi = "%s\\jobfile-conv-Gadi" % job_script_folder job_path_standard_Magnus = "%s\\jobfile-conv-Magnus" % job_script_folder job_path_standard_Setonix = "%s/jobfile-conv-Setonix" % job_script_folder -job_path_DoubleRelaxation_DDEC6_Setonix = os.path.join(job_script_folder, "jobfile-Setonix-DoubleRelaxation-DDEC6") -job_path_DoubleRelaxation_DDEC6_Gadi = os.path.join(job_script_folder, "jobfile-Gadi-DoubleRelaxation-DDEC6") -job_path_DoubleRelaxation_DDEC6_Gadi_GPU = os.path.join(job_script_folder, "jobfile-Gadi-DoubleRelaxation-DDEC6-GPU") -job_path_StaticImage_DDEC6_Setonix = os.path.join(job_script_folder, "jobfile-Setonix-StaticImage-DDEC6") -job_path_StaticImage_DDEC6_Gadi = os.path.join(job_script_folder, "jobfile-Gadi-StaticImage-DDEC6") +job_path_DoubleRelaxation_DDEC6_Setonix = os.path.join( + job_script_folder, "jobfile-Setonix-DoubleRelaxation-DDEC6" +) +job_path_DoubleRelaxation_DDEC6_Gadi = os.path.join( + job_script_folder, "jobfile-Gadi-DoubleRelaxation-DDEC6" +) +job_path_DoubleRelaxation_DDEC6_Gadi_GPU = os.path.join( + job_script_folder, "jobfile-Gadi-DoubleRelaxation-DDEC6-GPU" +) +job_path_StaticImage_DDEC6_Setonix = os.path.join( + job_script_folder, "jobfile-Setonix-StaticImage-DDEC6" +) +job_path_StaticImage_DDEC6_Gadi = os.path.join( + job_script_folder, "jobfile-Gadi-StaticImage-DDEC6" +) VASP_job_INCAR_path = "%s\\INCAR" % job_script_folder VASP_job_INCAR_DDEC6_path = "%s\\INCAR-DDEC6" % job_script_folder -GB_Kpoint_rough_dict = {"S11-RA110-S3-32": func.KPOINTS([3, 3, 1], [0, 0, 0]), - "S3-RA110-S1-11": func.KPOINTS([4, 2, 1], [0, 0, 0]), - "S3-RA110-S1-12": func.KPOINTS([4, 3, 1], [0, 0, 0]), - "S5-RA001-S210": func.KPOINTS([3, 3, 1], [0, 0, 0]), - "S5-RA001-S310": func.KPOINTS([3, 2, 1], [0, 0, 0]), - "S9-RA110-S2-21": func.KPOINTS([3, 4, 1], [0, 0, 0])} +GB_Kpoint_rough_dict = { + "S11-RA110-S3-32": func.KPOINTS([3, 3, 1], [0, 0, 0]), + "S3-RA110-S1-11": func.KPOINTS([4, 2, 1], [0, 0, 0]), + "S3-RA110-S1-12": func.KPOINTS([4, 3, 1], [0, 0, 0]), + "S5-RA001-S210": func.KPOINTS([3, 3, 1], [0, 0, 0]), + "S5-RA001-S310": func.KPOINTS([3, 2, 1], [0, 0, 0]), + "S9-RA110-S2-21": func.KPOINTS([3, 4, 1], [0, 0, 0]), +} # KPOINT NUMBER CALCULATED: S3-1 6, S3-2 7, S9 7, S5-2 5, S5-3 4, S11 7 -GB_Kpoint_fine_dict = {"S11-RA110-S3-32": func.KPOINTS([6, 6, 1], [0, 0, 0]), - "S3-RA110-S1-11": func.KPOINTS([6, 3, 1], [0, 0, 0]), - "S3-RA110-S1-12": func.KPOINTS([6, 6, 1], [0, 0, 0]), - "S5-RA001-S210": func.KPOINTS([4, 4, 1], [0, 0, 0]), - "S5-RA001-S310": func.KPOINTS([4, 3, 1], [0, 0, 0]), - "S9-RA110-S2-21": func.KPOINTS([4, 6, 1], [0, 0, 0])} - -list_GB = ["S11-RA110-S3-32","S3-RA110-S1-11", "S3-RA110-S1-12", "S9-RA110-S2-21"] +GB_Kpoint_fine_dict = { + "S11-RA110-S3-32": func.KPOINTS([6, 6, 1], [0, 0, 0]), + "S3-RA110-S1-11": func.KPOINTS([6, 3, 1], [0, 0, 0]), + "S3-RA110-S1-12": func.KPOINTS([6, 6, 1], [0, 0, 0]), + "S5-RA001-S210": func.KPOINTS([4, 4, 1], [0, 0, 0]), + "S5-RA001-S310": func.KPOINTS([4, 3, 1], [0, 0, 0]), + "S9-RA110-S2-21": func.KPOINTS([4, 6, 1], [0, 0, 0]), +} + +list_GB = ["S11-RA110-S3-32", "S3-RA110-S1-11", "S3-RA110-S1-12", "S9-RA110-S2-21"] 
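# Example lookups for the k-point settings defined above (illustrative comments only):
#   GB_Kpoint_rough_dict["S3-RA110-S1-11"] -> func.KPOINTS([4, 2, 1], [0, 0, 0])
#   GB_Kpoint_fine_dict["S3-RA110-S1-11"]  -> func.KPOINTS([6, 3, 1], [0, 0, 0])
# list_GB (above) collects the four GBs analysed in this module; list_element (below)
# holds the solute elements considered.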
list_element = ["P", "Ti", "V", "Cr", "Mn", "Co", "Ni", "Cu", "Nb", "Mo", "W"] + + def get_immediate_subdirectories(a_dir): - return [f.path for f in os.scandir(a_dir) if f.is_dir() and os.path.basename(f) != ".ipynb_checkpoints"] + return [ + f.path + for f in os.scandir(a_dir) + if f.is_dir() and os.path.basename(f) != ".ipynb_checkpoints" + ] + class DataPaths: - def __init__(self, - DataPath = "C:\\Users\\liger\\OneDrive - The University of Sydney (Staff)\\FeGBProject-Data", - Fpath_Krough = "%s\\P-X-Krough" % DataPath, - Fpath_Kfine = "%s\\P-X-Krefined" % DataPath, - Seg1_path = "%s\\Segregation_1sol" % DataPath, - Seg2_path = "%s\\Segregation_2sol" % DataPath, - Wsep1_rigid_path = "%s\\RGS-1sol" % DataPath, - Wsep1_rel_path = "%s\\Wsep_relaxed-1sol" % DataPath, - Wsep2_rigid_path = "%s\\RGS" % DataPath, - Wsep2_rel_path = "%s\\Wsep_rel" % DataPath, - BO1_path = "%s\\BondOrder-1solute" % DataPath, - BO2_path = "%s\\BondOrder-2solute" % DataPath): + def __init__( + self, + DataPath="C:\\Users\\liger\\OneDrive - The University of Sydney (Staff)\\FeGBProject-Data", + Fpath_Krough="%s\\P-X-Krough" % DataPath, + Fpath_Kfine="%s\\P-X-Krefined" % DataPath, + Seg1_path="%s\\Segregation_1sol" % DataPath, + Seg2_path="%s\\Segregation_2sol" % DataPath, + Wsep1_rigid_path="%s\\RGS-1sol" % DataPath, + Wsep1_rel_path="%s\\Wsep_relaxed-1sol" % DataPath, + Wsep2_rigid_path="%s\\RGS" % DataPath, + Wsep2_rel_path="%s\\Wsep_rel" % DataPath, + BO1_path="%s\\BondOrder-1solute" % DataPath, + BO2_path="%s\\BondOrder-2solute" % DataPath, + ): self.DataPath = DataPath self.Fpath_Krough = "%s\\P-X-Krough" % DataPath @@ -106,39 +129,98 @@ def __init__(self, self.BO1_path = "%s\\BondOrder-1solute" % DataPath self.BO2_path = "%s\\BondOrder-2solute" % DataPath + class PlotParameters: """ PlotParameters class contains object-based convenience functionality for plotting parameters """ - def __init__(self, - output_path = "C:\\Users\\liger\\OneDrive - The University of Sydney (Staff)\\FeGB-P-TM-Project\\Manuscript\\Figures-P-TM"): - self.marker_dict = dict(zip(["S3-RA110-S1-11", "S3-RA110-S1-12", "S9-RA110-S2-21", "S11-RA110-S3-32"], - ['o','s','d','^'])) - self.GB_labels = dict(zip(["S3-RA110-S1-11", "S3-RA110-S1-12", "S9-RA110-S2-21", "S11-RA110-S3-32"], - [r"$\Sigma3\ [110](1\bar{1}1)$", r"$\Sigma3\ [110](1\bar{1}2)$", r"$\Sigma9\ [110](2\bar{2}1)$", r"$\Sigma11\ [110](3\bar{3}2)$"])) - self.GB_labels_short = dict(zip(["S3-RA110-S1-11", "S3-RA110-S1-12", "S9-RA110-S2-21", "S11-RA110-S3-32"], - [r"$\Sigma3(1\bar{1}1)$", r"$\Sigma3(1\bar{1}2)$", r"$\Sigma9(2\bar{2}1)$", r"$\Sigma11(3\bar{3}2)$"])) + + def __init__( + self, + output_path="C:\\Users\\liger\\OneDrive - The University of Sydney (Staff)\\FeGB-P-TM-Project\\Manuscript\\Figures-P-TM", + ): + self.marker_dict = dict( + zip( + [ + "S3-RA110-S1-11", + "S3-RA110-S1-12", + "S9-RA110-S2-21", + "S11-RA110-S3-32", + ], + ["o", "s", "d", "^"], + ) + ) + self.GB_labels = dict( + zip( + [ + "S3-RA110-S1-11", + "S3-RA110-S1-12", + "S9-RA110-S2-21", + "S11-RA110-S3-32", + ], + [ + r"$\Sigma3\ [110](1\bar{1}1)$", + r"$\Sigma3\ [110](1\bar{1}2)$", + r"$\Sigma9\ [110](2\bar{2}1)$", + r"$\Sigma11\ [110](3\bar{3}2)$", + ], + ) + ) + self.GB_labels_short = dict( + zip( + [ + "S3-RA110-S1-11", + "S3-RA110-S1-12", + "S9-RA110-S2-21", + "S11-RA110-S3-32", + ], + [ + r"$\Sigma3(1\bar{1}1)$", + r"$\Sigma3(1\bar{1}2)$", + r"$\Sigma9(2\bar{2}1)$", + r"$\Sigma11(3\bar{3}2)$", + ], + ) + ) self.output_path = "C:\\Users\\liger\\OneDrive - The University of Sydney 
(Staff)\\FeGB-P-TM-Project\\Manuscript\\Figures-P-TM" self.label_string_S11_RA110_S3_32 = r"$\Sigma11\ [110](3\bar{3}2)$" self.label_string_S3_RA110_S1_11 = r"$\Sigma3\ [110](1\bar{1}1)$" self.label_string_S3_RA110_S1_12 = r"$\Sigma3\ [110](1\bar{1}2)$" self.label_string_S9_RA110_S2_21 = r"$\Sigma9\ [110](2\bar{2}1)$" - self.color_ele_dict = dict(zip(["P", "Ti", "V", "Cr", "Mn", "Co", "Ni", "Cu", "Nb", "Mo", "W"], - ["olive", "blue", "orange", "green", "red", "black", "brown", "pink", "darkviolet", "lime", "cyan"])) - -class SegregationEnergyData_2sol(): - def __init__(self, savefile = True): + self.color_ele_dict = dict( + zip( + ["P", "Ti", "V", "Cr", "Mn", "Co", "Ni", "Cu", "Nb", "Mo", "W"], + [ + "olive", + "blue", + "orange", + "green", + "red", + "black", + "brown", + "pink", + "darkviolet", + "lime", + "cyan", + ], + ) + ) + + +class SegregationEnergyData_2sol: + def __init__(self, savefile=True): Segregation_1sol = SegregationEnergyData_1sol() - def get_1sol_cohesion_effect(GB, element, property, df = None): - ''' + + def get_1sol_cohesion_effect(GB, element, property, df=None): + """ GB = GB string (e.g. S11-RA110-S3-32) element = element string (e.g. V) property = one of string: eta_RGS, eta_rel, eta_ANSBO df = the output from get_1sol_cohesion_summary() method not strictly necessary, but is required for the purposes of speedup - ''' + """ if df is None: df = get_1sol_cohesion_summary(GB) ele_df = df[df["element"] == element] @@ -153,10 +235,18 @@ def get_2sol_cohesion_RGS(GB, case): area = get_area("%s\\%s\\Co\\GB\\CONTCAR" % (fp_Seg1_path, GB)) case_df = pd.read_csv(csv_path) # total energy of non-cleaved GB structure - total_energy = GB_energetics_df[GB_energetics_df["system"] == case].energy.values[0] - Wsep_RGS_list = [(row.energy - total_energy) * 16.02176565 / area for _, row in case_df.iterrows()] + total_energy = GB_energetics_df[ + GB_energetics_df["system"] == case + ].energy.values[0] + Wsep_RGS_list = [ + (row.energy - total_energy) * 16.02176565 / area + for _, row in case_df.iterrows() + ] Wsep_RGS = min(np.array(Wsep_RGS_list)) - cp_list = [float(row.system.split(sep="-")[-1]) for _, row in case_df.iterrows()] + cp_list = [ + float(row.system.split(sep="-")[-1]) + for _, row in case_df.iterrows() + ] else: Wsep_RGS = np.nan Wsep_RGS_list = np.nan @@ -169,13 +259,21 @@ def get_2sol_cohesion_Wseprel(GB, case): GB_energetics_df = self.GB_treated_dfs_dict[GB] csv_path = "%s\\%s\\info.csv" % (fp_Wsep2_rel, GB) df = pd.read_csv(csv_path) - df["base_system"] = ["-".join(row.system.split(sep = "-")[:-2]) for _, row in df.iterrows()] + df["base_system"] = [ + "-".join(row.system.split(sep="-")[:-2]) for _, row in df.iterrows() + ] if case in df.base_system.values: area = get_area("%s\\%s\\Co\\GB\\CONTCAR" % (fp_Seg1_path, GB)) # total energy of non-cleaved GB structure - total_energy = GB_energetics_df[GB_energetics_df["system"] == case].energy.values[0] - Wsep_rel = (df[df["base_system"] == case].energy.values[0] - total_energy) * 16.02176565 / area + total_energy = GB_energetics_df[ + GB_energetics_df["system"] == case + ].energy.values[0] + Wsep_rel = ( + (df[df["base_system"] == case].energy.values[0] - total_energy) + * 16.02176565 + / area + ) else: Wsep_rel = np.nan @@ -183,13 +281,20 @@ def get_2sol_cohesion_Wseprel(GB, case): def get_2sol_cohesion_BO(GB, case): csv_RGS_path = "%s\\%s\\%s\\info.csv" % (fp_Wsep2_rigid, GB, case) - if os.path.isfile(csv_RGS_path) and os.path.isfile("%s\\%s\\%s\\CONTCAR" % (fp_BO2, GB, case)): + if os.path.isfile(csv_RGS_path) and 
os.path.isfile( + "%s\\%s\\%s\\CONTCAR" % (fp_BO2, GB, case) + ): case_df = pd.read_csv(csv_RGS_path) - cp_list = [float(row.system.split(sep="-")[-1]) for _, row in case_df.iterrows()] - min_bo_list, _ = cp_bondorder(structure_path = "%s\\%s\\%s\\CONTCAR" % (fp_BO2, GB, case),\ - DDEC_output_path = "%s\\%s\\%s" % (fp_BO2, GB, case),\ - cleavage_plane_array = cp_list,\ - bo_threshold = 0) + cp_list = [ + float(row.system.split(sep="-")[-1]) + for _, row in case_df.iterrows() + ] + min_bo_list, _ = cp_bondorder( + structure_path="%s\\%s\\%s\\CONTCAR" % (fp_BO2, GB, case), + DDEC_output_path="%s\\%s\\%s" % (fp_BO2, GB, case), + cleavage_plane_array=cp_list, + bo_threshold=0, + ) min_bo_CP = cp_list[np.argmin(min_bo_list)] min_bo = min(min_bo_list) else: @@ -204,31 +309,55 @@ def get_2sol_cohesion_BO(GB, case): df_Krough_list = [] for element in get_immediate_subdirectories(GB): df_Krough = pd.read_csv("%s\\info.csv" % element) - df_Krough["distance"] = [get_dist_solutes("%s\\%s" % (element, row.system)) for i, row in df_Krough.iterrows()] - df_Krough["element"] = [row.system.split(sep="-")[0] - if row.system.split(sep="-")[0] != "P" - else row.system.split(sep="-")[-2] - for i, row in df_Krough.iterrows()] + df_Krough["distance"] = [ + get_dist_solutes("%s\\%s" % (element, row.system)) + for i, row in df_Krough.iterrows() + ] + df_Krough["element"] = [ + ( + row.system.split(sep="-")[0] + if row.system.split(sep="-")[0] != "P" + else row.system.split(sep="-")[-2] + ) + for i, row in df_Krough.iterrows() + ] df_Krough_list.append(df_Krough) df_Krough_all = pd.concat(df_Krough_list) # Re-organise into ordering agnostic blocks (e.g. P-X and X-P both fall under "X" calls to dict) df_Krough_list = [] for element in df_Krough_all.element.unique(): - df_Krough_list.append(df_Krough_all[df_Krough_all["element"] == element]) + df_Krough_list.append( + df_Krough_all[df_Krough_all["element"] == element] + ) df_Krough_dict = dict(zip(df_Krough_all.element.unique(), df_Krough_list)) dict_Krough_list.append(df_Krough_dict) - self.GB_Krough_df_dict = dict(zip([os.path.basename(GB) for GB in get_immediate_subdirectories(fp_Fpath_Krough)], dict_Krough_list)) + self.GB_Krough_df_dict = dict( + zip( + [ + os.path.basename(GB) + for GB in get_immediate_subdirectories(fp_Fpath_Krough) + ], + dict_Krough_list, + ) + ) dict_Kfine_list = [] for GB in get_immediate_subdirectories(fp_Fpath_Kfine): df_Kfine_list = [] for element in get_immediate_subdirectories(GB): df_Kfine = pd.read_csv("%s\\info.csv" % element) - df_Kfine["distance"] = [get_dist_solutes("%s\\%s" % (element, row.system)) for i, row in df_Kfine.iterrows()] - df_Kfine["element"] = [row.system.split(sep="-")[0] - if row.system.split(sep="-")[0] != "P" - else row.system.split(sep="-")[-2] - for i, row in df_Kfine.iterrows()] + df_Kfine["distance"] = [ + get_dist_solutes("%s\\%s" % (element, row.system)) + for i, row in df_Kfine.iterrows() + ] + df_Kfine["element"] = [ + ( + row.system.split(sep="-")[0] + if row.system.split(sep="-")[0] != "P" + else row.system.split(sep="-")[-2] + ) + for i, row in df_Kfine.iterrows() + ] df_Kfine_list.append(df_Kfine) df_Kfine_all = pd.concat(df_Kfine_list) @@ -237,7 +366,15 @@ def get_2sol_cohesion_BO(GB, case): df_Kfine_list.append(df_Kfine_all[df_Kfine_all["element"] == element]) df_Kfine_dict = dict(zip(df_Kfine_all.element.unique(), df_Kfine_list)) dict_Kfine_list.append(df_Kfine_dict) - self.GB_Kfine_df_dict = dict(zip([os.path.basename(GB) for GB in get_immediate_subdirectories(fp_Fpath_Kfine)], dict_Kfine_list)) 
+ self.GB_Kfine_df_dict = dict( + zip( + [ + os.path.basename(GB) + for GB in get_immediate_subdirectories(fp_Fpath_Kfine) + ], + dict_Kfine_list, + ) + ) df_all_list = [] for GB in self.GB_Kfine_df_dict: @@ -249,18 +386,24 @@ def get_2sol_cohesion_BO(GB, case): corr_list = [] for i, case in df_Kf.iterrows(): if len(df_Kr[df_Kr["system"] == case.system]): - energy_Kf = df_Kf[df_Kf["system"] == case.system].energy.values[0] - energy_Kr = df_Kr[df_Kr["system"] == case.system].energy.values[0] + energy_Kf = df_Kf[ + df_Kf["system"] == case.system + ].energy.values[0] + energy_Kr = df_Kr[ + df_Kr["system"] == case.system + ].energy.values[0] # E_kr + corr = E_kf correction = energy_Kf - energy_Kr corr_list.append(correction) else: corr_list.append(np.nan) - df_Kr['energy'] = [row.energy + np.mean(corr_list) for i, row in df_Kr.iterrows()] + df_Kr["energy"] = [ + row.energy + np.mean(corr_list) for i, row in df_Kr.iterrows() + ] df = df_Kf.append(df_Kr) - #print(corr_list) - #print("GB: %s, element: %s, corr_value: %.2f eV" % (GB, element, np.round(np.mean(corr_list),4))) - df.drop_duplicates(subset=['system'], keep='first', inplace=True) + # print(corr_list) + # print("GB: %s, element: %s, corr_value: %.2f eV" % (GB, element, np.round(np.mean(corr_list),4))) + df.drop_duplicates(subset=["system"], keep="first", inplace=True) df.sort_values("system", inplace=True) else: df = self.GB_Kfine_df_dict[GB][element] @@ -289,27 +432,41 @@ def get_2sol_cohesion_BO(GB, case): # Get the site number of solute 2 df["site_2"] = [int(x.split(sep="-")[-1]) for x in df.system.values] # Get the Eseg in isolation of solute 1 - df["E_seg_s1"] = [Segregation_1sol.get_Eseg(GB=GB, element=case.element_1, site=case.site_1) for i, case in df.iterrows()] + df["E_seg_s1"] = [ + Segregation_1sol.get_Eseg( + GB=GB, element=case.element_1, site=case.site_1 + ) + for i, case in df.iterrows() + ] # Get the Eseg in isolation of solute 2 - df["E_seg_s2"] = [Segregation_1sol.get_Eseg(GB=GB, element=case.element_2, site=case.site_2) for i, case in df.iterrows()] + df["E_seg_s2"] = [ + Segregation_1sol.get_Eseg( + GB=GB, element=case.element_2, site=case.site_2 + ) + for i, case in df.iterrows() + ] if GB == "S11-RA110-S3-32": GB_use = "S11-RA110-S3-32-2x2" else: GB_use = GB # Get the energy of solution - df["solnE_s1"] = [get_solution_energy(GB_use, case.element_1) for i, case in df.iterrows()] - df["solnE_s2"] = [get_solution_energy(GB_use, case.element_2) for i, case in df.iterrows()] + df["solnE_s1"] = [ + get_solution_energy(GB_use, case.element_1) for i, case in df.iterrows() + ] + df["solnE_s2"] = [ + get_solution_energy(GB_use, case.element_2) for i, case in df.iterrows() + ] ## Get the energy of base solute structure in isolation of solute 1 #### THIS NEEDS TO BE FIXED - TOTEN NEEDS TO BE THAT OF THE ORIGINAL SIZED GB (1x1 in S11, S9) - #conditional_timer = time.time() + # conditional_timer = time.time() E_base_list = [] for i, case in df.iterrows(): if GB == "S9-RA110-S2-21" and case.system.split(sep="-")[0] == "P": # -549.38781758 is the value for P-36-d-0.0 structure in the 1x1 S9 cell E_base_list.append(-549.38781758) else: - E_base_list.append(get_totalenergy(GB, structure_name = case.system)) - #print("%.2fs elapsed for checking conditional"% (time.time() - conditional_timer)) + E_base_list.append(get_totalenergy(GB, structure_name=case.system)) + # print("%.2fs elapsed for checking conditional"% (time.time() - conditional_timer)) df["E_base"] = E_base_list df["E_GB"] = get_totalenergy(GB) # Incremental energy of 
segregation @@ -321,139 +478,215 @@ def get_2sol_cohesion_BO(GB, case): # Eseg_inc = 0 df["E_seg_inc_c"] = [x if x < 0 else 0 for x in df.E_seg_inc.values] # Total energy of segregation - df["E_seg_total"] = df["energy"] - df["E_GB"] - df["solnE_s2"] - df["solnE_s1"] + df["E_seg_total"] = ( + df["energy"] - df["E_GB"] - df["solnE_s2"] - df["solnE_s1"] + ) # This only works if the E_int is NOT POSITIVE df["E_correction"] = [0 if x < 0 else -x for x in df.E_seg_inc.values] # total energy with correction if Eseg_inc > 0 df["toten_c"] = df["energy"] + df["E_correction"] # E_correction for applying to total energy. - df["E_seg_total_c"] = df["energy"] - df["E_GB"] - df["solnE_s2"] - df["solnE_s1"] + df["E_correction"] + df["E_seg_total_c"] = ( + df["energy"] + - df["E_GB"] + - df["solnE_s2"] + - df["solnE_s1"] + + df["E_correction"] + ) # Interaction energy (If the first solute wasn't there, difference in energy) df["E_int"] = df["E_seg_inc"] - df["E_seg_s2"] - # # corrected energy of interaction - # df["E_int_c"] = [-float(get_1sol_EsegWithoutSite(os.path.basename(GB),x.element_2,x.site_1))\ - # if x.E_seg_inc_c == 0\ - # else x.E_seg_inc - x.E_seg_s2 for _, x in df.iterrows()] - - #df_out = df[["system","E_seg_inc", "E_int", "distance", "convergence"]]\ - - df["Wsep_RGS"] = [get_2sol_cohesion_RGS(GB, row.system)[0] for _, row in df.iterrows()] - df["Wsep_RGS_list"] = [get_2sol_cohesion_RGS(GB, row.system)[1] for _, row in df.iterrows()] - df["cp_list"] = [get_2sol_cohesion_RGS(GB, row.system)[2] for _, row in df.iterrows()] + # # corrected energy of interaction + # df["E_int_c"] = [-float(get_1sol_EsegWithoutSite(os.path.basename(GB),x.element_2,x.site_1))\ + # if x.E_seg_inc_c == 0\ + # else x.E_seg_inc - x.E_seg_s2 for _, x in df.iterrows()] + + # df_out = df[["system","E_seg_inc", "E_int", "distance", "convergence"]]\ + + df["Wsep_RGS"] = [ + get_2sol_cohesion_RGS(GB, row.system)[0] for _, row in df.iterrows() + ] + df["Wsep_RGS_list"] = [ + get_2sol_cohesion_RGS(GB, row.system)[1] for _, row in df.iterrows() + ] + df["cp_list"] = [ + get_2sol_cohesion_RGS(GB, row.system)[2] for _, row in df.iterrows() + ] df["eta_RGS"] = df["Wsep_RGS"] - GB_pure_WsepRGS - df["eta_RGS_ele1"] = [get_1sol_cohesion_effect(GB, row.element_1, "eta_RGS", df = df_1sol) for _, row in df.iterrows()] - df["eta_RGS_ele2"] = [get_1sol_cohesion_effect(GB, row.element_2, "eta_RGS", df = df_1sol) for _, row in df.iterrows()] + df["eta_RGS_ele1"] = [ + get_1sol_cohesion_effect(GB, row.element_1, "eta_RGS", df=df_1sol) + for _, row in df.iterrows() + ] + df["eta_RGS_ele2"] = [ + get_1sol_cohesion_effect(GB, row.element_2, "eta_RGS", df=df_1sol) + for _, row in df.iterrows() + ] df["heur_eta_RGS"] = df["eta_RGS_ele1"] + df["eta_RGS_ele2"] - df["Wsep_rel"] = [get_2sol_cohesion_Wseprel(GB, row.system) for _, row in df.iterrows()] + df["Wsep_rel"] = [ + get_2sol_cohesion_Wseprel(GB, row.system) for _, row in df.iterrows() + ] df["eta_rel"] = df["Wsep_rel"] - GB_pure_Wseprel - df["eta_rel_ele1"] = [get_1sol_cohesion_effect(GB, row.element_1, "eta_rel", df = df_1sol) for _, row in df.iterrows()] - df["eta_rel_ele2"] = [get_1sol_cohesion_effect(GB, row.element_2, "eta_rel", df = df_1sol) for _, row in df.iterrows()] + df["eta_rel_ele1"] = [ + get_1sol_cohesion_effect(GB, row.element_1, "eta_rel", df=df_1sol) + for _, row in df.iterrows() + ] + df["eta_rel_ele2"] = [ + get_1sol_cohesion_effect(GB, row.element_2, "eta_rel", df=df_1sol) + for _, row in df.iterrows() + ] df["heur_eta_rel"] = df["eta_rel_ele1"] + df["eta_rel_ele2"] - 
df["ANSBO"] = [get_2sol_cohesion_BO(GB, row.system)[0] if row.E_seg_inc < 0 else np.nan for _, row in df.iterrows()] - df["ANSBO_list"] = [get_2sol_cohesion_BO(GB, row.system)[1] if row.E_seg_inc < 0 else np.nan for _, row in df.iterrows() ] - df["ANSBO_min_cp"] = [get_2sol_cohesion_BO(GB, row.system)[2] if row.E_seg_inc < 0 else np.nan for _, row in df.iterrows()] + df["ANSBO"] = [ + get_2sol_cohesion_BO(GB, row.system)[0] if row.E_seg_inc < 0 else np.nan + for _, row in df.iterrows() + ] + df["ANSBO_list"] = [ + get_2sol_cohesion_BO(GB, row.system)[1] if row.E_seg_inc < 0 else np.nan + for _, row in df.iterrows() + ] + df["ANSBO_min_cp"] = [ + get_2sol_cohesion_BO(GB, row.system)[2] if row.E_seg_inc < 0 else np.nan + for _, row in df.iterrows() + ] df["eta_ANSBO"] = df["ANSBO"] - GB_pure_ANSBO - df["eta_ANSBO_ele1"] = [get_1sol_cohesion_effect(GB, row.element_1, "eta_ANSBO", df = df_1sol) for _, row in df.iterrows()] - df["eta_ANSBO_ele2"] = [get_1sol_cohesion_effect(GB, row.element_2, "eta_ANSBO", df = df_1sol) for _, row in df.iterrows()] + df["eta_ANSBO_ele1"] = [ + get_1sol_cohesion_effect(GB, row.element_1, "eta_ANSBO", df=df_1sol) + for _, row in df.iterrows() + ] + df["eta_ANSBO_ele2"] = [ + get_1sol_cohesion_effect(GB, row.element_2, "eta_ANSBO", df=df_1sol) + for _, row in df.iterrows() + ] df["heur_eta_ANSBO"] = df["eta_ANSBO_ele1"] + df["eta_ANSBO_ele2"] - df["element"] = [row.system.split(sep="-")[0] - if row.system.split(sep="-")[0] != "P" - else row.system.split(sep="-")[-2] - for i, row in df.iterrows()] - df_output = df.copy()[["system", "element_1", "site_1", "element_2", "site_2", "energy",\ - "solnE_s1", "solnE_s2", "E_base", "E_GB",\ - "E_seg_s1", "E_seg_s2", "E_seg_inc", "E_seg_total_c", "E_int", "distance", "element",\ - "Wsep_RGS", "Wsep_rel", "ANSBO", "cp_list", "Wsep_RGS_list", "ANSBO_list"]] - #df = np.round(df, 3) + df["element"] = [ + ( + row.system.split(sep="-")[0] + if row.system.split(sep="-")[0] != "P" + else row.system.split(sep="-")[-2] + ) + for i, row in df.iterrows() + ] + df_output = df.copy()[ + [ + "system", + "element_1", + "site_1", + "element_2", + "site_2", + "energy", + "solnE_s1", + "solnE_s2", + "E_base", + "E_GB", + "E_seg_s1", + "E_seg_s2", + "E_seg_inc", + "E_seg_total_c", + "E_int", + "distance", + "element", + "Wsep_RGS", + "Wsep_rel", + "ANSBO", + "cp_list", + "Wsep_RGS_list", + "ANSBO_list", + ] + ] + # df = np.round(df, 3) GB_df_list.append(df) - print("%.2fs elapsed for GB step"% (time.time() - GB_step_time)) + print("%.2fs elapsed for GB step" % (time.time() - GB_step_time)) if i == 0 and savefile: - df_output.to_excel("%s\\energetics_analysis.xlsx" % (os.getcwd()),\ - sheet_name = "%s" % (os.path.basename(GB))) + df_output.to_excel( + "%s\\energetics_analysis.xlsx" % (os.getcwd()), + sheet_name="%s" % (os.path.basename(GB)), + ) else: - with pd.ExcelWriter("%s\\energetics_analysis.xlsx" % (os.getcwd()), mode="a", engine="openpyxl") as writer: - df_output.to_excel(writer, sheet_name = "%s" % (os.path.basename(GB))) + with pd.ExcelWriter( + "%s\\energetics_analysis.xlsx" % (os.getcwd()), + mode="a", + engine="openpyxl", + ) as writer: + df_output.to_excel(writer, sheet_name="%s" % (os.path.basename(GB))) self.GB_energetics_dict = dict(zip(list_GB, GB_df_list)) - -class SegregationEnergyData_1sol(): +class SegregationEnergyData_1sol: def __init__(self): #################################################################################################### # S3 S111 studied_list = [20, 22, 24, 26, 28, 30, 32, 34, 36] # 0.5-1ML available 
- symmetry = [[21, 52, 53],\ - [23, 50, 51],\ - [25, 48, 49],\ - [27, 46, 47],\ - [29, 44, 45],\ - [31, 42, 43],\ - [33, 40, 41],\ - [35, 38, 39],\ - [37]] + symmetry = [ + [21, 52, 53], + [23, 50, 51], + [25, 48, 49], + [27, 46, 47], + [29, 44, 45], + [31, 42, 43], + [33, 40, 41], + [35, 38, 39], + [37], + ] # When the site is on the GB plane, we don't need to calculate values on both sides - self.S3_RA110_S1_11_symmetrydict = dict(zip(studied_list,symmetry)) + self.S3_RA110_S1_11_symmetrydict = dict(zip(studied_list, symmetry)) #################################################################################################### # S3 S112 studied_list = [12, 14, 16, 18, 20, 22, 24] # 0.5-1ML available - symmetry = [[13, 36, 37],\ - [15, 34, 35],\ - [17, 32, 33],\ - [19, 30, 31],\ - [21, 28, 29],\ - [23, 26, 27],\ - [25]] + symmetry = [ + [13, 36, 37], + [15, 34, 35], + [17, 32, 33], + [19, 30, 31], + [21, 28, 29], + [23, 26, 27], + [25], + ] # When the site is on the GB plane, we don't need to calculate values on both sides - self.S3_RA110_S1_12_symmetrydict = dict(zip(studied_list,symmetry)) + self.S3_RA110_S1_12_symmetrydict = dict(zip(studied_list, symmetry)) #################################################################################################### # S9 studied_list = [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36] # only 0-1 ML available - symmetry = [[47],\ - [46],\ - [45],\ - [44],\ - [43],\ - [42],\ - [41],\ - [40],\ - [39],\ - [38],\ - [37],\ - [],\ - [],\ - []] + symmetry = [ + [47], + [46], + [45], + [44], + [43], + [42], + [41], + [40], + [39], + [38], + [37], + [], + [], + [], + ] # When the site is on the GB plane, we don't need to calculate values on both sides - self.S9_RA110_S2_21_symmetrydict = dict(zip(studied_list,symmetry)) + self.S9_RA110_S2_21_symmetrydict = dict(zip(studied_list, symmetry)) #################################################################################################### # S11 studied_list = [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] # only 0-1 ML available - symmetry = [[32],\ - [31],\ - [30],\ - [29],\ - [28],\ - [27],\ - [26],\ - [25],\ - [24],\ - [23],\ - [],\ - []] + symmetry = [[32], [31], [30], [29], [28], [27], [26], [25], [24], [23], [], []] # Full dictionary of solutes and sites - self.S11_RA110_S3_32_symmetrydict = dict(zip(studied_list,symmetry)) - self.GB_sym_dict = dict(zip(list_GB, - [self.S11_RA110_S3_32_symmetrydict, - self.S3_RA110_S1_11_symmetrydict, - self.S3_RA110_S1_12_symmetrydict, - self.S9_RA110_S2_21_symmetrydict])) + self.S11_RA110_S3_32_symmetrydict = dict(zip(studied_list, symmetry)) + self.GB_sym_dict = dict( + zip( + list_GB, + [ + self.S11_RA110_S3_32_symmetrydict, + self.S3_RA110_S1_11_symmetrydict, + self.S3_RA110_S1_12_symmetrydict, + self.S9_RA110_S2_21_symmetrydict, + ], + ) + ) #################################################################################################### # Create a dictionary that may be accessed # dict[GB][element][site] that contains information on all sites @@ -477,19 +710,19 @@ def __init__(self): df = get_1sol_df("%s\\%s" % (GB_path, os.path.basename(element))) sym_df = df.copy() - sym_df["base_site"] = [int(x) for x in sym_df['site']] - sym_df['site'] = [int(x.site) for i, x in sym_df.iterrows()] + sym_df["base_site"] = [int(x) for x in sym_df["site"]] + sym_df["site"] = [int(x.site) for i, x in sym_df.iterrows()] for _, sites_calculated in sym_df.iterrows(): - for x in sym_dict[sites_calculated['base_site']]: + for x in 
sym_dict[sites_calculated["base_site"]]: sym_site = sites_calculated.copy() - sym_sys = sym_site['system'].split(sep="-") + sym_sys = sym_site["system"].split(sep="-") sym_sys[1] = str(x) sym_sys = "-".join(sym_sys) - sym_site['system'] = sym_sys + sym_site["system"] = sym_sys - sym_site['site'] = int(x) + sym_site["site"] = int(x) sym_df = sym_df.append(sym_site) ele_site_eseg_dict = dict(zip(sym_df.site, sym_df.E_seg.values)) @@ -509,87 +742,103 @@ def __init__(self): self.Eseg_dict = dict(zip(list_GB, Eseg_data_list)) self.toten_dict = dict(zip(list_GB, toten_data_list)) - def get_Eseg(self, GB, element, site, warning = True): - ''' + def get_Eseg(self, GB, element, site, warning=True): + """ Convenience value extractor for single solute segregation energy at specified GB, element, site Args: GB = string for GB (e.g. S11-RA110-S3-32) element = string for element (e.g. "W" for tungsten) site = integer for site (0-indexed) - ''' + """ if GB in self.Eseg_dict: if element in self.Eseg_dict[GB]: if site in self.Eseg_dict[GB][element]: E_seg = self.Eseg_dict[GB][element][site] - #print(E_seg) + # print(E_seg) else: E_seg = np.nan if warning: - print("No such site \"%s\" in element \"%s\" in %s dict: check site" % (site, element, GB)) + print( + 'No such site "%s" in element "%s" in %s dict: check site' + % (site, element, GB) + ) else: E_seg = np.nan if warning: - print("No such element \"%s\" in %s dict: check element" % (element, GB)) + print( + 'No such element "%s" in %s dict: check element' % (element, GB) + ) else: E_seg = np.nan if warning: - print("No such GB \"%s\" in dict: check GB string" % GB) + print('No such GB "%s" in dict: check GB string' % GB) return E_seg - def get_toten(self, GB, element, site, warning = True): - ''' + def get_toten(self, GB, element, site, warning=True): + """ Convenience value extractor for single solute total energy at specified GB, element, site Args: GB = string for GB (e.g. S11-RA110-S3-32) element = string for element (e.g. "W" for tungsten) site = integer for site (0-indexed) - ''' + """ if GB in self.toten_dict: if element in self.toten_dict[GB]: if site in self.toten_dict[GB][element]: toten = self.toten_dict[GB][element][site] - #print(E_seg) + # print(E_seg) else: toten = np.nan if warning: - print("No such site \"%s\" in element \"%s\" in %s dict: check site" % (site, element, GB)) + print( + 'No such site "%s" in element "%s" in %s dict: check site' + % (site, element, GB) + ) else: toten = np.nan if warning: - print("No such element \"%s\" in %s dict: check element" % (element, GB)) + print( + 'No such element "%s" in %s dict: check element' % (element, GB) + ) else: toten = np.nan if warning: - print("No such GB \"%s\" in dict: check GB string" % GB) + print('No such GB "%s" in dict: check GB string' % GB) return toten - def get_min_Eseg_without_site(self, GB, element, site, warning = True): - ''' + def get_min_Eseg_without_site(self, GB, element, site, warning=True): + """ Convenience value extractor for minimum single solute segregation energy at specified GB, element, without specified site Args: GB = string for GB (e.g. S11-RA110-S3-32) element = string for element (e.g. 
"W" for tungsten) site = integer for site to exclude (0-indexed) - ''' + """ if GB in self.Eseg_dict: if element in self.Eseg_dict[GB]: if site in self.Eseg_dict[GB][element]: min_Eseg = self.Eseg_dict[GB][element][site] - #print(E_seg) + # print(E_seg) else: min_Eseg = np.nan if warning: - print("No such site \"%s\" in element \"%s\" in %s dict: check site" % (site, element, GB)) + print( + 'No such site "%s" in element "%s" in %s dict: check site' + % (site, element, GB) + ) else: min_Eseg = np.nan if warning: - print("No such element \"%s\" in %s dict: check element" % (element, GB)) + print( + 'No such element "%s" in %s dict: check element' % (element, GB) + ) else: min_Eseg = np.nan if warning: - print("No such GB \"%s\" in dict: check GB string" % GB) + print('No such GB "%s" in dict: check GB string' % GB) return toten + def get_dist_solutes(fp_2solutes): """ Returns distance between pair of non-Fe solutes for a specified structure @@ -599,12 +848,15 @@ def get_dist_solutes(fp_2solutes): """ structure = Structure.from_file("%s\\CONTCAR" % (fp_2solutes)) # get pair of non-Fe site idx - distance_pair = [i for i, site in enumerate(structure) if site.species_string != "Fe"] + distance_pair = [ + i for i, site in enumerate(structure) if site.species_string != "Fe" + ] distance = structure[distance_pair[1]].distance(structure[distance_pair[0]]) return distance -def get_totalenergy(GB, structure_name = "GB"): + +def get_totalenergy(GB, structure_name="GB"): """ Returns a total energy (eV) value for a specified GB (1 sol case) @@ -615,30 +867,34 @@ def get_totalenergy(GB, structure_name = "GB"): df = pd.read_csv("%s\\%s\\Co\\info.csv" % (fp_Seg1_path, os.path.basename(GB))) E_GB = df[df["system"] == "GB"].energy.values[0] else: - structure_name = '-'.join(structure_name.split(sep="-")[0:-2]) - element = structure_name.split(sep = "-")[0] - df = pd.read_csv("%s\\%s\\%s\\info.csv" % (fp_Seg1_path, os.path.basename(GB), element)) + structure_name = "-".join(structure_name.split(sep="-")[0:-2]) + element = structure_name.split(sep="-")[0] + df = pd.read_csv( + "%s\\%s\\%s\\info.csv" % (fp_Seg1_path, os.path.basename(GB), element) + ) E_GB = df[df["system"] == structure_name].energy.values[0] return E_GB -def get_1sol_df(folder_path, midpoint = 0.5000): +def get_1sol_df(folder_path, midpoint=0.5000): """ Returns a pandas dataframe containing segregation energy, voronoi volume, and magnetic moment profiles for all 1 solute cases in a specified GB folder: "folder_path" """ results = pd.read_csv("%s\\info.csv" % folder_path) # Energy of the pure slab structure - E_slab = results.loc[results['system'] == "SLAB"]['energy'].values[0] + E_slab = results.loc[results["system"] == "SLAB"]["energy"].values[0] # Energy of the slab + 1 solute structure - E_slab_imp = results.loc[results['system'].str.contains('-SLAB-')]['energy'].values[0] + E_slab_imp = results.loc[results["system"].str.contains("-SLAB-")]["energy"].values[ + 0 + ] # Energy of the pure GB - E_GB = results.loc[results['system'] == "GB"]['energy'].values[0] + E_GB = results.loc[results["system"] == "GB"]["energy"].values[0] # Drop any results related to pure GB and any slab structures - df = results.copy().loc[~results['system'].str.contains("SLAB|GB")] + df = results.copy().loc[~results["system"].str.contains("SLAB|GB")] # Calculate energy of segregation - df['E_seg'] = df['energy'] - E_GB - (E_slab_imp - E_slab) + df["E_seg"] = df["energy"] - E_GB - (E_slab_imp - E_slab) ## This section assigns a distance from GB for the solute 
distance_compiled = [] magmom_list_compiled = [] @@ -647,15 +903,25 @@ def get_1sol_df(folder_path, midpoint = 0.5000): vvol_compiled = [] for _, system in df.iterrows(): # Read the CONTCAR structure in the folder - structure = Structure.from_file('%s\\%s\\CONTCAR' % (folder_path, system['system'])) + structure = Structure.from_file( + "%s\\%s\\CONTCAR" % (folder_path, system["system"]) + ) # Get solute number in structure - solute_no = int(system['system'].split(sep = '-')[1:2][0]) - solute_no = [i for i, site in enumerate(structure) if site.species_string != "Fe"][0] + solute_no = int(system["system"].split(sep="-")[1:2][0]) + solute_no = [ + i for i, site in enumerate(structure) if site.species_string != "Fe" + ][0] # Distance to GB plane (center plane frac z) - distance = abs(midpoint - structure[solute_no].frac_coords[-1])*structure.lattice.c + distance = ( + abs(midpoint - structure[solute_no].frac_coords[-1]) * structure.lattice.c + ) distance_compiled.append(distance) # Magnetic moment - magmom_df = pd.read_csv("%s\\%s\\magnet.csv" % (folder_path, system['system']), delim_whitespace=True, header=None)[[1,2,3,4]] + magmom_df = pd.read_csv( + "%s\\%s\\magnet.csv" % (folder_path, system["system"]), + delim_whitespace=True, + header=None, + )[[1, 2, 3, 4]] ## magnetic moment list magmom_list = list(magmom_df[4].values) magmom_list_compiled.append(magmom_list) @@ -663,29 +929,30 @@ def get_1sol_df(folder_path, midpoint = 0.5000): magmom = magmom_list[solute_no] magmom_compiled.append(magmom) # Voronoi volume - vvol_df = pd.read_excel("%s\\%s\\vvol.xlsx" % (folder_path, system['system'])) + vvol_df = pd.read_excel("%s\\%s\\vvol.xlsx" % (folder_path, system["system"])) ## Voronoi volume list vvol_list = json.loads(vvol_df.iloc[1].values[1]) vvol_list_compiled.append(vvol_list) ## Voronoi volume of non Fe solute vvol = vvol_list[solute_no] vvol_compiled.append(vvol) - df['distance_GB'] = distance_compiled - df['magmom'] = magmom_compiled - df['magmom_list'] = magmom_list_compiled - df['vvol'] = vvol_compiled - df['vvol_list'] = vvol_list_compiled - df['d'] = [row.system.split(sep='-')[-1] for i, row in df.iterrows()] - df['site'] = [row.system.split(sep='-')[1] for i, row in df.iterrows()] + df["distance_GB"] = distance_compiled + df["magmom"] = magmom_compiled + df["magmom_list"] = magmom_list_compiled + df["vvol"] = vvol_compiled + df["vvol_list"] = vvol_list_compiled + df["d"] = [row.system.split(sep="-")[-1] for i, row in df.iterrows()] + df["site"] = [row.system.split(sep="-")[1] for i, row in df.iterrows()] return df def get_area(path): structure = Structure.from_file(path) - area = structure.volume/structure.lattice.c + area = structure.volume / structure.lattice.c return area + def get_1sol_etarel_cohesion_df(folder_path): """ Returns Wsep_rel df for all elements in a GB folder @@ -704,11 +971,26 @@ def getEnergyFromData_1sol(GB_name, element, site): # To fix the segregation profile. # Obviously, you can't compute a work of separation comparing 2x1 GB to 1x1 energies. # So, I am adding this exception. 
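        # Summary sketch of this helper (symbols E_cleaved / E_reference are shorthand here):
        # it returns the total energy of the matching un-cleaved GB(+solute) structure, which
        # is used further below as the reference in
        #   Wsep_rel = (E_cleaved - E_reference) * 16.02176565 / area,
        # where 16.02176565 converts eV/Angstrom^2 to J/m^2
        # (1 eV = 1.602176565e-19 J, 1 Angstrom^2 = 1e-20 m^2).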
- data_specified_path = "%s\\%s\\%s\\info-old.csv" % (fp_Seg1_path, GB_name, element) + data_specified_path = "%s\\%s\\%s\\info-old.csv" % ( + fp_Seg1_path, + GB_name, + element, + ) else: - data_specified_path = "%s\\%s\\%s\\info.csv" % (fp_Seg1_path, GB_name, element) + data_specified_path = "%s\\%s\\%s\\info.csv" % ( + fp_Seg1_path, + GB_name, + element, + ) df = pd.read_csv(data_specified_path) - df['solute_no'] = [int(row.system.split(sep="-")[1]) if not any(slabgb in row.system for slabgb in ["SLAB", "GB"]) else "NaN" for _, row in df.iterrows()] + df["solute_no"] = [ + ( + int(row.system.split(sep="-")[1]) + if not any(slabgb in row.system for slabgb in ["SLAB", "GB"]) + else "NaN" + ) + for _, row in df.iterrows() + ] if element == "GB": # listen here you little shit energy = df.loc[df["system"] == "GB"].energy.values[0] @@ -717,16 +999,30 @@ def getEnergyFromData_1sol(GB_name, element, site): return energy - df = pd.read_csv('%s\\info.csv' % folder_path) - df["area"] = [get_area("%s\\%s\\CONTCAR" % (folder_path, cleave_case.system)) for _, cleave_case in df.iterrows()] - df['element'] = [row.system.split(sep="-")[0] for _, row in df.iterrows()] - df["site"] = [int(row.system.split(sep="-")[1]) if "GB" not in row.system else "GB" for _, row in df.iterrows()] - df["GB_energy"] = [getEnergyFromData_1sol(os.path.basename(folder_path), row.element, row.site) for _, row in df.iterrows()] - df["Wsep_rel"] = np.round((df['energy'] - df["GB_energy"]) * 16.02176565 / (df['area']), 3) - df["system_base"] = ["-".join(row.system.split(sep="-")[0:-2]) for _, row in df.iterrows()] + df = pd.read_csv("%s\\info.csv" % folder_path) + df["area"] = [ + get_area("%s\\%s\\CONTCAR" % (folder_path, cleave_case.system)) + for _, cleave_case in df.iterrows() + ] + df["element"] = [row.system.split(sep="-")[0] for _, row in df.iterrows()] + df["site"] = [ + int(row.system.split(sep="-")[1]) if "GB" not in row.system else "GB" + for _, row in df.iterrows() + ] + df["GB_energy"] = [ + getEnergyFromData_1sol(os.path.basename(folder_path), row.element, row.site) + for _, row in df.iterrows() + ] + df["Wsep_rel"] = np.round( + (df["energy"] - df["GB_energy"]) * 16.02176565 / (df["area"]), 3 + ) + df["system_base"] = [ + "-".join(row.system.split(sep="-")[0:-2]) for _, row in df.iterrows() + ] return df + def get_1sol_etarigid_cohesion_df(folder_path): """ Returns Wsep_RGS values for all elements in a GB folder @@ -736,71 +1032,124 @@ def get_1sol_etarigid_cohesion_df(folder_path): """ case_df_list = [] for cases in get_immediate_subdirectories(folder_path): - #print(cases) - results = pd.read_csv('%s\\info.csv' % cases) - GB_energy = results[results['system'] == os.path.basename(cases)]['energy'].values[0] - results["area"] = [get_area("%s\\%s\\CONTCAR" % (cases, cleave_case.system)) for _, cleave_case in results.iterrows()] - results["Wsep"] = (results['energy'] - GB_energy) * 16.02176565 / (results['area']) - #print(results.loc[0::, ['system','Wsep']]) + # print(cases) + results = pd.read_csv("%s\\info.csv" % cases) + GB_energy = results[results["system"] == os.path.basename(cases)][ + "energy" + ].values[0] + results["area"] = [ + get_area("%s\\%s\\CONTCAR" % (cases, cleave_case.system)) + for _, cleave_case in results.iterrows() + ] + results["Wsep"] = ( + (results["energy"] - GB_energy) * 16.02176565 / (results["area"]) + ) + # print(results.loc[0::, ['system','Wsep']]) df = results[results["Wsep"] > 0.0001] new_df = df.copy() - new_df["cleavage_plane_name"] = [x.split(sep="-")[-2] for x in 
df['system'].values] - new_df["cleavage_plane"] = [float(x.split(sep="-")[-1]) for x in df['system'].values] + new_df["cleavage_plane_name"] = [ + x.split(sep="-")[-2] for x in df["system"].values + ] + new_df["cleavage_plane"] = [ + float(x.split(sep="-")[-1]) for x in df["system"].values + ] # Create a single row DF with column names assigned cleavage plane values case_df = pd.DataFrame(new_df["Wsep"].values).transpose() case_df.columns = new_df["cleavage_plane_name"].values - case_df['cleavage_planes'] = [new_df["cleavage_plane"].values] - case_df['cp_names'] = [new_df["cleavage_plane_name"].values] - case_df['system'] = os.path.basename(cases) - case_df['Wsep_RGS_list'] = [np.round(new_df["Wsep"].values,3)] + case_df["cleavage_planes"] = [new_df["cleavage_plane"].values] + case_df["cp_names"] = [new_df["cleavage_plane_name"].values] + case_df["system"] = os.path.basename(cases) + case_df["Wsep_RGS_list"] = [np.round(new_df["Wsep"].values, 3)] case_df_list.append(case_df) - GB_df = np.round(pd.concat(case_df_list),2) - GB_df['Wsep_RGS'] = [np.round(min(wsep_lists),3) for wsep_lists in GB_df['Wsep_RGS_list']] - GB_df['min_cp_name'] = [row.cp_names[np.argmin(row['Wsep_RGS_list'])] for _, row in GB_df.iterrows()] - GB_df['min_cp'] = [row.cleavage_planes[np.argmin(row['Wsep_RGS_list'])] for _, row in GB_df.iterrows()] + GB_df = np.round(pd.concat(case_df_list), 2) + GB_df["Wsep_RGS"] = [ + np.round(min(wsep_lists), 3) for wsep_lists in GB_df["Wsep_RGS_list"] + ] + GB_df["min_cp_name"] = [ + row.cp_names[np.argmin(row["Wsep_RGS_list"])] for _, row in GB_df.iterrows() + ] + GB_df["min_cp"] = [ + row.cleavage_planes[np.argmin(row["Wsep_RGS_list"])] + for _, row in GB_df.iterrows() + ] return GB_df -def cp_bondorder(structure_path = "%s\\CONTCAR" % os.getcwd(),\ - DDEC_output_path = "%s" % os.getcwd(),\ - cleavage_plane_array = [0.5],\ - bo_threshold = 0.0): + +def cp_bondorder( + structure_path="%s\\CONTCAR" % os.getcwd(), + DDEC_output_path="%s" % os.getcwd(), + cleavage_plane_array=[0.5], + bo_threshold=0.0, +): # This function calculates the bond order sum and returns a value, given a structure and chargemol output path # Read the DDEC Output and convert it into a csv temp file structure = Structure.from_file(structure_path) - VASPDDEC_2_CSV("%s\\VASP_DDEC_analysis.output" % DDEC_output_path,"%s\\chargemol.csv" % os.getcwd()) - chargemol_data = pd.read_csv("%s\\chargemol.csv" % os.getcwd(), delim_whitespace=True) + VASPDDEC_2_CSV( + "%s\\VASP_DDEC_analysis.output" % DDEC_output_path, + "%s\\chargemol.csv" % os.getcwd(), + ) + chargemol_data = pd.read_csv( + "%s\\chargemol.csv" % os.getcwd(), delim_whitespace=True + ) chargemol_data = chargemol_data[chargemol_data["final_bond_order"] > bo_threshold] - bond_data = chargemol_data.copy()[['atom1','atom2','repeata','repeatb','final_bond_order']] + bond_data = chargemol_data.copy()[ + ["atom1", "atom2", "repeata", "repeatb", "final_bond_order"] + ] # -1 because chargemol begins indexing at 1, equivalent to structure[0] - bond_data['atom1pos'] = [structure[x-1].frac_coords for x in bond_data['atom1'].values] - bond_data['atom2pos'] = [structure[x-1].frac_coords for x in bond_data['atom2'].values] + bond_data["atom1pos"] = [ + structure[x - 1].frac_coords for x in bond_data["atom1"].values + ] + bond_data["atom2pos"] = [ + structure[x - 1].frac_coords for x in bond_data["atom2"].values + ] # zpos fractional - bond_data['atom1zpos'] = [structure[x-1].frac_coords[-1] for x in bond_data['atom1'].values] - bond_data['atom2zpos'] = 
[structure[x-1].frac_coords[-1] for x in bond_data['atom2'].values] + bond_data["atom1zpos"] = [ + structure[x - 1].frac_coords[-1] for x in bond_data["atom1"].values + ] + bond_data["atom2zpos"] = [ + structure[x - 1].frac_coords[-1] for x in bond_data["atom2"].values + ] # zpos fractional - bond_data['atom1_ele'] = [structure[x-1].species_string for x in bond_data['atom1'].values] - bond_data['atom2_ele'] = [structure[x-1].species_string for x in bond_data['atom2'].values] - cp_bo = []; clp_df_list = [] + bond_data["atom1_ele"] = [ + structure[x - 1].species_string for x in bond_data["atom1"].values + ] + bond_data["atom2_ele"] = [ + structure[x - 1].species_string for x in bond_data["atom2"].values + ] + cp_bo = [] + clp_df_list = [] for cp in cleavage_plane_array: # cleavage plane and find the sum of bond orders passing through it - clp_df = bond_data[(bond_data[['atom2zpos','atom1zpos']].max(axis=1) > cp) - & (bond_data[['atom2zpos','atom1zpos']].min(axis=1) < cp) ] + clp_df = bond_data[ + (bond_data[["atom2zpos", "atom1zpos"]].max(axis=1) > cp) + & (bond_data[["atom2zpos", "atom1zpos"]].min(axis=1) < cp) + ] clp_df = clp_df.copy()[(clp_df["repeata"] == 0) | (clp_df["repeatb"] == 0)] # We only want to calculate for atoms that exist cell. This is important for bond order/area normalisation - clp_df_countonce = clp_df.copy()[(clp_df["repeata"] == 0) & (clp_df["repeatb"] == 0)] - clp_df_counthalf = clp_df.copy()[(clp_df["repeata"] != 0) | (clp_df["repeatb"] != 0)] + clp_df_countonce = clp_df.copy()[ + (clp_df["repeata"] == 0) & (clp_df["repeatb"] == 0) + ] + clp_df_counthalf = clp_df.copy()[ + (clp_df["repeata"] != 0) | (clp_df["repeatb"] != 0) + ] # Basic summed bond order over CP - final_bond_order = clp_df_countonce.final_bond_order.sum() + 0.5*clp_df_counthalf.final_bond_order.sum() + final_bond_order = ( + clp_df_countonce.final_bond_order.sum() + + 0.5 * clp_df_counthalf.final_bond_order.sum() + ) # N largest - #final_bond_order = clp_df.nlargest(15, ['final_bond_order'])["final_bond_order"].sum() + # final_bond_order = clp_df.nlargest(15, ['final_bond_order'])["final_bond_order"].sum() # IMPORTANT: This assumes that the cross sectional area can be calculated this way - a_fbo = final_bond_order/(float(structure.lattice.volume)/float(structure.lattice.c)) - #print("area of this is %s" % (float(structure.lattice.volume)/float(structure.lattice.c))) + a_fbo = final_bond_order / ( + float(structure.lattice.volume) / float(structure.lattice.c) + ) + # print("area of this is %s" % (float(structure.lattice.volume)/float(structure.lattice.c))) cp_bo.append(a_fbo) clp_df_list.append(clp_df) return cp_bo, clp_df_list -def VASPDDEC_2_CSV( filename, output_filename ): + +def VASPDDEC_2_CSV(filename, output_filename): flist = open(filename).readlines() parsing = False matrix = [] @@ -809,18 +1158,21 @@ def VASPDDEC_2_CSV( filename, output_filename ): parsing = False if parsing: matrix.append(line) - #print(line) + # print(line) if "The final bond pair matrix is" in line: parsing = True - f=open(output_filename,'w') - f.write("atom1 atom2 repeata repeatb repeatc " + \ - "min-na max-na min-nb max-nb min-nc max-nc contact-exchange avg-spin-pol-bonding-term overlap-population " + \ - "isoaepfcbo coord-term-tanh pairwise-term exp-term-comb-coord-pairwise " + \ - "bond-idx-before-self-exch final_bond_order \n") + f = open(output_filename, "w") + f.write( + "atom1 atom2 repeata repeatb repeatc " + + "min-na max-na min-nb max-nb min-nc max-nc contact-exchange avg-spin-pol-bonding-term 
overlap-population " + + "isoaepfcbo coord-term-tanh pairwise-term exp-term-comb-coord-pairwise " + + "bond-idx-before-self-exch final_bond_order \n" + ) for bond in matrix: f.write(bond) f.close() + def get_1sol_site_SBO(GB_path): """ Returns summed bond order (DDEC6) for a single-solute case @@ -830,14 +1182,17 @@ def get_1sol_site_SBO(GB_path): GB_path = directory path to GB """ structure = Structure.from_file("%s\\CONTCAR" % GB_path) - solute_no = [i for i, site in enumerate(structure) if site.species_string != "Fe"][0] + solute_no = [i for i, site in enumerate(structure) if site.species_string != "Fe"][ + 0 + ] BO_dict = get_BondOrderInfo(GB_path) - SBO = BO_dict[solute_no]['bond_order_sum'] + SBO = BO_dict[solute_no]["bond_order_sum"] atoms_bond_array = [] - for i in BO_dict[solute_no]['bonded_to']: - atoms_bond_array.append(i['index']) + for i in BO_dict[solute_no]["bonded_to"]: + atoms_bond_array.append(i["index"]) return SBO, atoms_bond_array, solute_no + def get_BondOrderInfo(filename): """ Internal command to process pairwise bond order information @@ -875,6 +1230,7 @@ def get_BondOrderInfo(filename): return bond_order_info + def get_site_SBO(filename, site): """ Internal command to process pairwise bond order information @@ -909,12 +1265,13 @@ def get_site_SBO(filename, site): ) elif "The sum of bond orders for this atom" in line: bond_order_info[start_idx]["bond_order_sum"] = float(l[-1]) -# site_sbo = 0 -# for j in bond_order_info[site]['bonded_to']: -# site_sbo += j['bond_order'] - site_sbo = bond_order_info[site]['bond_order_sum'] + # site_sbo = 0 + # for j in bond_order_info[site]['bonded_to']: + # site_sbo += j['bond_order'] + site_sbo = bond_order_info[site]["bond_order_sum"] return site_sbo + def get_solution_energy(GB, element): """ Returns solution energy for an element of a single-solute case in eV @@ -925,14 +1282,17 @@ def get_solution_energy(GB, element): """ results = pd.read_csv("%s\\%s\\%s\\info.csv" % (fp_Seg1_path, GB, element)) # Energy of the pure slab structure - E_slab = results.loc[results['system'] == "SLAB"]['energy'].values[0] + E_slab = results.loc[results["system"] == "SLAB"]["energy"].values[0] # Energy of the slab + 1 solute structure - E_slab_imp = results.loc[results['system'].str.contains('-SLAB-')]['energy'].values[0] + E_slab_imp = results.loc[results["system"].str.contains("-SLAB-")]["energy"].values[ + 0 + ] # Energy of solution solution_energy = E_slab_imp - E_slab return solution_energy + def get_1sol_cohesion_summary(GB_string): """ Returns a cohesion-energy of segregation summary df that is used for generating @@ -940,42 +1300,56 @@ def get_1sol_cohesion_summary(GB_string): """ RGS_1sol_df = get_1sol_etarigid_cohesion_df("%s\\%s" % (fp_Wsep1_rigid, GB_string)) Wsep_rel_1sol_df = get_1sol_etarel_cohesion_df("%s\\%s" % (fp_Wsep1_rel, GB_string)) - rig_df_merge = RGS_1sol_df.copy()[["system", "Wsep_RGS", "cleavage_planes", "Wsep_RGS_list"]] + rig_df_merge = RGS_1sol_df.copy()[ + ["system", "Wsep_RGS", "cleavage_planes", "Wsep_RGS_list"] + ] rel_df_merge = Wsep_rel_1sol_df.copy()[["system_base", "Wsep_rel"]] - rel_df_merge = rel_df_merge.rename(columns = {"system_base" : "system"}) + rel_df_merge = rel_df_merge.rename(columns={"system_base": "system"}) df = pd.merge(rig_df_merge, rel_df_merge) - df["eta_RGS"] = [np.round(row.Wsep_RGS - df.loc[df['system'] == "GB"].Wsep_RGS.values[0],2) \ - for _, row in df.iterrows()] - df["eta_rel"] = [np.round(row.Wsep_rel - df.loc[df['system'] == "GB"].Wsep_rel.values[0],2) \ - for _, row in 
df.iterrows()] - df['d_eta'] = df["eta_rel"] - df["eta_RGS"] - df["eta_rel_pct"] = (( df["eta_rel"] * 100) \ - / df.loc[df['system'] == "GB"].Wsep_rel.values[0]) - df["eta_RGS_pct"] = (( df["eta_RGS"] * 100 ) \ - / df.loc[df['system'] == "GB"].Wsep_RGS.values[0]) + df["eta_RGS"] = [ + np.round(row.Wsep_RGS - df.loc[df["system"] == "GB"].Wsep_RGS.values[0], 2) + for _, row in df.iterrows() + ] + df["eta_rel"] = [ + np.round(row.Wsep_rel - df.loc[df["system"] == "GB"].Wsep_rel.values[0], 2) + for _, row in df.iterrows() + ] + df["d_eta"] = df["eta_rel"] - df["eta_RGS"] + df["eta_rel_pct"] = (df["eta_rel"] * 100) / df.loc[ + df["system"] == "GB" + ].Wsep_rel.values[0] + df["eta_RGS_pct"] = (df["eta_RGS"] * 100) / df.loc[ + df["system"] == "GB" + ].Wsep_RGS.values[0] df["element"] = [x.system.split(sep="-")[0] for _, x in df.iterrows()] - min_bo = []; bo_df_list = []; bo_array_list = [] + min_bo = [] + bo_df_list = [] + bo_array_list = [] for idx, row in RGS_1sol_df.iterrows(): - cp_array = row['cleavage_planes'] - system = row['system'] - bo_array, bodf_list = cp_bondorder(structure_path = "%s\\%s\\%s\\CONTCAR" % - (fp_BO1, GB_string, system),\ - DDEC_output_path = "%s\\%s\\%s" - % (fp_BO1, GB_string, system),\ - cleavage_plane_array = cp_array,\ - bo_threshold = 0) + cp_array = row["cleavage_planes"] + system = row["system"] + bo_array, bodf_list = cp_bondorder( + structure_path="%s\\%s\\%s\\CONTCAR" % (fp_BO1, GB_string, system), + DDEC_output_path="%s\\%s\\%s" % (fp_BO1, GB_string, system), + cleavage_plane_array=cp_array, + bo_threshold=0, + ) bo_array_list.append(bo_array) min_bo.append(min(bo_array)) - bo_df = bodf_list[np.argmin(bo_array)][["atom1_ele", "atom2_ele", "atom1", "atom2", "final_bond_order"]] + bo_df = bodf_list[np.argmin(bo_array)][ + ["atom1_ele", "atom2_ele", "atom1", "atom2", "final_bond_order"] + ] bo_df = bo_df[bo_df["final_bond_order"] > 0.01] bo_df_list.append(bo_df) - df['ANSBO'] = min_bo - df["eta_ANSBO"] = [row.ANSBO - df.loc[df['system'] == "GB"].ANSBO.values[0] \ - for _, row in df.iterrows()] - df['bond_df'] = bo_df_list + df["ANSBO"] = min_bo + df["eta_ANSBO"] = [ + row.ANSBO - df.loc[df["system"] == "GB"].ANSBO.values[0] + for _, row in df.iterrows() + ] + df["bond_df"] = bo_df_list eseg_list = [] # Section that extracts information about summed bond orders SBO_list = [] @@ -986,14 +1360,16 @@ def get_1sol_cohesion_summary(GB_string): pGB_SBO = np.nan else: SBO = get_1sol_site_SBO("%s\\%s\\%s" % (fp_BO1, GB_string, row.system))[0] - solute_no = get_1sol_site_SBO("%s\\%s\\%s" % (fp_BO1, GB_string, row.system))[2] + solute_no = get_1sol_site_SBO( + "%s\\%s\\%s" % (fp_BO1, GB_string, row.system) + )[2] pGB_SBO = get_site_SBO("%s\\%s\\%s" % (fp_BO1, GB_string, "GB"), solute_no) - #print("%s\\%s\\%s" % (fp_BO1, GB_string, row.system), solute_no) + # print("%s\\%s\\%s" % (fp_BO1, GB_string, row.system), solute_no) SBO_list.append(SBO) pGB_SBO_list.append(pGB_SBO) - df['site_SBO'] = SBO_list - df['site_pGB_SBO'] = pGB_SBO_list - df['site_SBO_delta'] = df['site_SBO'] - df['site_pGB_SBO'] + df["site_SBO"] = SBO_list + df["site_pGB_SBO"] = pGB_SBO_list + df["site_SBO_delta"] = df["site_SBO"] - df["site_pGB_SBO"] # Exception in the case of the S11-RA110-S3-32 case: # Take the segregation energies from the 2x2 cell instead of the 1x1 cell # This was done since interface reconstruction occurs heavily in the 1x1 cell @@ -1001,11 +1377,14 @@ def get_1sol_cohesion_summary(GB_string): GB_string = "S11-RA110-S3-32-2x2" for _, row in df.iterrows(): if 
row.system.split(sep="-")[0] != "GB": - #print(row.system.split(sep="-")[0]) - eseg_df = get_1sol_df("%s\\%s\\%s" % - (fp_Seg1_path, GB_string, row.system.split(sep="-")[0]), - midpoint = 0.5094) - eseg = np.round(eseg_df.loc[eseg_df["system"] == row.system].E_seg.values[0],3) + # print(row.system.split(sep="-")[0]) + eseg_df = get_1sol_df( + "%s\\%s\\%s" % (fp_Seg1_path, GB_string, row.system.split(sep="-")[0]), + midpoint=0.5094, + ) + eseg = np.round( + eseg_df.loc[eseg_df["system"] == row.system].E_seg.values[0], 3 + ) else: eseg = np.nan eseg_list.append(eseg) diff --git a/utils/chargemol.py b/utils/chargemol.py index 5da8ba1..c4e25c6 100644 --- a/utils/chargemol.py +++ b/utils/chargemol.py @@ -1,8 +1,9 @@ - import os from pymatgen.core import Structure, Element -from pymatgen.command_line.chargemol_caller import ChargemolAnalysis as PMGChargemolAnalysis +from pymatgen.command_line.chargemol_caller import ( + ChargemolAnalysis as PMGChargemolAnalysis, +) import pandas as pd import numpy as np @@ -13,7 +14,8 @@ import matplotlib.pyplot as plt import matplotlib.ticker as ticker -import time +import time + def get_stats(property_list, property_str): """ @@ -40,27 +42,51 @@ def get_stats(property_list, property_str): f"{property_str}_max": np.max(property_list), } -def check_chargemol_output_present(directory,\ - required_files = ["DDEC6_even_tempered_atomic_spin_moments.xyz",\ - "DDEC6_even_tempered_net_atomic_charges.xyz",\ - "DDEC_atomic_Rfourth_moments.xyz",\ - "overlap_populations.xyz",\ - "DDEC6_even_tempered_bond_orders.xyz",\ - "DDEC_atomic_Rcubed_moments.xyz",\ - "DDEC_atomic_Rsquared_moments.xyz",\ - "POTCAR"]): - missing_files = [file for file in required_files if not os.path.exists(os.path.join(directory, file))] + +def check_chargemol_output_present( + directory, + required_files=[ + "DDEC6_even_tempered_atomic_spin_moments.xyz", + "DDEC6_even_tempered_net_atomic_charges.xyz", + "DDEC_atomic_Rfourth_moments.xyz", + "overlap_populations.xyz", + "DDEC6_even_tempered_bond_orders.xyz", + "DDEC_atomic_Rcubed_moments.xyz", + "DDEC_atomic_Rsquared_moments.xyz", + "POTCAR", + ], +): + missing_files = [ + file + for file in required_files + if not os.path.exists(os.path.join(directory, file)) + ] if missing_files: return False else: return True # All required files are present - + + def summarise_DDEC_data(directory, bond_order_threshold=0.05): if not check_chargemol_output_present(directory): # Some files are missing, return a DataFrame with NaN values and the filepath - columns = ["bond_order_std", "bond_order_mean", "bond_order_min", "bond_order_max", "n_bonds", - "element", "bond_order_sums", "ddec_charges", "cm5_charges", "ddec_rcubed_moments", - "ddec_rfourth_moments", "ddec_spin_moments", "dipoles", "charge_transfer", "partial_charge"] + columns = [ + "bond_order_std", + "bond_order_mean", + "bond_order_min", + "bond_order_max", + "n_bonds", + "element", + "bond_order_sums", + "ddec_charges", + "cm5_charges", + "ddec_rcubed_moments", + "ddec_rfourth_moments", + "ddec_spin_moments", + "dipoles", + "charge_transfer", + "partial_charge", + ] empty_data = [[np.nan] * len(columns)] ddec_df = pd.DataFrame(empty_data, columns=columns) ddec_df["filepath"] = directory @@ -74,18 +100,22 @@ def summarise_DDEC_data(directory, bond_order_threshold=0.05): df_thres = df[df["bond_order"] > bond_order_threshold] # This is a failsafe because certain atoms just don't bond (e.g. 
He/Ar) if len(df_thres) == 0: - df_thres = df + df_thres = df bo_stats_df = get_stats(df_thres.bond_order.tolist(), "bond_order") - bo_stats_df = pd.DataFrame.from_dict(bo_stats_df, orient='index', columns=[str(entries)]).T + bo_stats_df = pd.DataFrame.from_dict( + bo_stats_df, orient="index", columns=[str(entries)] + ).T bo_stats_df["n_bonds"] = 0 else: bo_stats_df = get_stats(df_thres.bond_order.tolist(), "bond_order") - bo_stats_df = pd.DataFrame.from_dict(bo_stats_df, orient='index', columns=[str(entries)]).T + bo_stats_df = pd.DataFrame.from_dict( + bo_stats_df, orient="index", columns=[str(entries)] + ).T bo_stats_df["n_bonds"] = len(df_thres) bo_df.append(bo_stats_df) element_symbol = ca.bond_order_dict[entries]["element"].symbol element_list.append(element_symbol) - + ddec_df = pd.concat(bo_df) ddec_df["filepath"] = directory ddec_df["element"] = element_list @@ -94,45 +124,63 @@ def summarise_DDEC_data(directory, bond_order_threshold=0.05): try: ddec_df["cm5_charges"] = ca.cm5_charges except Exception as e: - print(f"{directory}: FAILED DUE TO EXCEPTION {e}") + print(f"{directory}: FAILED DUE TO EXCEPTION {e}") ddec_df["cm5_charges"] = np.nan ddec_df["ddec_rcubed_moments"] = ca.ddec_rcubed_moments ddec_df["ddec_rfourth_moments"] = ca.ddec_rfourth_moments ddec_df["ddec_spin_moments"] = ca.ddec_spin_moments ddec_df["dipoles"] = ca.dipoles - ddec_df["charge_transfer"] = [ca.get_charge_transfer(i) for i in ca.bond_order_dict] - ddec_df["partial_charge"] = [ca.get_partial_charge(i) for i in ca.bond_order_dict] + ddec_df["charge_transfer"] = [ + ca.get_charge_transfer(i) for i in ca.bond_order_dict + ] + ddec_df["partial_charge"] = [ + ca.get_partial_charge(i) for i in ca.bond_order_dict + ] return ddec_df -def get_solute_summary_DDEC_data(directory, bond_order_threshold=0.05, base_solute="Fe"): - df = summarise_DDEC_data(directory=directory, bond_order_threshold=bond_order_threshold) - df = df[df["element"]==base_solute] + +def get_solute_summary_DDEC_data( + directory, bond_order_threshold=0.05, base_solute="Fe" +): + df = summarise_DDEC_data( + directory=directory, bond_order_threshold=bond_order_threshold + ) + df = df[df["element"] == base_solute] return df -class DatabaseGenerator(): - + +class DatabaseGenerator: + def __init__(self, parent_dir): self.parent_dir = parent_dir - - def build_database(self, - target_directory = None, - extract_directories = False, - cleanup=False, - keep_filenames_after_cleanup = [], - keep_filename_patterns_after_cleanup = [], - max_dir_count = None, - df_filename = None): - + + def build_database( + self, + target_directory=None, + extract_directories=False, + cleanup=False, + keep_filenames_after_cleanup=[], + keep_filename_patterns_after_cleanup=[], + max_dir_count=None, + df_filename=None, + ): + start_time = time.time() - + if target_directory: - dirs = find_chargemol_directories(parent_dir=target_directory, extract_tarballs=extract_directories) + dirs = find_chargemol_directories( + parent_dir=target_directory, extract_tarballs=extract_directories + ) else: - dirs = find_chargemol_directories(parent_dir=self.parent_dir, extract_tarballs=extract_directories) - - print(f"The total number of vasp directories that we are building the database out of is {len(dirs)}") - + dirs = find_chargemol_directories( + parent_dir=self.parent_dir, extract_tarballs=extract_directories + ) + + print( + f"The total number of vasp directories that we are building the database out of is {len(dirs)}" + ) + if max_dir_count: pkl_filenames = [] for i, chunks in 
enumerate(gen_tools.chunk_list(dirs, max_dir_count)): @@ -144,9 +192,11 @@ def build_database(self, db_filename = f"{i}.pkl" pkl_filenames.append(os.path.join(self.parent_dir, db_filename)) df.to_pickle(os.path.join(self.parent_dir, db_filename)) - step_taken_time = np.round(step_time - time.time(),3) - print(f"Step {i}: {step_taken_time} seconds taken for {len(chunks)} parse steps") - + step_taken_time = np.round(step_time - time.time(), 3) + print( + f"Step {i}: {step_taken_time} seconds taken for {len(chunks)} parse steps" + ) + df = pd.concat([pd.read_pickle(partial_df) for partial_df in pkl_filenames]) else: @@ -157,34 +207,49 @@ def build_database(self, df.to_pickle(os.path.join(self.parent_dir, f"vasp_database.pkl")) end_time = time.time() elapsed_time = end_time - start_time - + # not optional - keep the tarballs/zips.. keep_filename_patterns_after_cleanup += ".tar.gz" keep_filename_patterns_after_cleanup += ".tar.bz2" keep_filename_patterns_after_cleanup += ".zip" if cleanup: - gen_tools.cleanup_dir(directory_path=dirs, keep=True, files=[], file_patterns=[]) - parallelise(gen_tools.cleanup_dir, dirs, [True] * len(dirs), keep_filenames_after_cleanup*len(dirs), keep_filename_patterns_after_cleanup*len(dirs)) - + gen_tools.cleanup_dir( + directory_path=dirs, keep=True, files=[], file_patterns=[] + ) + parallelise( + gen_tools.cleanup_dir, + dirs, + [True] * len(dirs), + keep_filenames_after_cleanup * len(dirs), + keep_filename_patterns_after_cleanup * len(dirs), + ) + print("Elapsed time:", np.round(elapsed_time, 3), "seconds") return df - -class ChargemolAnalysis(): - def __init__(self, directory, extract_dir = False): + + +class ChargemolAnalysis: + def __init__(self, directory, extract_dir=False): self.directory = directory self._struct = None self._bond_matrix = None if extract_dir: directory = find_chargemol_directories(directory)[0] - if check_valid_chargemol_output(os.path.join(directory, "VASP_DDEC_analysis.output")): + if check_valid_chargemol_output( + os.path.join(directory, "VASP_DDEC_analysis.output") + ): self.parse_DDEC6_analysis_output() else: - print("No valid output available! Try extracting any tarballs? Set extract_dir=True") - + print( + "No valid output available! Try extracting any tarballs? 
Set extract_dir=True" + ) + def parse_DDEC6_analysis_output(self): - struct, bond_matrix = parse_DDEC6_analysis_output(os.path.join(self.directory, "VASP_DDEC_analysis.output")) + struct, bond_matrix = parse_DDEC6_analysis_output( + os.path.join(self.directory, "VASP_DDEC_analysis.output") + ) self.struct = struct self.bond_matrix = bond_matrix return struct, bond_matrix @@ -207,53 +272,73 @@ def set_bond_matrix(self, bond_matrix): def plot_ANSBO_profile(self): plot_ANSBO_profile_and_structure(self.struct, self.bond_matrix) - + def get_ANSBO_profile(self, axis=2, tolerance=0.1): - return get_ANSBO_all_cleavage_planes(self.struct, self.bond_matrix, axis=axis, tolerance=tolerance) + return get_ANSBO_all_cleavage_planes( + self.struct, self.bond_matrix, axis=axis, tolerance=tolerance + ) def get_min_ANSBO(self, axis=2, tolerance=0.1): - return min(get_ANSBO_all_cleavage_planes(self.struct, self.bond_matrix, axis=axis, tolerance=tolerance)) - + return min( + get_ANSBO_all_cleavage_planes( + self.struct, self.bond_matrix, axis=axis, tolerance=tolerance + ) + ) + def analyse_ANSBO(self, axis=2, tolerance=0.1): return analyse_ANSBO(self.directory, axis=axis, tolerance=tolerance) + def analyse_ANSBO(directory, axis=2, tolerance=0.1): - """ - - """ - struct, bond_matrix = parse_DDEC6_analysis_output(os.path.join(directory, "VASP_DDEC_analysis.output")) - atomic_layers = get_unique_values_in_nth_value(struct.cart_coords, axis, tolerance = tolerance) + """ """ + struct, bond_matrix = parse_DDEC6_analysis_output( + os.path.join(directory, "VASP_DDEC_analysis.output") + ) + atomic_layers = get_unique_values_in_nth_value( + struct.cart_coords, axis, tolerance=tolerance + ) cp_list = compute_average_pairs(atomic_layers) - ANSBO_profile = get_ANSBO_all_cleavage_planes(struct, bond_matrix, axis=axis, tolerance=tolerance) - - results_dict = {"layer_boundaries": atomic_layers, - "cleavage_coord": cp_list, - "ANSBO_profile": ANSBO_profile} + ANSBO_profile = get_ANSBO_all_cleavage_planes( + struct, bond_matrix, axis=axis, tolerance=tolerance + ) + + results_dict = { + "layer_boundaries": atomic_layers, + "cleavage_coord": cp_list, + "ANSBO_profile": ANSBO_profile, + } return results_dict - -def find_chargemol_directories(parent_dir, - filenames=["DDEC6_even_tempered_atomic_spin_moments.xyz", - "DDEC6_even_tempered_net_atomic_charges.xyz", - "DDEC_atomic_Rfourth_moments.xyz", - "overlap_populations.xyz", - "DDEC6_even_tempered_bond_orders.xyz", - "DDEC_atomic_Rcubed_moments.xyz", - "DDEC_atomic_Rsquared_moments.xyz", - "POTCAR"], - all_present=True, - extract_tarballs=True, - only_valid_output=True): + + +def find_chargemol_directories( + parent_dir, + filenames=[ + "DDEC6_even_tempered_atomic_spin_moments.xyz", + "DDEC6_even_tempered_net_atomic_charges.xyz", + "DDEC_atomic_Rfourth_moments.xyz", + "overlap_populations.xyz", + "DDEC6_even_tempered_bond_orders.xyz", + "DDEC_atomic_Rcubed_moments.xyz", + "DDEC_atomic_Rsquared_moments.xyz", + "POTCAR", + ], + all_present=True, + extract_tarballs=True, + only_valid_output=True, +): if extract_tarballs: - gen_tools.find_and_extract_files_from_tarballs_parallel(parent_dir=parent_dir, - extension=".tar.gz", - filenames=filenames, - suffix=None, - prefix=None) - - directories = gen_tools.find_directories_with_files(parent_dir=parent_dir, - filenames=filenames, - all_present=all_present) - + gen_tools.find_and_extract_files_from_tarballs_parallel( + parent_dir=parent_dir, + extension=".tar.gz", + filenames=filenames, + suffix=None, + prefix=None, + ) + + directories = 
gen_tools.find_directories_with_files( + parent_dir=parent_dir, filenames=filenames, all_present=all_present + ) + if only_valid_output: converged_list = [] non_converged_list = [] @@ -266,6 +351,7 @@ def find_chargemol_directories(parent_dir, directories = converged_list return directories + def parse_DDEC6_analysis_output(filename): """ Parses VASP_DDEC_analysis.output files and returns a Structure object and bond matrix. @@ -316,46 +402,69 @@ def parse_DDEC6_analysis_output(filename): flist = open(filename).readlines() bohr_to_angstrom_conversion_factor = 0.529177 - structure_lattice = gen_tools.parse_lines(flist, trigger_start="vectors", trigger_end="direct_coords")[0] - structure_lattice = np.array([list(map(float, line.split())) for line in structure_lattice]) + structure_lattice = gen_tools.parse_lines( + flist, trigger_start="vectors", trigger_end="direct_coords" + )[0] + structure_lattice = np.array( + [list(map(float, line.split())) for line in structure_lattice] + ) structure_lattice = structure_lattice * bohr_to_angstrom_conversion_factor - structure_frac_coords = gen_tools.parse_lines(flist, trigger_start="direct_coords", trigger_end="totnumA")[0] - structure_frac_coords = [np.array([float(coord) for coord in entry.split()]) for entry in structure_frac_coords] + structure_frac_coords = gen_tools.parse_lines( + flist, trigger_start="direct_coords", trigger_end="totnumA" + )[0] + structure_frac_coords = [ + np.array([float(coord) for coord in entry.split()]) + for entry in structure_frac_coords + ] # Convert atomic numbers to element symbols - structure_atomic_no = gen_tools.parse_lines(flist, trigger_start="(Missing core electrons will be inserted using stored core electron reference densities.)", trigger_end=" Finished the check for missing core electrons.") - structure_atomic_no = [Element.from_Z(int(atomic_number.split()[1])).symbol for atomic_number in structure_atomic_no[0]] + structure_atomic_no = gen_tools.parse_lines( + flist, + trigger_start="(Missing core electrons will be inserted using stored core electron reference densities.)", + trigger_end=" Finished the check for missing core electrons.", + ) + structure_atomic_no = [ + Element.from_Z(int(atomic_number.split()[1])).symbol + for atomic_number in structure_atomic_no[0] + ] structure = Structure(structure_lattice, structure_atomic_no, structure_frac_coords) - data_column_names = ['atom1',\ - 'atom2',\ - 'repeata',\ - 'repeatb',\ - 'repeatc',\ - 'min-na',\ - 'max-na',\ - 'min-nb',\ - 'max-nb',\ - 'min-nc',\ - 'max-nc',\ - 'contact-exchange',\ - 'avg-spin-pol-bonding-term',\ - 'overlap-population',\ - 'isoaepfcbo',\ - 'coord-term-tanh',\ - 'pairwise-term',\ - 'exp-term-comb-coord-pairwise',\ - 'bond-idx-before-self-exch',\ - 'final_bond_order'] - - bond_matrix = gen_tools.parse_lines(flist, trigger_start="The final bond pair matrix is", trigger_end="The legend for the bond pair matrix follows:")[0] + data_column_names = [ + "atom1", + "atom2", + "repeata", + "repeatb", + "repeatc", + "min-na", + "max-na", + "min-nb", + "max-nb", + "min-nc", + "max-nc", + "contact-exchange", + "avg-spin-pol-bonding-term", + "overlap-population", + "isoaepfcbo", + "coord-term-tanh", + "pairwise-term", + "exp-term-comb-coord-pairwise", + "bond-idx-before-self-exch", + "final_bond_order", + ] + + bond_matrix = gen_tools.parse_lines( + flist, + trigger_start="The final bond pair matrix is", + trigger_end="The legend for the bond pair matrix follows:", + )[0] bond_matrix = np.array([list(map(float, line.split())) for line in bond_matrix]) 
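    # Illustrative aside: each parsed row follows the data_column_names layout above, and the
    # DataFrame built next is what the ANSBO helpers further down consume, e.g. for a
    # hypothetical run directory:
    #
    #     struct, bm = parse_DDEC6_analysis_output("run_dir/VASP_DDEC_analysis.output")
    #     cp_list, ansbo_profile = get_ANSBO_all_cleavage_planes(struct, bm, axis=2, tolerance=0.1)
    #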
bond_matrix = pd.DataFrame(bond_matrix, columns=data_column_names) return structure, bond_matrix + def check_valid_chargemol_output(vasp_ddec_analysis_output_filepath): """ Checks if a VASP DDEC analysis output file indicates successful completion of Chargemol. @@ -384,18 +493,23 @@ def check_valid_chargemol_output(vasp_ddec_analysis_output_filepath): contains the necessary information. """ - convergence = gen_tools.search_line_in_file(vasp_ddec_analysis_output_filepath, "Finished chargemol in") + convergence = gen_tools.search_line_in_file( + vasp_ddec_analysis_output_filepath, "Finished chargemol in" + ) return convergence -def plot_structure_projection(structure, - projection_axis = [1, 2], - bond_matrix = None, - atom_size=250, - figsize=(8, 6), - cell_border_colour = "r", - atom_colour_dict = {}, - fontsize=16): + +def plot_structure_projection( + structure, + projection_axis=[1, 2], + bond_matrix=None, + atom_size=250, + figsize=(8, 6), + cell_border_colour="r", + atom_colour_dict={}, + fontsize=16, +): """ Plots the projection of a pymatgen structure on a 2D plane based on the specified projection axis. @@ -414,52 +528,73 @@ def plot_structure_projection(structure, # plt.figure(figsize=figsize) for site in structure: species = site.species_string - color = atom_colour_dict.get(species, 'b') # Default to blue if species not in atom_colour_dict - plt.scatter(site.coords[projection_axis[0]], site.coords[projection_axis[1]], color=color, s=atom_size, edgecolors='black') + color = atom_colour_dict.get( + species, "b" + ) # Default to blue if species not in atom_colour_dict + plt.scatter( + site.coords[projection_axis[0]], + site.coords[projection_axis[1]], + color=color, + s=atom_size, + edgecolors="black", + ) # Set plot title and labels - plt.title('Projection of the Cell', fontsize=16) - plt.xlabel(f'Axis {projection_axis[0]} Coordinate', fontsize=12) - plt.ylabel(f'Axis {projection_axis[1]} Coordinate', fontsize=12) + plt.title("Projection of the Cell", fontsize=16) + plt.xlabel(f"Axis {projection_axis[0]} Coordinate", fontsize=12) + plt.ylabel(f"Axis {projection_axis[1]} Coordinate", fontsize=12) # Set plot limits based on the atomic coordinates x_min, x_max = min(x_coords), max(x_coords) y_min, y_max = min(y_coords), max(y_coords) plt.xlim(x_min - 1, x_max + 1) plt.ylim(y_min - 1, y_max + 1) - + if bond_matrix is not None: - relevant_plot_bonds = bond_matrix[(bond_matrix['repeata'] == 0) & (bond_matrix['repeatb'] == 0)] - for idx, bonds in relevant_plot_bonds.iterrows(): - atom1 = int(bonds["atom1"])-1 - atom2 = int(bonds["atom2"])-1 - bondstrength = np.round(bonds["final_bond_order"],2) + relevant_plot_bonds = bond_matrix[ + (bond_matrix["repeata"] == 0) & (bond_matrix["repeatb"] == 0) + ] + for idx, bonds in relevant_plot_bonds.iterrows(): + atom1 = int(bonds["atom1"]) - 1 + atom2 = int(bonds["atom2"]) - 1 + bondstrength = np.round(bonds["final_bond_order"], 2) if bondstrength < 0.28: - c = 'r' + c = "r" else: - c = 'k' + c = "k" c = "k" - plt.plot([structure[atom1].coords[projection_axis[0]],structure[atom2].coords[projection_axis[0]]], - [structure[atom1].coords[projection_axis[1]],structure[atom2].coords[projection_axis[1]]], - '-', - color=c, - linewidth=bondstrength/0.56*5) - + plt.plot( + [ + structure[atom1].coords[projection_axis[0]], + structure[atom2].coords[projection_axis[0]], + ], + [ + structure[atom1].coords[projection_axis[1]], + structure[atom2].coords[projection_axis[1]], + ], + "-", + color=c, + linewidth=bondstrength / 0.56 * 5, + ) + # Draw the cell with a 
black border based on the projection_axis lattice_vectors = structure.lattice.matrix[projection_axis] # Draw the cell with a border based on the projection_axis - rect = plt.Rectangle((0,0), - structure.lattice.abc[projection_axis[0]], - structure.lattice.abc[projection_axis[1]], - edgecolor=cell_border_colour, - linewidth=3, - fill=False, - linestyle = '--') + rect = plt.Rectangle( + (0, 0), + structure.lattice.abc[projection_axis[0]], + structure.lattice.abc[projection_axis[1]], + edgecolor=cell_border_colour, + linewidth=3, + fill=False, + linestyle="--", + ) plt.gca().add_patch(rect) - plt.gca().set_aspect('equal') + plt.gca().set_aspect("equal") plt.grid() - + + def get_unique_values_in_nth_value(arr_list, n, tolerance): unique_values = [] for sublist in arr_list: @@ -473,6 +608,7 @@ def get_unique_values_in_nth_value(arr_list, n, tolerance): unique_values.append(value) return np.sort(unique_values) + def compute_average_pairs(lst): averages = [] for i in range(len(lst) - 1): @@ -480,11 +616,18 @@ def compute_average_pairs(lst): averages.append(average) return averages -def get_ANSBO(structure, bond_matrix, cleavage_plane, axis = 2): - bond_matrix['atom1pos'] = [structure[int(x)-1].coords[axis] for x in bond_matrix['atom1'].values] - bond_matrix['atom2pos'] = [structure[int(x)-1].coords[axis] for x in bond_matrix['atom2'].values] - clp_df = bond_matrix[(bond_matrix[['atom1pos','atom2pos']].max(axis=1) > cleavage_plane) - & (bond_matrix[['atom1pos','atom2pos']].min(axis=1) < cleavage_plane) ] + +def get_ANSBO(structure, bond_matrix, cleavage_plane, axis=2): + bond_matrix["atom1pos"] = [ + structure[int(x) - 1].coords[axis] for x in bond_matrix["atom1"].values + ] + bond_matrix["atom2pos"] = [ + structure[int(x) - 1].coords[axis] for x in bond_matrix["atom2"].values + ] + clp_df = bond_matrix[ + (bond_matrix[["atom1pos", "atom2pos"]].max(axis=1) > cleavage_plane) + & (bond_matrix[["atom1pos", "atom2pos"]].min(axis=1) < cleavage_plane) + ] if axis == 0: repeat1 = "repeatb" repeat2 = "repeatc" @@ -494,22 +637,30 @@ def get_ANSBO(structure, bond_matrix, cleavage_plane, axis = 2): elif axis == 2: repeat1 = "repeata" repeat2 = "repeatb" - + clp_df = clp_df.copy()[(clp_df[repeat1] == 0) | (clp_df[repeat2] == 0)] # We only want to calculate for atoms that exist in cell. 
This is important for bond order/area normalisation clp_df_countonce = clp_df.copy()[(clp_df[repeat1] == 0) & (clp_df[repeat2] == 0)] clp_df_counthalf = clp_df.copy()[(clp_df[repeat1] != 0) | (clp_df[repeat2] != 0)] # Basic summed bond order over CP - final_bond_order = clp_df_countonce.final_bond_order.sum() + 0.5*clp_df_counthalf.final_bond_order.sum() + final_bond_order = ( + clp_df_countonce.final_bond_order.sum() + + 0.5 * clp_df_counthalf.final_bond_order.sum() + ) # N largest - #final_bond_order = clp_df.nlargest(15, ['final_bond_order'])["final_bond_order"].sum() + # final_bond_order = clp_df.nlargest(15, ['final_bond_order'])["final_bond_order"].sum() # IMPORTANT: This assumes that the cross sectional area can be calculated this way - a_fbo = final_bond_order/(float(structure.lattice.volume)/float(structure.lattice.abc[axis])) - #print("area of this is %s" % (float(structure.lattice.volume)/float(structure.lattice.c))) + a_fbo = final_bond_order / ( + float(structure.lattice.volume) / float(structure.lattice.abc[axis]) + ) + # print("area of this is %s" % (float(structure.lattice.volume)/float(structure.lattice.c))) return a_fbo -def get_ANSBO_all_cleavage_planes(structure, bond_matrix, axis = 2, tolerance = 0.1): - atomic_layers = get_unique_values_in_nth_value(structure.cart_coords, axis, tolerance = tolerance) + +def get_ANSBO_all_cleavage_planes(structure, bond_matrix, axis=2, tolerance=0.1): + atomic_layers = get_unique_values_in_nth_value( + structure.cart_coords, axis, tolerance=tolerance + ) cp_list = compute_average_pairs(atomic_layers) ANSBO_profile = [] @@ -517,45 +668,47 @@ def get_ANSBO_all_cleavage_planes(structure, bond_matrix, axis = 2, tolerance = ANSBO_profile.append(get_ANSBO(structure, bond_matrix, cp)) return cp_list, ANSBO_profile -def plot_ANSBO_profile(structure, - bond_matrix, - projection_axis = [1, 2]): - ANSBO_values = get_ANSBO_all_cleavage_planes(structure, bond_matrix, projection_axis[-1]) - atomic_layer_coords = get_unique_values_in_nth_value(structure.cart_coords, projection_axis[-1], tolerance= 0.1) + +def plot_ANSBO_profile(structure, bond_matrix, projection_axis=[1, 2]): + ANSBO_values = get_ANSBO_all_cleavage_planes( + structure, bond_matrix, projection_axis[-1] + ) + atomic_layer_coords = get_unique_values_in_nth_value( + structure.cart_coords, projection_axis[-1], tolerance=0.1 + ) if len(atomic_layer_coords) != len(ANSBO_values) + 1: print("Error: Lengths of the lists are not compatible.") return - + # plt.figure(figsize=(3,10)) - + # Create lists for the x and y coordinates of the lines x_lines = [] y_lines = [] - + # Iterate over the elements of ANSBO_profile for i, value in enumerate(ANSBO_values): # Append x-coordinates for the horizontal lines x_lines.extend([value, value]) # Append y-coordinates for the horizontal lines - y_lines.extend([atomic_layer_coords[i], atomic_layer_coords[i+1]]) + y_lines.extend([atomic_layer_coords[i], atomic_layer_coords[i + 1]]) # Append x-coordinates for the vertical lines x_lines.append(value) # Append y-coordinates for the vertical lines - y_lines.append(atomic_layer_coords[i+1]) - + y_lines.append(atomic_layer_coords[i + 1]) + # Plotting the lines plt.plot(x_lines, y_lines) plt.grid() # Labeling the axes - plt.xlabel('ANSBO Profile') - plt.ylabel('Coordinates (Angstrom)') - -def plot_ANSBO_profile_and_structure(structure, - bond_matrix, - write=False, - filename="ANSBO.jpg", - fontsize=16): + plt.xlabel("ANSBO Profile") + plt.ylabel("Coordinates (Angstrom)") + + +def plot_ANSBO_profile_and_structure( + 
structure, bond_matrix, write=False, filename="ANSBO.jpg", fontsize=16 +): """ Plot the structure bond projection and the ANSBO profile side by side. @@ -570,20 +723,29 @@ def plot_ANSBO_profile_and_structure(structure, """ # Create a new figure with two subplots side by side - fig, axs = plt.subplots(1, 2, figsize=(10, 20), gridspec_kw={'width_ratios': [2, 1]}) - + fig, axs = plt.subplots( + 1, 2, figsize=(10, 20), gridspec_kw={"width_ratios": [2, 1]} + ) + # Activate the first subplot and call plot_structure_projection plt.sca(axs[0]) - plot_structure_projection(structure, bond_matrix=bond_matrix, figsize=(8, 6), atom_colour_dict={"Fe": "b", "Ac": "r"}) - plt.grid(True, which='major', linestyle='-') - plt.grid(True, which='minor', linestyle='--') + plot_structure_projection( + structure, + bond_matrix=bond_matrix, + figsize=(8, 6), + atom_colour_dict={"Fe": "b", "Ac": "r"}, + ) + plt.grid(True, which="major", linestyle="-") + plt.grid(True, which="minor", linestyle="--") axs[0].xaxis.set_minor_locator(ticker.MultipleLocator(1)) axs[0].yaxis.set_minor_locator(ticker.MultipleLocator(1)) # Activate the second subplot and call plot_ANSBO_profile plt.sca(axs[1]) - plot_ANSBO_profile(structure, bond_matrix) # Assuming you have defined the plot_ANSBO_profile function - plt.grid(True, which='major', linestyle='-') - plt.grid(True, which='minor', linestyle='--') + plot_ANSBO_profile( + structure, bond_matrix + ) # Assuming you have defined the plot_ANSBO_profile function + plt.grid(True, which="major", linestyle="-") + plt.grid(True, which="minor", linestyle="--") axs[1].xaxis.set_minor_locator(ticker.MultipleLocator(1)) axs[1].yaxis.set_minor_locator(ticker.MultipleLocator(1)) # Set the same y-axis limits for both subplots @@ -593,8 +755,8 @@ def plot_ANSBO_profile_and_structure(structure, plt.subplots_adjust(wspace=0.01) # Set the desired spacing between the subplots # Set titles for the subplots - axs[0].set_title('Structure Bond Projection', fontsize=fontsize) - axs[1].set_title('ANSBO Profile', fontsize=fontsize) + axs[0].set_title("Structure Bond Projection", fontsize=fontsize) + axs[1].set_title("ANSBO Profile", fontsize=fontsize) # Optionally, save the plot to a file if write: @@ -602,7 +764,10 @@ def plot_ANSBO_profile_and_structure(structure, # Display the plot plt.show() - + + def plot_ANSBO_profile_and_structure_from_dir(directory, extract_from_tarball=True): - structure, bond_matrix = parse_DDEC6_analysis_output(os.path.join(directory, "VASP_DDEC_analysis.output")) - plot_ANSBO_profile_and_structure(structure, bond_matrix) \ No newline at end of file + structure, bond_matrix = parse_DDEC6_analysis_output( + os.path.join(directory, "VASP_DDEC_analysis.output") + ) + plot_ANSBO_profile_and_structure(structure, bond_matrix) diff --git a/utils/custom_custodian_handlers.py b/utils/custom_custodian_handlers.py index 8f15da6..60e8f9f 100644 --- a/utils/custom_custodian_handlers.py +++ b/utils/custom_custodian_handlers.py @@ -30,9 +30,7 @@ from custodian.utils import backup from custodian.vasp.interpreter import VaspModder -__author__ = ( - "Han Lin Mai" -) +__author__ = "Han Lin Mai" __version__ = "0.1" __maintainer__ = "Han Mai" __email__ = "h.mai@mpie.de" @@ -51,6 +49,7 @@ "std_err.txt", } + class Han_CustomVaspErrorHandler(ErrorHandler): """Check if a run is converged.""" @@ -89,11 +88,20 @@ def correct(self): # expensive algorithms. 
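        # Illustrative summary of the ladder applied below: when the electronic loop has not
        # converged, ALGO is stepped VeryFast -> Fast -> Normal -> All, and only if none of
        # those switches apply is a mixing-parameter fallback (e.g. BMIX_MAG = 0.001)
        # attempted as a last resort.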
if len(actions) == 0: if algo == "veryfast": - actions.append({"dict": "INCAR", "action": {"_set": {"ALGO": "Fast"}}}) + actions.append( + {"dict": "INCAR", "action": {"_set": {"ALGO": "Fast"}}} + ) elif algo == "fast": - actions.append({"dict": "INCAR", "action": {"_set": {"ALGO": "Normal"}}}) - elif algo == "normal" and (v.incar.get("ISMEAR", -1) >= 0 or not 50 <= v.incar.get("IALGO", 38) <= 59): - actions.append({"dict": "INCAR", "action": {"_set": {"ALGO": "All"}}}) + actions.append( + {"dict": "INCAR", "action": {"_set": {"ALGO": "Normal"}}} + ) + elif algo == "normal" and ( + v.incar.get("ISMEAR", -1) >= 0 + or not 50 <= v.incar.get("IALGO", 38) <= 59 + ): + actions.append( + {"dict": "INCAR", "action": {"_set": {"ALGO": "All"}}} + ) else: # Try mixing as last resort new_settings = { @@ -105,8 +113,12 @@ def correct(self): "BMIX_MAG": 0.001, } - if not all(v.incar.get(k, "") == val for k, val in new_settings.items()): - actions.append({"dict": "INCAR", "action": {"_set": new_settings}}) + if not all( + v.incar.get(k, "") == val for k, val in new_settings.items() + ): + actions.append( + {"dict": "INCAR", "action": {"_set": new_settings}} + ) elif not v.converged_ionic: # Just continue optimizing and let other handlers fix ionic @@ -123,4 +135,4 @@ def correct(self): return {"errors": ["Unconverged"], "actions": actions} # Unfixable error. Just return None for actions. - return {"errors": ["Unconverged"], "actions": None} \ No newline at end of file + return {"errors": ["Unconverged"], "actions": None} diff --git a/utils/functions.py b/utils/functions.py index 522e7fa..6a6777a 100644 --- a/utils/functions.py +++ b/utils/functions.py @@ -6,15 +6,19 @@ from pymatgen.io.vasp.inputs import Potcar, Incar, Kpoints from utils.jobfile import jobfile + potcar_library_path = "/root/POTCAR_Library/GGA" -#potcar_library_path = "/u/hmai/pyiron-resources-cmmc/vasp/potentials/potpaw_PBE" +# potcar_library_path = "/u/hmai/pyiron-resources-cmmc/vasp/potentials/potpaw_PBE" + +sites_to_study = { + "S11-RA110-S3-32": [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22], + "S3-RA110-S1-11": [20, 22, 24, 26, 28, 30, 32, 34, 36], + "S3-RA110-S1-12": [12, 14, 16, 18, 20, 22, 24], + "S5-RA001-S210": [24, 27, 29, 31, 33, 35, 37], + "S5-RA001-S310": [23, 27, 33, 37, 40], + "S9-RA110-S2-21": list(range(23, 37)), +} -sites_to_study = {"S11-RA110-S3-32": [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22], - "S3-RA110-S1-11": [20, 22, 24, 26, 28, 30, 32, 34, 36], - "S3-RA110-S1-12": [12, 14, 16, 18, 20, 22, 24], - "S5-RA001-S210": [24, 27, 29, 31, 33, 35, 37], - "S5-RA001-S310": [23, 27, 33, 37, 40], - "S9-RA110-S2-21": list(range(23, 37))} def structures_from_vasp_folder(folder_path): # Initialize an empty dictionary to store the structures @@ -23,24 +27,27 @@ def structures_from_vasp_folder(folder_path): # Loop through all files in the folder for filename in os.listdir(folder_path): # Check if the file is a .vasp file - if filename.endswith('.vasp'): + if filename.endswith(".vasp"): # Read in the Structure from the VASP file using pymatgen's Structure class structure = Structure.from_file(os.path.join(folder_path, filename)) # Strip the .vasp extension from the filename and use it as the dictionary key key = os.path.splitext(filename)[0] # Assign the Structure object to the dictionary with the key structures_dict[key] = structure - + # Return the dictionary containing the structures return structures_dict -def createFolder(directory, delete_folder='no'): - import os; import shutil + +def createFolder(directory, 
delete_folder="no"): + import os + import shutil + if not os.path.exists(directory): os.makedirs(directory) else: - if delete_folder == 'no': - #print('no replacement/deletion created due to folder existing') + if delete_folder == "no": + # print('no replacement/deletion created due to folder existing') x = 1 else: print("removing directory...") @@ -51,22 +58,30 @@ def createFolder(directory, delete_folder='no'): else: print("given path is a special file - manually remove") + def get_immediate_subdirectories(a_dir): - return [f.path for f in os.scandir(a_dir) if f.is_dir() and os.path.basename(f) != ".ipynb_checkpoints"] - -def generateINCAR(structure, - path = os.path.join(os.getcwd(), "INCAR"), - ISIF = 2, - ISPIN = 1, - ENCUT = 350, - EDIFF = 1E-4, - EDIFFG = -0.02, - NCORE = 4, - KPAR = 1, - SYSTEM = "filltext", - functional = 'PBE', - reverse_magmom = False, - base_element = "Fe"): + return [ + f.path + for f in os.scandir(a_dir) + if f.is_dir() and os.path.basename(f) != ".ipynb_checkpoints" + ] + + +def generateINCAR( + structure, + path=os.path.join(os.getcwd(), "INCAR"), + ISIF=2, + ISPIN=1, + ENCUT=350, + EDIFF=1e-4, + EDIFFG=-0.02, + NCORE=4, + KPAR=1, + SYSTEM="filltext", + functional="PBE", + reverse_magmom=False, + base_element="Fe", +): INCAR_file = Incar() INCAR_file = INCAR_file.from_file(path) @@ -78,157 +93,170 @@ def generateINCAR(structure, INCAR_file["NCORE"] = NCORE INCAR_file["KPAR"] = KPAR - dictionary_of_functionals = {"PW91" : '91', - "PBE" : 'PE', - "AM05" : 'AM', - "PBEsol": 'PS', - "Hendin-Lundquist" : "HL", - "Ceperley-Alder" : "CA", - "Perdew-Zunger" : "PZ", - "Wigner" : 'WI', - "Revised-PBE-Pade" : "RP", - "revPBE" : "RE", - "Vosko-Wilk-Nusair" : "VW", - "B3LYP-LDA-VWN3" : "B3", - "B3LYP-LDA-BWN5" : "B5", - "BEEF" : "BF", - "no-xc" : "CO"} + dictionary_of_functionals = { + "PW91": "91", + "PBE": "PE", + "AM05": "AM", + "PBEsol": "PS", + "Hendin-Lundquist": "HL", + "Ceperley-Alder": "CA", + "Perdew-Zunger": "PZ", + "Wigner": "WI", + "Revised-PBE-Pade": "RP", + "revPBE": "RE", + "Vosko-Wilk-Nusair": "VW", + "B3LYP-LDA-VWN3": "B3", + "B3LYP-LDA-BWN5": "B5", + "BEEF": "BF", + "no-xc": "CO", + } # These magmoms are from projects past and present... 
Feel free to alter them # Ni-H from Ni-GB manuscript # dictionary_of_magmom = {"Ni" : 2.0, # "H" : 0.0} # rest from Fe-bulk manuscript - dictionary_of_magmom = {'Ac': -0.196, - 'Ag': 0.114, - 'Al': -0.17, - 'Ar': 0.354, - 'As': -0.136, - 'At': -0.084, - 'Au': 0.308, - 'Ba': -0.25, - 'Bi': -0.302, - 'Br': 0.158, - 'Ca': -0.494, - 'Cd': -0.158, - 'Ce': -0.928, - 'Cl': 0.286, - 'Co': 3.37, - 'Cr': -3.71, - 'Cs': 0.06, - 'Cu': 0.238, - 'Dy': 9.11, - 'Er': 5.048, - 'Eu': -13.498, - 'Fe': 3.0, - 'Fr': -0.046, - 'Ga': -0.4, - 'Gd': -14.248, - 'Ge': -0.258, - 'Hf': -1.17, - 'Hg': -0.1, - 'Ho': 6.942, - 'I': -0.024, - 'In': -0.51, - 'Ir': 0.756, - 'K': 0.152, - 'Kr': 0.384, - 'La': -0.416, - 'Lu': -0.544, - 'Mg': -0.128, - 'Mn': -4.128, - 'Mo': -1.662, - 'Na': -0.09, - 'Nb': -1.518, - 'Nd': -6.142, - 'Ne': 0.02, - #'Ne': -3.0, - 'Ni': 1.774, - 'Os': -0.224, - 'P': -0.112, - 'Pa': -1.184, - 'Pb': -0.41, - 'Pd': 0.73, - 'Pm': -8.76, - 'Po': -0.188, - 'Pr': -3.256, - 'Pt': 0.74, - 'Ra': -0.096, - 'Rb': 0.11, - 'Re': -1.27, - 'Rh': 1.194, - 'Rn': 0.032, - 'Ru': 0.454, - 'S': 0.082, - 'Sb': -0.186, - 'Sc': -1.12, - 'Se': -0.008, - 'Si': -0.194, - 'Sm': -10.964, - 'Sn': -0.426, - 'Sr': -0.128, - 'Ta': -1.588, - 'Tb': -12.568, - 'Tc': -1.208, - 'Te': -0.13, - 'Th': -0.508, - 'Ti': -1.93, - 'Tl': -0.45, - 'Tm': 2.776, - 'U': -2.76, - 'V': -2.86, - 'W': -1.606, - 'Xe': 0.288, - 'Y': -0.668, - 'Yb': 0.414, - 'Zn': -0.196, - 'Zr': -0.888, - 'H' : -0.018, - 'He': -0.010, - 'Li': -0.168, - 'Be': -0.302, - 'B' : -0.314, - 'C' : -0.204, - 'N' : 0.094, - 'O' : 0.454, - 'F' : 0.348} + dictionary_of_magmom = { + "Ac": -0.196, + "Ag": 0.114, + "Al": -0.17, + "Ar": 0.354, + "As": -0.136, + "At": -0.084, + "Au": 0.308, + "Ba": -0.25, + "Bi": -0.302, + "Br": 0.158, + "Ca": -0.494, + "Cd": -0.158, + "Ce": -0.928, + "Cl": 0.286, + "Co": 3.37, + "Cr": -3.71, + "Cs": 0.06, + "Cu": 0.238, + "Dy": 9.11, + "Er": 5.048, + "Eu": -13.498, + "Fe": 3.0, + "Fr": -0.046, + "Ga": -0.4, + "Gd": -14.248, + "Ge": -0.258, + "Hf": -1.17, + "Hg": -0.1, + "Ho": 6.942, + "I": -0.024, + "In": -0.51, + "Ir": 0.756, + "K": 0.152, + "Kr": 0.384, + "La": -0.416, + "Lu": -0.544, + "Mg": -0.128, + "Mn": -4.128, + "Mo": -1.662, + "Na": -0.09, + "Nb": -1.518, + "Nd": -6.142, + "Ne": 0.02, + #'Ne': -3.0, + "Ni": 1.774, + "Os": -0.224, + "P": -0.112, + "Pa": -1.184, + "Pb": -0.41, + "Pd": 0.73, + "Pm": -8.76, + "Po": -0.188, + "Pr": -3.256, + "Pt": 0.74, + "Ra": -0.096, + "Rb": 0.11, + "Re": -1.27, + "Rh": 1.194, + "Rn": 0.032, + "Ru": 0.454, + "S": 0.082, + "Sb": -0.186, + "Sc": -1.12, + "Se": -0.008, + "Si": -0.194, + "Sm": -10.964, + "Sn": -0.426, + "Sr": -0.128, + "Ta": -1.588, + "Tb": -12.568, + "Tc": -1.208, + "Te": -0.13, + "Th": -0.508, + "Ti": -1.93, + "Tl": -0.45, + "Tm": 2.776, + "U": -2.76, + "V": -2.86, + "W": -1.606, + "Xe": 0.288, + "Y": -0.668, + "Yb": 0.414, + "Zn": -0.196, + "Zr": -0.888, + "H": -0.018, + "He": -0.010, + "Li": -0.168, + "Be": -0.302, + "B": -0.314, + "C": -0.204, + "N": 0.094, + "O": 0.454, + "F": 0.348, + } ele_list, ele_count = stackElementString(structure) # This is a funny quirk involving 4d metals - we have to adjust the LMAXMIX flag for faster convergence if [i for i in ["Mo", "Nb"] if i in ele_list]: - #print("Mo/Nb present, LMAXMIX = 4 adjustment") + # print("Mo/Nb present, LMAXMIX = 4 adjustment") INCAR_file["LMAXMIX"] = 4 elif "W" in ele_list: - #print("W present, LMAXMIX = 6 adjustment") + # print("W present, LMAXMIX = 6 adjustment") INCAR_file["LMAXMIX"] = 6 else: - INCAR_file.pop('LMAXMIX',None) + 
INCAR_file.pop("LMAXMIX", None) if ISPIN != 2: - INCAR_file.pop("MAGMOM",None) - INCAR_file.pop('BMIX_MAG',None) - INCAR_file.pop('AMIX_MAG',None) + INCAR_file.pop("MAGMOM", None) + INCAR_file.pop("BMIX_MAG", None) + INCAR_file.pop("AMIX_MAG", None) else: - incar_magmom_str = '' + incar_magmom_str = "" for idx, element in enumerate(ele_list): if reverse_magmom: if element == base_element: - incar_magmom_str += "%s*%s " % (ele_count[idx], dictionary_of_magmom[ele_list[idx]]) + incar_magmom_str += "%s*%s " % ( + ele_count[idx], + dictionary_of_magmom[ele_list[idx]], + ) else: - incar_magmom_str += "%s*%s " % (ele_count[idx], -dictionary_of_magmom[ele_list[idx]]) + incar_magmom_str += "%s*%s " % ( + ele_count[idx], + -dictionary_of_magmom[ele_list[idx]], + ) else: - incar_magmom_str += "%s*%s " % (ele_count[idx], dictionary_of_magmom[ele_list[idx]]) + incar_magmom_str += "%s*%s " % ( + ele_count[idx], + dictionary_of_magmom[ele_list[idx]], + ) INCAR_file["MAGMOM"] = incar_magmom_str - if functional == "LDA": - INCAR_file.pop('GGA', None) + INCAR_file.pop("GGA", None) else: INCAR_file["GGA"] = dictionary_of_functionals[functional] # print('functional key is %s' % dictionary_of_functionals[functional]) return INCAR_file + def stackElementString(structure): site_element_list = [site.species_string for site in structure] past_element = site_element_list[0] @@ -246,52 +274,54 @@ def stackElementString(structure): element_count.append(count) return element_list, element_count -def createPOTCAR(structure, path = os.getcwd()): + +def createPOTCAR(structure, path=os.getcwd()): element_list = stackElementString(structure)[0] potcar_paths = [] for element in element_list: if element == "Nb": - element = "Nb_sv" # Use 13 electron - element = "Nb_pv" # Use 11 electron + element = "Nb_sv" # Use 13 electron + element = "Nb_pv" # Use 11 electron elif element == "K": - element = "K_sv" # 9 electron - element = "K_pv" # 7 electron + element = "K_sv" # 9 electron + element = "K_pv" # 7 electron elif element == "Ca": - element = "Ca_sv" # 9 electron - element = "Ca_pv" # 7 electron + element = "Ca_sv" # 9 electron + element = "Ca_pv" # 7 electron elif element == "Rb": - element = "Rb_sv" # 9 electron - element = "Rb_pv" # 7 electron + element = "Rb_sv" # 9 electron + element = "Rb_pv" # 7 electron elif element == "Sr": - element = "Sr_sv" # 9 electron + element = "Sr_sv" # 9 electron elif element == "Cs": - element = "Cs_sv" # 9 electron + element = "Cs_sv" # 9 electron elif element == "Ba": - element = "Ba_sv" # 10 electron + element = "Ba_sv" # 10 electron elif element == "Fr": - element = "Fr_sv" # 9 electron + element = "Fr_sv" # 9 electron elif element == "Ra": - element = "Ra_sv" # 9 electron + element = "Ra_sv" # 9 electron elif element == "Y": - element = "Y_sv" # 9 electron + element = "Y_sv" # 9 electron elif element == "Zr": - element = "Zr_sv" # 10 electron + element = "Zr_sv" # 10 electron elif element == "Fr": - element = "Fr_sv" # 9 electron + element = "Fr_sv" # 9 electron elif element == "Ra": - element = "Ra_sv" # 9 electron + element = "Ra_sv" # 9 electron elif element == "Y": - element = "Y_sv" # 9 electron + element = "Y_sv" # 9 electron potcar_paths.append(os.path.join(potcar_library_path, element, "POTCAR")) - with open(os.path.join(path, "POTCAR"),'wb') as wfd: + with open(os.path.join(path, "POTCAR"), "wb") as wfd: for f in potcar_paths: - with open(f,'rb') as fd: + with open(f, "rb") as fd: shutil.copyfileobj(fd, wfd) + class KPOINTS: """ Class for KPOINTS object for passing into 
createJobFolder @@ -303,13 +333,12 @@ class KPOINTS: shift: optional shift of mesh, input as list e.g. [0, 0, 0] """ + def __init__(self, subdivs, shift): self.subdivs = subdivs self.shift = shift - def to_file(self,\ - case_name = 'KPOINTS',\ - filepath = os.getcwd()): + def to_file(self, case_name="KPOINTS", filepath=os.getcwd()): """ Writes KPOINTS file with MP gamma centred grid: @@ -318,48 +347,58 @@ def to_file(self,\ """ createFolder(filepath) - f = io.open(os.path.join(filepath, "KPOINTS"), 'w', newline='\n') - with open(os.path.join(filepath, "KPOINTS"), 'a', newline='\n') as f: + f = io.open(os.path.join(filepath, "KPOINTS"), "w", newline="\n") + with open(os.path.join(filepath, "KPOINTS"), "a", newline="\n") as f: # File name (just string on first line of KPOINTS) - f.write('%s\n' % case_name) + f.write("%s\n" % case_name) # Use automatic generation "0" - f.write('0\n') + f.write("0\n") # Monkhorst-Pack Gamma centred grid - f.write('Gamma\n') + f.write("Gamma\n") # Subdivisions along reciprocal lattice vectors - subdiv_string = '' + subdiv_string = "" for i in self.subdivs: subdiv_string += "%s " % str(i) - f.write('%s\n' % subdiv_string) + f.write("%s\n" % subdiv_string) # optional shift of the mesh (s_1, s_2, s_3) - shift_string = '' + shift_string = "" for i in self.shift: shift_string += "%s " % str(i) - f.write('%s\n' % shift_string) + f.write("%s\n" % shift_string) f.close() -def createJobFolder(structure,\ - KPOINT = None,\ - folder_path = os.path.join(os.getcwd(), "jobfolder"),\ - INCAR = None,\ - jobfile = None,\ - quiet=True): + +def createJobFolder( + structure, + KPOINT=None, + folder_path=os.path.join(os.getcwd(), "jobfolder"), + INCAR=None, + jobfile=None, + quiet=True, +): # This assumes that incar file base is present already, please adjust this function to adjust the incar flags # creates a subdirectory of chosen name in current directory parent_folder = os.getcwd() createFolder(folder_path) - structure.to(fmt="poscar", filename = os.path.join(folder_path, f"starter-{os.path.basename(folder_path)}.vasp")) - structure.to(fmt="poscar", filename = os.path.join(folder_path, "POSCAR")) + structure.to( + fmt="poscar", + filename=os.path.join( + folder_path, f"starter-{os.path.basename(folder_path)}.vasp" + ), + ) + structure.to(fmt="poscar", filename=os.path.join(folder_path, "POSCAR")) - createPOTCAR(structure, path = "%s" % folder_path) + createPOTCAR(structure, path="%s" % folder_path) INCAR.write_file(os.path.join(folder_path, "INCAR")) if KPOINT: - KPOINT.to_file(filepath = folder_path) + KPOINT.to_file(filepath=folder_path) - jobfile.to_file(job_name = '%s.sh' % os.path.basename(folder_path),\ - output_path = "%s" % (folder_path)) + jobfile.to_file( + job_name="%s.sh" % os.path.basename(folder_path), + output_path="%s" % (folder_path), + ) if not quiet: print("Generating jobfolder, name %s" % (os.path.basename(folder_path))) diff --git a/utils/generic.py b/utils/generic.py index 5d16d81..c762fa2 100644 --- a/utils/generic.py +++ b/utils/generic.py @@ -12,6 +12,7 @@ from monty.os.path import find_exts from monty.io import zopen + def chunk_list(lst, n): """ Split a list into smaller chunks with a maximum size of n. 
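As a quick orientation for the job-folder helpers reformatted above, here is a minimal sketch of how KPOINTS, createJobFolder and the jobfile class (reformatted further down in this patch) might be wired together. The structure, template path and folder names are illustrative, and the INCAR is assumed to be a pymatgen Incar object since createJobFolder calls INCAR.write_file():

    from pymatgen.core import Structure
    from pymatgen.io.vasp.inputs import Incar

    from utils.jobfile import jobfile
    # KPOINTS and createJobFolder come from the module reformatted above
    # (import path omitted here).

    structure = Structure.from_file("POSCAR")            # any input structure
    incar = Incar.from_file("INCAR_base")                # base INCAR with flags already set
    kpts = KPOINTS(subdivs=[4, 4, 1], shift=[0, 0, 0])   # Gamma-centred 4x4x1 mesh
    job = jobfile(
        file_path="/home/hmai/CustodianJobfiles/template_BASE.sh",  # hypothetical template
        HPC="Gadi",
        CPU=48,
        cpu_per_node=48,
        walltime=24,
    )

    createJobFolder(
        structure,
        KPOINT=kpts,
        folder_path="jobs/slab_001",
        INCAR=incar,
        jobfile=job,
        quiet=False,
    )
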
@@ -29,7 +30,8 @@ def chunk_list(lst, n): >>> print(chunked_list) [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10]] """ - return [lst[i:i + n] for i in range(0, len(lst), n)] + return [lst[i : i + n] for i in range(0, len(lst), n)] + def get_latest_file_iteration(directory, filename_with_iteration): # Check for existing resubmit.log_m files and find the largest m @@ -37,22 +39,23 @@ def get_latest_file_iteration(directory, filename_with_iteration): for filename in os.listdir(directory): if filename_with_iteration in filename: resubmit_log_files.append(filename) - + max_integer = -1 - + if not resubmit_log_files: return -1 else: for log_file in resubmit_log_files: if log_file.startswith(filename_with_iteration): try: - num_str = log_file[len(filename_with_iteration):] + num_str = log_file[len(filename_with_iteration) :] num = int(num_str) max_integer = max(max_integer, num) except ValueError: pass # Ignore non-integer parts after "resubmit.log_" return max_integer - + + def search_line_in_file(filename, line_to_search, search_depth=None, reverse=True): """ Searches for a specific line in a file. @@ -85,22 +88,23 @@ def search_line_in_file(filename, line_to_search, search_depth=None, reverse=Tru - If the file is not found, the function returns False. """ try: - with open(filename, 'r') as file: + with open(filename, "r") as file: lines = file.readlines() if reverse: - lines = reversed(lines) # Reverse the lines + lines = reversed(lines) # Reverse the lines count = 0 for line in lines: if search_depth is not None and count >= search_depth: - break + break if line_to_search in line.strip(): - return True - count += 1 + return True + count += 1 return False except FileNotFoundError: # print("File not found:", filename) return False + def parse_lines(flist, trigger_start, trigger_end, recursive=True): """ Parses lines from a list of strings based on start and end triggers and returns the parsed data. @@ -155,6 +159,7 @@ def parse_lines(flist, trigger_start, trigger_end, recursive=True): return data + def find_directories_with_files(parent_dir, filenames, all_present=True): """ Finds directories in a parent directory that contain specified files. @@ -182,7 +187,9 @@ def find_directories_with_files(parent_dir, filenames, all_present=True): - The function returns a list of directories that meet the specified conditions. """ directories = [] - file_set = set(filenames) # Convert filenames to a set for efficient membership checking + file_set = set( + filenames + ) # Convert filenames to a set for efficient membership checking for root, dirs, files in os.walk(parent_dir): # Check if the intersection of file_set and files is not empty @@ -193,6 +200,7 @@ def find_directories_with_files(parent_dir, filenames, all_present=True): return directories + def extract_tarball(archive_filepath, extraction_path): """ Extracts the contents of an archive file to the specified extraction path. @@ -224,7 +232,10 @@ def extract_tarball(archive_filepath, extraction_path): except Exception as e: print(f"Error extracting archive: {e}") -def find_and_extract_tarballs_parallel(parent_dir, extensions=(".tar.gz",), max_workers=None): + +def find_and_extract_tarballs_parallel( + parent_dir, extensions=(".tar.gz",), max_workers=None +): """ Finds tarball files with specified extensions in a directory and extracts them in parallel. 
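A short usage sketch for the generic helpers above; the parent directory, filenames and worker count are illustrative, and the OUTCAR convergence string is just one example of a line worth searching for:

    import os

    from utils.generic import (
        find_directories_with_files,
        search_line_in_file,
        find_and_extract_tarballs_parallel,
    )

    # Unpack any archived runs first, four workers at a time
    find_and_extract_tarballs_parallel(
        "/scratch/vasp_runs", extensions=(".tar.gz",), max_workers=4
    )

    # Keep only directories that contain both an OUTCAR and a vasprun.xml
    run_dirs = find_directories_with_files(
        "/scratch/vasp_runs", ["OUTCAR", "vasprun.xml"], all_present=True
    )

    # Flag runs whose OUTCAR tail mentions reaching the required accuracy
    converged = [
        d
        for d in run_dirs
        if search_line_in_file(
            os.path.join(d, "OUTCAR"),
            "reached required accuracy",
            search_depth=200,
            reverse=True,
        )
    ]
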
@@ -249,11 +260,15 @@ def find_and_extract_tarballs_parallel(parent_dir, extensions=(".tar.gz",), max_ extraction_filepaths = [os.path.dirname(filepath) for filepath in filepaths] # Prepare args_list as a list of tuples - args_list = [(filepath, extraction_path) for filepath, extraction_path in zip(filepaths, extraction_filepaths)] + args_list = [ + (filepath, extraction_path) + for filepath, extraction_path in zip(filepaths, extraction_filepaths) + ] # Call parallelise function parallelise(extract_tarball, args_list, max_workers=max_workers) + def extract_files_from_tarball(tarball_filepath, filenames, suffix=None, prefix=None): """ Extracts specific files from a tarball file and optionally renames them with a suffix. @@ -286,25 +301,39 @@ def extract_files_from_tarball(tarball_filepath, filenames, suffix=None, prefix= elif tarball_filepath.endswith(".bz2"): compression_type = "bz2" else: - raise ValueError("Unsupported compression type. Only .gz and .bz2 are supported.") + raise ValueError( + "Unsupported compression type. Only .gz and .bz2 are supported." + ) with tarfile.open(tarball_filepath, f"r:{compression_type}") as tar: extracted_filepaths = [] for filename in filenames: try: - matching_names = [name for name in tar.getnames() if name.endswith(filename)] + matching_names = [ + name for name in tar.getnames() if name.endswith(filename) + ] for name in matching_names: tar.extract(name, path=os.path.dirname(tarball_filepath)) if name.startswith("./"): - extracted_filepath = os.path.join(os.path.dirname(tarball_filepath), name[2:]) + extracted_filepath = os.path.join( + os.path.dirname(tarball_filepath), name[2:] + ) else: - extracted_filepath = os.path.join(os.path.dirname(tarball_filepath), name) + extracted_filepath = os.path.join( + os.path.dirname(tarball_filepath), name + ) if suffix: - new_path = os.path.join(os.path.dirname(extracted_filepath), os.path.basename(extracted_filepath) + "_" + suffix) + new_path = os.path.join( + os.path.dirname(extracted_filepath), + os.path.basename(extracted_filepath) + "_" + suffix, + ) os.rename(extracted_filepath, new_path) extracted_filepath = new_path if prefix: - new_path = os.path.join(prefix + "_" + os.path.dirname(extracted_filepath), os.path.basename(extracted_filepath)) + new_path = os.path.join( + prefix + "_" + os.path.dirname(extracted_filepath), + os.path.basename(extracted_filepath), + ) os.rename(extracted_filepath, new_path) extracted_filepath = new_path extracted_filepaths.append(extracted_filepath) @@ -313,7 +342,10 @@ def extract_files_from_tarball(tarball_filepath, filenames, suffix=None, prefix= return extracted_filepaths -def extract_files_from_tarballs_parallel(tarball_paths, filenames, suffix=False, max_workers=None): + +def extract_files_from_tarballs_parallel( + tarball_paths, filenames, suffix=False, max_workers=None +): """ Extracts specific files from multiple tarball files in parallel and optionally renames them with suffixes. @@ -346,39 +378,47 @@ def extract_files_from_tarballs_parallel(tarball_paths, filenames, suffix=False, elif isinstance(filenames, list): if isinstance(filenames[0], str): if len(filenames) != len(tarball_paths): - raise ValueError("The length of filenames should match the number of tarball_paths.") + raise ValueError( + "The length of filenames should match the number of tarball_paths." 
+ ) else: raise ValueError("Invalid format for filenames.") else: raise ValueError("Invalid format for filenames.") if suffix: - suffixes = [os.path.basename(filepath).split(".tar")[0] for filepath in tarball_paths] + suffixes = [ + os.path.basename(filepath).split(".tar")[0] for filepath in tarball_paths + ] else: suffixes = [None for _ in tarball_paths] # Prepare args_list as a list of tuples - args_list = [(tarball_path, filename, suffix) for tarball_path, filename, suffix in zip(tarball_paths, filenames, suffixes)] + args_list = [ + (tarball_path, filename, suffix) + for tarball_path, filename, suffix in zip(tarball_paths, filenames, suffixes) + ] # Call parallelise function parallelise(extract_files_from_tarball, args_list, max_workers=max_workers) -def find_and_extract_files_from_tarballs_parallel(parent_dir, - extension=(".tar.gz",), - filenames=[], - suffix=False, - prefix=False, - exclude_containing=["error."], - max_workers=None): + +def find_and_extract_files_from_tarballs_parallel( + parent_dir, + extension=(".tar.gz",), + filenames=[], + suffix=False, + prefix=False, + exclude_containing=["error."], + max_workers=None, +): """ Finds and extracts specific files from multiple tarball files within a parent directory using parallel processing. Parameters: parent_dir (str): The path of the parent directory to search for tarball files. extension (str or tuple, optional): The file extension(s) of the tarball files to search for. Defaults to ".tar.gz". - filenames (str or list, optional): The filenames to extract from the tarball(s). If a string, it will be used for all tarball files. - If a list, it should have the same length as the number of tarball files found in the parent directory. - Defaults to an empty list, which means all files will be extracted. + filenames (list, optional): List of filenames to extract from the tarball files. suffix (bool, optional): Determines whether to append suffixes to the extracted filenames. Defaults to False. prefix (bool, optional): Determines whether to prepend prefixes to the extracted filenames. Defaults to False. exclude_containing (list, optional): A list of strings. Tarballs whose names contain any of these strings will be excluded from extraction. @@ -398,43 +438,60 @@ def find_and_extract_files_from_tarballs_parallel(parent_dir, - The function searches for tarball files within the specified `parent_dir` using the provided `extension`. - It finds and extracts specific `filenames` from the tarball files, either all files or the specified files. - If `suffix` is True, the extracted filenames will be appended with suffixes. + - If `prefix` is True, the extracted filenames will be prepended with prefixes. - The extraction process is parallelized using the `parallelise()` function and the `extract_files_from_tarball` function. - Tarballs whose names contain any of the strings in `exclude_containing` will be skipped during extraction. 
""" filepaths = find_exts(top=parent_dir, exts=extension) # Filter out tarballs that contain any of the strings in exclude_containing - filepaths = [filepath for filepath in filepaths if not any(exclude in os.path.basename(filepath) for exclude in exclude_containing)] + filepaths = [ + filepath + for filepath in filepaths + if not any( + exclude in os.path.basename(filepath) for exclude in exclude_containing + ) + ] if suffix: - suffixes = [os.path.basename(filepath).split(".tar")[0] for filepath in filepaths] + suffixes = [ + os.path.basename(filepath).split(".tar")[0] for filepath in filepaths + ] else: suffixes = [None for _ in filepaths] - + if prefix: - prefixes = [os.path.basename(filepath).split(".tar")[0] for filepath in filepaths] + prefixes = [ + os.path.basename(filepath).split(".tar")[0] for filepath in filepaths + ] else: prefixes = [None for _ in filepaths] - if isinstance(filenames, str): - filenames = [filenames] * len(filepaths) - elif isinstance(filenames, list): - if len(filenames) != len(filepaths): - raise ValueError("The length of filenames should match the number of tarball files found.") - else: - raise ValueError("Invalid format for filenames.") + if not isinstance(filenames, list): + raise ValueError( + "The 'filenames' parameter should be a list of filenames to extract." + ) # Prepare args_list as a list of tuples - args_list = [(filepath, filename) for filepath, filename in zip(filepaths, filenames)] + args_list = [(filepath, filenames) for filepath in filepaths] # Call parallelise function - parallelise(extract_files_from_tarball, args_list, max_workers=max_workers, suffix=suffixes, prefix=prefixes) - -def compress_directory(directory_path, - exclude_files = [], - exclude_file_patterns = [], - print_message=True, - inside_dir=True): + parallelise( + extract_files_from_tarball, + args_list, + max_workers=max_workers, + suffix=suffixes, + prefix=prefixes, + ) + + +def compress_directory( + directory_path, + exclude_files=[], + exclude_file_patterns=[], + print_message=True, + inside_dir=True, +): """ Compresses a directory and its contents into a tarball with gzip compression. @@ -465,9 +522,14 @@ def compress_directory(directory_path, - The `print_message` parameter controls whether a message indicating the compression is printed. By default, it is set to True. 
""" if inside_dir: - output_file = os.path.join(directory_path, os.path.basename(directory_path) + '.tar.gz') + output_file = os.path.join( + directory_path, os.path.basename(directory_path) + ".tar.gz" + ) else: - output_file = os.path.join(os.path.dirname(directory_path), os.path.basename(directory_path) + '.tar.gz') + output_file = os.path.join( + os.path.dirname(directory_path), + os.path.basename(directory_path) + ".tar.gz", + ) with tarfile.open(output_file, "w:gz") as tar: for root, _, files in os.walk(directory_path): for file in files: @@ -475,23 +537,31 @@ def compress_directory(directory_path, # Exclude the output tarball from being added if file_path == output_file: continue - if any(fnmatch.fnmatch(file, pattern) for pattern in exclude_file_patterns): + if any( + fnmatch.fnmatch(file, pattern) for pattern in exclude_file_patterns + ): continue if file in exclude_files: continue - arcname = os.path.join(os.path.basename(directory_path), os.path.relpath(file_path, directory_path)) + arcname = os.path.join( + os.path.basename(directory_path), + os.path.relpath(file_path, directory_path), + ) tar.add(file_path, arcname=arcname) - # tar.add(file_path, arcname=os.path.relpath(file_path, directory_path)) + # tar.add(file_path, arcname=os.path.relpath(file_path, directory_path)) # print(f"{file} added") if print_message: print(f"Compressed directory: {directory_path}") -def compress_directory_parallel(directory_paths, - exclude_files=None, - exclude_file_patterns=None, - print_message=None, - inside_dir=None, - max_workers=None): + +def compress_directory_parallel( + directory_paths, + exclude_files=None, + exclude_file_patterns=None, + print_message=None, + inside_dir=None, + max_workers=None, +): """ Compresses multiple directories and their contents into tarballs with gzip compression in parallel. @@ -522,15 +592,18 @@ def compress_directory_parallel(directory_paths, - The `print_message` parameter controls whether a message indicating the compression is printed for each directory. - The function parallelizes the compression process using the `parallelise()` function and the `compress_directory` function. """ - parallelise(compress_directory, - directory_paths, - max_workers=max_workers, - exclude_files=exclude_files, - exclude_file_patterns=exclude_file_patterns, - print_message=print_message, - inside_dir=inside_dir) - -def cleanup_dir(directory_path, keep=True, files=[], file_patterns=[]): + parallelise( + compress_directory, + directory_paths, + max_workers=max_workers, + exclude_files=exclude_files, + exclude_file_patterns=exclude_file_patterns, + print_message=print_message, + inside_dir=inside_dir, + ) + + +def cleanup_dir(directory_path, keep=True, files=[], file_patterns=[]): """ Cleans up files in a directory based on specified conditions. @@ -577,15 +650,18 @@ def cleanup_dir(directory_path, keep=True, files=[], file_patterns=[]): break if should_remove or file in files: os.remove(file_path) - -def compress_and_cleanup(directory_path, - exclude_files_from_tarball=[], - exclude_filepatterns_from_tarball=[], - keep_after=True, - files=[], - file_patterns=[], - print_msg=False, - inside_dir=True): + + +def compress_and_cleanup( + directory_path, + exclude_files_from_tarball=[], + exclude_filepatterns_from_tarball=[], + keep_after=True, + files=[], + file_patterns=[], + print_msg=False, + inside_dir=True, +): """ Compresses a directory and its contents into a tarball with gzip compression, and performs cleanup operations. 
@@ -615,32 +691,39 @@ def compress_and_cleanup(directory_path, - The `print_msg` parameter controls whether a message indicating the compression is printed for the directory. - The `inside_dir` parameter determines whether the output tarball should be placed inside the directory (True) or in the same directory as the directory (False). """ - compress_directory(directory_path=directory_path, - exclude_files=exclude_files_from_tarball, - exclude_file_patterns=exclude_filepatterns_from_tarball, - print_message=print_msg, - inside_dir=inside_dir) + compress_directory( + directory_path=directory_path, + exclude_files=exclude_files_from_tarball, + exclude_file_patterns=exclude_filepatterns_from_tarball, + print_message=print_msg, + inside_dir=inside_dir, + ) # Add the newly compressed directory to the exceptions, or we'll remove it! if keep_after: file_patterns += [f"{os.path.basename(directory_path)}.tar.gz"] else: file_patterns = file_patterns - cleanup_dir(directory_path=directory_path, - keep=keep_after, - files=files, - file_patterns=file_patterns) - -def find_and_compress_directories_parallel(parent_dir, - valid_dir_if_filenames, - all_present=False, - exclude_files_from_tarball=[], - exclude_filepatterns_from_tarball=[], - keep_after=True, - files=[], - file_patterns=[], - print_msg=False, - inside_dir=True, - max_workers=None): + cleanup_dir( + directory_path=directory_path, + keep=keep_after, + files=files, + file_patterns=file_patterns, + ) + + +def find_and_compress_directories_parallel( + parent_dir, + valid_dir_if_filenames, + all_present=False, + exclude_files_from_tarball=[], + exclude_filepatterns_from_tarball=[], + keep_after=True, + files=[], + file_patterns=[], + print_msg=False, + inside_dir=True, + max_workers=None, +): """ Finds directories containing specific files, and compresses each directory and its contents into tarballs with gzip compression in parallel. @@ -676,17 +759,22 @@ def find_and_compress_directories_parallel(parent_dir, - The `inside_dir` parameter determines whether the output tarball should be placed inside each directory (True) or in the same directory as each directory (False). - The function parallelizes the compression process using the `parallelise()` function and the `compress_and_cleanup` function. """ - dirs_to_compress = find_directories_with_files(parent_dir=parent_dir, filenames=valid_dir_if_filenames, all_present=all_present) - parallelise(compress_and_cleanup, - dirs_to_compress, - max_workers=max_workers, - exclude_files_from_tarball=exclude_files_from_tarball, - exclude_filepatterns_from_tarball=exclude_filepatterns_from_tarball, - keep_after=keep_after, - files = files, - file_patterns = file_patterns, - print_msg = print_msg, - inside_dir = inside_dir) + dirs_to_compress = find_directories_with_files( + parent_dir=parent_dir, filenames=valid_dir_if_filenames, all_present=all_present + ) + parallelise( + compress_and_cleanup, + dirs_to_compress, + max_workers=max_workers, + exclude_files_from_tarball=exclude_files_from_tarball, + exclude_filepatterns_from_tarball=exclude_filepatterns_from_tarball, + keep_after=keep_after, + files=files, + file_patterns=file_patterns, + print_msg=print_msg, + inside_dir=inside_dir, + ) + def is_line_in_file(filepath, line, exact_match=True): """ @@ -711,7 +799,7 @@ def is_line_in_file(filepath, line, exact_match=True): ... 
print("Line not found in the file.") """ try: - with open(filepath, 'r') as file: + with open(filepath, "r") as file: for file_line in file: if exact_match and line == file_line.strip(): return True diff --git a/utils/jobfile.py b/utils/jobfile.py index b5d9a73..5031286 100644 --- a/utils/jobfile.py +++ b/utils/jobfile.py @@ -1,6 +1,7 @@ import os import shutil + def create_folder(directory, delete_folder=False, quiet=True): """ Create a folder if it doesn't exist, and optionally delete it if it does. @@ -29,20 +30,23 @@ def create_folder(directory, delete_folder=False, quiet=True): print("No replacement/deletion created due to folder existing") else: os.makedirs(directory) - + + class jobfile: - def __init__(self, - file_path, - HPC = "Gadi", - VASP_version = "5.4.4", - CPU = 192, - cpu_per_node = 48, - RAM = 64, - walltime = 999, - max_resubmissions = 999, - generic_insert_field = [], - generic_insert = []): - ''' + def __init__( + self, + file_path, + HPC="Gadi", + VASP_version="5.4.4", + CPU=192, + cpu_per_node=48, + RAM=64, + walltime=999, + max_resubmissions=999, + generic_insert_field=[], + generic_insert=[], + ): + """ Initialize a jobfile instance. Parameters: @@ -50,7 +54,7 @@ def __init__(self, - HPC (str): One of "Gadi", "Setonix", or "Magnus" specifying the high-performance computing system. - VASP_version (str): VASP version, defaults to "5.4.4". - CPU (int): Number of CPUs to use in the job. - - cpu_per_node (int): Number of CPUs per node on the HPC system. + - cpu_per_node (int): Number of CPUs per node on the HPC system. Gadi: 48 is 1 node (Only use in full nodes, as you are charged for full nodes) Magnus: 24 is 1 node (Only use in full nodes, as you are charged for full nodes) Setonix: 128 is 1 node (Charged on a per-cpu hour basis, not per-node like Gadi) @@ -62,7 +66,7 @@ def __init__(self, Returns: - None - ''' + """ self.file_path = file_path self.HPC = HPC self.VASP_version = VASP_version @@ -73,9 +77,11 @@ def __init__(self, self.cpu_per_node = cpu_per_node self.generic_insert_field = generic_insert_field self.generic_insert = generic_insert - - def to_file(self, job_name='template_job', output_path=os.path.join(os.getcwd(), "test")): - ''' + + def to_file( + self, job_name="template_job", output_path=os.path.join(os.getcwd(), "test") + ): + """ Generate a jobfile by replacing placeholders in the template and insert values from generic_insert. 
Parameters: @@ -84,11 +90,11 @@ def to_file(self, job_name='template_job', output_path=os.path.join(os.getcwd(), Returns: - None - ''' + """ create_folder(output_path) - with open("%s" % (self.file_path), 'r') as fin: + with open("%s" % (self.file_path), "r") as fin: filedata = fin.read() fin = open("%s" % (self.file_path), "rt", newline="\n") @@ -97,9 +103,13 @@ def to_file(self, job_name='template_job', output_path=os.path.join(os.getcwd(), replace_dict = { "{WALLTIMESTRING}": "%s:00:00" % self.walltime, "{CPUSTRING}": str(self.CPU), - "{MAXCONVITERATIONS}": str(self.max_resubmissions-1), + "{MAXCONVITERATIONS}": str(self.max_resubmissions - 1), "{MEMORYSTRING}": "%sGB" % self.RAM if self.HPC == "Gadi" else "", - "{NODESTRING}": "1" if self.CPU <= self.cpu_per_node else "%s" % int(self.CPU/self.cpu_per_node), + "{NODESTRING}": ( + "1" + if self.CPU <= self.cpu_per_node + else "%s" % int(self.CPU / self.cpu_per_node) + ), "{CASESTRING}": "%s" % job_name, } @@ -107,18 +117,26 @@ def to_file(self, job_name='template_job', output_path=os.path.join(os.getcwd(), filedata = filedata.replace(field, value) if self.VASP_version == "5.4.4": - filedata = filedata.replace("{VASPMODULELOADSTRING}", 'module load vasp/%s' % self.VASP_version) + filedata = filedata.replace( + "{VASPMODULELOADSTRING}", "module load vasp/%s" % self.VASP_version + ) else: if self.HPC == "Setonix" and self.VASP_version in ["6.3.0", "6.2.1"]: - filedata = filedata.replace("{VASPMODULELOADSTRING}", 'module load vasp6/%s' % self.VASP_version) + filedata = filedata.replace( + "{VASPMODULELOADSTRING}", "module load vasp6/%s" % self.VASP_version + ) else: - filedata = filedata.replace("{VASPMODULELOADSTRING}", 'module load vasp/%s' % self.VASP_version) + filedata = filedata.replace( + "{VASPMODULELOADSTRING}", "module load vasp/%s" % self.VASP_version + ) # Insert values from generic_insert into corresponding fields - for insert_field, insert_value in zip(self.generic_insert_field, self.generic_insert): + for insert_field, insert_value in zip( + self.generic_insert_field, self.generic_insert + ): if os.path.isfile(insert_value): # If insert_value is a path, inject the contents of the file - with open(insert_value, 'r') as insert_file: + with open(insert_value, "r") as insert_file: insert_content = insert_file.read() filedata = filedata.replace(insert_field, insert_content) else: @@ -126,15 +144,14 @@ def to_file(self, job_name='template_job', output_path=os.path.join(os.getcwd(), filedata = filedata.replace(insert_field, insert_value) # Write the file out again - with open(os.path.join(output_path, job_name), 'w') as fout: + with open(os.path.join(output_path, job_name), "w") as fout: fout.write(filedata) fin.close() fout.close() - - @staticmethod - def _replace_fields(template_path, - user_inputs): + + @staticmethod + def _replace_fields(template_path, user_inputs): """ Read a file, replace specified fields with user inputs, and create a jobfile instance. 
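A brief sketch of driving the jobfile class above; the template path is hypothetical, and the numbers simply follow the Setonix guidance in the docstring (128 cores per node, VASP 6.x loaded via the vasp6 module):

    from utils.jobfile import jobfile

    job = jobfile(
        file_path="/home/hmai/CustodianJobfiles/template_SDRS.sh",  # hypothetical template
        HPC="Setonix",
        VASP_version="6.3.0",     # picks up "module load vasp6/6.3.0"
        CPU=128,                  # one full Setonix node
        cpu_per_node=128,
        walltime=24,              # becomes "24:00:00"
        max_resubmissions=5,      # {MAXCONVITERATIONS} becomes 4
    )

    # Writes the filled-in script to <output_path>/job_0001.sh
    job.to_file(job_name="job_0001.sh", output_path="/scratch/vasp_runs/job_0001")
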
@@ -144,7 +161,7 @@ def _replace_fields(template_path, Returns: - string containing generated text - + Example: template_path = '/cmmc/u/hmai/personal_dev/utils/jobscript_templates/CustodianScripts/SDRS_template.py' user_inputs = { @@ -155,7 +172,7 @@ def _replace_fields(template_path, """ # Read the template file - with open(template_path, 'r') as template_file: + with open(template_path, "r") as template_file: template_content = template_file.read() # Replace specified fields with user inputs @@ -163,18 +180,18 @@ def _replace_fields(template_path, template_content = template_content.replace(field, str(value)) return template_content - + def to_string(self): - ''' + """ Convert the jobfile instance to a string representation. Returns: - str: String representation of the jobfile content. - ''' - with open(self.file_path, 'r') as file: + """ + with open(self.file_path, "r") as file: content = file.read() # Replace placeholders in the content if needed # content = content.replace("{SOME_PLACEHOLDER}", str(self.some_attribute)) - return content \ No newline at end of file + return content diff --git a/utils/jobscript_templates/CustodianScripts/template_BASE.py b/utils/jobscript_templates/CustodianScripts/template_BASE.py index 84b0d8c..8daa80a 100644 --- a/utils/jobscript_templates/CustodianScripts/template_BASE.py +++ b/utils/jobscript_templates/CustodianScripts/template_BASE.py @@ -1,10 +1,10 @@ import sys from custodian.custodian import Custodian from custodian.vasp.handlers import ( - VaspErrorHandler, + VaspErrorHandler, NonConvergingErrorHandler, - PositiveEnergyErrorHandler, - FrozenJobErrorHandler + PositiveEnergyErrorHandler, + FrozenJobErrorHandler, ) from utils.custom_custodian_handlers import Han_CustomVaspErrorHandler from custodian.vasp.jobs import VaspJob @@ -16,11 +16,9 @@ Han_CustomVaspErrorHandler(), NonConvergingErrorHandler(), PositiveEnergyErrorHandler(), - FrozenJobErrorHandler(output_filename=output_filename) + FrozenJobErrorHandler(output_filename=output_filename), ] -jobs = [VaspJob(sys.argv[1:], - output_file=output_filename, - suffix = "")] +jobs = [VaspJob(sys.argv[1:], output_file=output_filename, suffix="")] c = Custodian(handlers, jobs, max_errors=10) c.run() diff --git a/utils/jobscript_templates/CustodianScripts/template_DRS.py b/utils/jobscript_templates/CustodianScripts/template_DRS.py index 8339fb0..e678779 100644 --- a/utils/jobscript_templates/CustodianScripts/template_DRS.py +++ b/utils/jobscript_templates/CustodianScripts/template_DRS.py @@ -1,10 +1,10 @@ import sys from custodian.custodian import Custodian from custodian.vasp.handlers import ( - VaspErrorHandler, + VaspErrorHandler, NonConvergingErrorHandler, - PositiveEnergyErrorHandler, - FrozenJobErrorHandler + PositiveEnergyErrorHandler, + FrozenJobErrorHandler, ) from utils.custom_custodian_handlers import Han_CustomVaspErrorHandler from custodian.vasp.jobs import VaspJob @@ -16,36 +16,65 @@ Han_CustomVaspErrorHandler(), NonConvergingErrorHandler(), PositiveEnergyErrorHandler(), - FrozenJobErrorHandler(output_filename=output_filename) + FrozenJobErrorHandler(output_filename=output_filename), ] # Original job list original_jobs = [ - VaspJob(sys.argv[1:], - output_file=output_filename, - suffix=".relax_1", - final=False, - settings_override=[ - {"dict": "INCAR", "action": {"_set": {"NSW": 100, "LAECHG": False, "LCHARGE": False, "NELM": 80, "EDIFF": 1E-4, "KSPACING": 0.9}}} - ], - copy_magmom=True), - - VaspJob(sys.argv[1:], - output_file=output_filename, - suffix=".relax_2", - final=False, - 
settings_override=[ - {"file": "CONTCAR", "action": {"_file_copy": {"dest": "POSCAR"}}}, - {"dict": "INCAR", "action": {"_set": {"KSPACING": 0.5, "EDIFF": 1E-5}}}, - ], - copy_magmom=True), - - VaspJob(sys.argv[1:], - output_file=output_filename, - suffix="", - settings_override=[{"dict": "INCAR", "action": {"_set": {"NSW": 0, "LAECHG": True, "LCHARGE": True, "NELM": 500, "ALGO": "VeryFast"}}}, - {"file": "CONTCAR", "action": {"_file_copy": {"dest": "POSCAR"}}}]) - ] + VaspJob( + sys.argv[1:], + output_file=output_filename, + suffix=".relax_1", + final=False, + settings_override=[ + { + "dict": "INCAR", + "action": { + "_set": { + "NSW": 100, + "LAECHG": False, + "LCHARGE": False, + "NELM": 80, + "EDIFF": 1e-4, + "KSPACING": 0.9, + } + }, + } + ], + copy_magmom=True, + ), + VaspJob( + sys.argv[1:], + output_file=output_filename, + suffix=".relax_2", + final=False, + settings_override=[ + {"file": "CONTCAR", "action": {"_file_copy": {"dest": "POSCAR"}}}, + {"dict": "INCAR", "action": {"_set": {"KSPACING": 0.5, "EDIFF": 1e-5}}}, + ], + copy_magmom=True, + ), + VaspJob( + sys.argv[1:], + output_file=output_filename, + suffix="", + settings_override=[ + { + "dict": "INCAR", + "action": { + "_set": { + "NSW": 0, + "LAECHG": True, + "LCHARGE": True, + "NELM": 500, + "ALGO": "VeryFast", + } + }, + }, + {"file": "CONTCAR", "action": {"_file_copy": {"dest": "POSCAR"}}}, + ], + ), +] # Number of elements to get from the end of the list n = {STAGES_LEFT} diff --git a/utils/jobscript_templates/CustodianScripts/template_SDRS.py b/utils/jobscript_templates/CustodianScripts/template_SDRS.py index baadcfe..8383ee4 100644 --- a/utils/jobscript_templates/CustodianScripts/template_SDRS.py +++ b/utils/jobscript_templates/CustodianScripts/template_SDRS.py @@ -1,10 +1,10 @@ import sys from custodian.custodian import Custodian from custodian.vasp.handlers import ( - VaspErrorHandler, + VaspErrorHandler, NonConvergingErrorHandler, - PositiveEnergyErrorHandler, - FrozenJobErrorHandler + PositiveEnergyErrorHandler, + FrozenJobErrorHandler, ) from utils.custom_custodian_handlers import Han_CustomVaspErrorHandler from custodian.vasp.jobs import VaspJob @@ -16,46 +16,90 @@ Han_CustomVaspErrorHandler(), NonConvergingErrorHandler(), PositiveEnergyErrorHandler(), - FrozenJobErrorHandler(output_filename=output_filename) + FrozenJobErrorHandler(output_filename=output_filename), ] # Original job list original_jobs = [ - VaspJob(sys.argv[1:], - output_file=output_filename, - suffix=".static_1", - final=False, - settings_override=[{"dict": "INCAR", "action": {"_set": {"NSW": 0, "LAECHG": True, "LCHARGE": True, "NELM": 400, "KSPACING": 0.5}}}]), - - VaspJob(sys.argv[1:], - output_file=output_filename, - suffix=".relax_1", - final=False, - settings_override=[ - {"file": "CHGCAR", "action": {"_file_copy": {"dest": "CHGCAR.static_1"}}}, - {"file": "AECCAR0", "action": {"_file_copy": {"dest": "AECCAR0.static_1"}}}, - {"file": "AECCAR1", "action": {"_file_copy": {"dest": "AECCAR1.static_1"}}}, - {"file": "AECCAR2", "action": {"_file_copy": {"dest": "AECCAR2.static_1"}}}, - {"dict": "INCAR", "action": {"_set": {"NSW": 100, "LAECHG": False, "LCHARGE": False, "NELM": 80, "ALGO": "VeryFast", "EDIFF": 1E-4, "KSPACING": 0.9}}} - ], - copy_magmom=True), - - VaspJob(sys.argv[1:], - output_file=output_filename, - suffix=".relax_2", - final=False, - settings_override=[ - {"file": "CONTCAR", "action": {"_file_copy": {"dest": "POSCAR"}}}, - {"dict": "INCAR", "action": {"_set": {"KSPACING": 0.5, "EDIFF": 1E-5}}}, - ], - copy_magmom=True), - - 
VaspJob(sys.argv[1:], - output_file=output_filename, - suffix="", - settings_override=[{"dict": "INCAR", "action": {"_set": {"NSW": 0, "LAECHG": True, "LCHARGE": True, "NELM": 500, "ALGO": "VeryFast"}}}, - {"file": "CONTCAR", "action": {"_file_copy": {"dest": "POSCAR"}}}]) - ] + VaspJob( + sys.argv[1:], + output_file=output_filename, + suffix=".static_1", + final=False, + settings_override=[ + { + "dict": "INCAR", + "action": { + "_set": { + "NSW": 0, + "LAECHG": True, + "LCHARGE": True, + "NELM": 400, + "KSPACING": 0.5, + } + }, + } + ], + ), + VaspJob( + sys.argv[1:], + output_file=output_filename, + suffix=".relax_1", + final=False, + settings_override=[ + {"file": "CHGCAR", "action": {"_file_copy": {"dest": "CHGCAR.static_1"}}}, + {"file": "AECCAR0", "action": {"_file_copy": {"dest": "AECCAR0.static_1"}}}, + {"file": "AECCAR1", "action": {"_file_copy": {"dest": "AECCAR1.static_1"}}}, + {"file": "AECCAR2", "action": {"_file_copy": {"dest": "AECCAR2.static_1"}}}, + { + "dict": "INCAR", + "action": { + "_set": { + "NSW": 100, + "LAECHG": False, + "LCHARGE": False, + "NELM": 80, + "ALGO": "VeryFast", + "EDIFF": 1e-4, + "KSPACING": 0.9, + } + }, + }, + ], + copy_magmom=True, + ), + VaspJob( + sys.argv[1:], + output_file=output_filename, + suffix=".relax_2", + final=False, + settings_override=[ + {"file": "CONTCAR", "action": {"_file_copy": {"dest": "POSCAR"}}}, + {"dict": "INCAR", "action": {"_set": {"KSPACING": 0.5, "EDIFF": 1e-5}}}, + ], + copy_magmom=True, + ), + VaspJob( + sys.argv[1:], + output_file=output_filename, + suffix="", + settings_override=[ + { + "dict": "INCAR", + "action": { + "_set": { + "NSW": 0, + "LAECHG": True, + "LCHARGE": True, + "NELM": 500, + "ALGO": "VeryFast", + } + }, + }, + {"file": "CONTCAR", "action": {"_file_copy": {"dest": "POSCAR"}}}, + ], + ), +] # Number of elements to get from the end of the list n = {STAGES_LEFT} diff --git a/utils/jobscript_templates/CustodianScripts/template_SDRS_KPOINTS.py b/utils/jobscript_templates/CustodianScripts/template_SDRS_KPOINTS.py index 1b3dfbc..4bd118a 100644 --- a/utils/jobscript_templates/CustodianScripts/template_SDRS_KPOINTS.py +++ b/utils/jobscript_templates/CustodianScripts/template_SDRS_KPOINTS.py @@ -1,10 +1,10 @@ import sys from custodian.custodian import Custodian from custodian.vasp.handlers import ( - VaspErrorHandler, + VaspErrorHandler, NonConvergingErrorHandler, - PositiveEnergyErrorHandler, - FrozenJobErrorHandler + PositiveEnergyErrorHandler, + FrozenJobErrorHandler, ) from utils.custom_custodian_handlers import Han_CustomVaspErrorHandler from custodian.vasp.jobs import VaspJob @@ -16,45 +16,90 @@ Han_CustomVaspErrorHandler(), NonConvergingErrorHandler(), PositiveEnergyErrorHandler(), - FrozenJobErrorHandler(output_filename=output_filename) + FrozenJobErrorHandler(output_filename=output_filename), ] # Original job list original_jobs = [ - VaspJob(sys.argv[1:], - output_file=output_filename, - suffix = ".relax_1", final=False, - settings_override=[{"file": "KPOINTS", "action": {"_file_move": {"dest": "KPOINTS_moved"}}}, - {"dict": "INCAR", "action": {"_set": {"NSW": 100, "LAECHG": False, "LCHARGE": False, "NELM": 80, "EDIFF": 1E-5, "KSPACING" : 0.9}}}]), - - VaspJob(sys.argv[1:], - output_file=output_filename, - suffix=".relax_1", - final=False, - settings_override=[ - {"file": "CHGCAR", "action": {"_file_copy": {"dest": "CHGCAR.static_1"}}}, - {"file": "AECCAR0", "action": {"_file_copy": {"dest": "AECCAR0.static_1"}}}, - {"file": "AECCAR1", "action": {"_file_copy": {"dest": "AECCAR1.static_1"}}}, - 
{"file": "AECCAR2", "action": {"_file_copy": {"dest": "AECCAR2.static_1"}}}, - {"dict": "INCAR", "action": {"_set": {"NSW": 100, "LAECHG": False, "LCHARGE": False, "NELM": 80, "EDIFF": 1E-4, "KSPACING": 0.9}}} - ], - copy_magmom=True), - - VaspJob(sys.argv[1:], - output_file=output_filename, - suffix=".relax_2", - final=False, - settings_override=[ - {"file": "CONTCAR", "action": {"_file_copy": {"dest": "POSCAR"}}}, - {"dict": "INCAR", "action": {"_set": {"KSPACING": 0.5, "EDIFF": 1E-5}}}, - ], - copy_magmom=True), - - VaspJob(sys.argv[1:], - output_file=output_filename, - suffix="", - settings_override=[{"dict": "INCAR", "action": {"_set": {"NSW": 0, "LAECHG": True, "LCHARGE": True, "NELM": 500, "ALGO": "VeryFast"}}}]) - ] + VaspJob( + sys.argv[1:], + output_file=output_filename, + suffix=".relax_1", + final=False, + settings_override=[ + {"file": "KPOINTS", "action": {"_file_move": {"dest": "KPOINTS_moved"}}}, + { + "dict": "INCAR", + "action": { + "_set": { + "NSW": 100, + "LAECHG": False, + "LCHARGE": False, + "NELM": 80, + "EDIFF": 1e-5, + "KSPACING": 0.9, + } + }, + }, + ], + ), + VaspJob( + sys.argv[1:], + output_file=output_filename, + suffix=".relax_1", + final=False, + settings_override=[ + {"file": "CHGCAR", "action": {"_file_copy": {"dest": "CHGCAR.static_1"}}}, + {"file": "AECCAR0", "action": {"_file_copy": {"dest": "AECCAR0.static_1"}}}, + {"file": "AECCAR1", "action": {"_file_copy": {"dest": "AECCAR1.static_1"}}}, + {"file": "AECCAR2", "action": {"_file_copy": {"dest": "AECCAR2.static_1"}}}, + { + "dict": "INCAR", + "action": { + "_set": { + "NSW": 100, + "LAECHG": False, + "LCHARGE": False, + "NELM": 80, + "EDIFF": 1e-4, + "KSPACING": 0.9, + } + }, + }, + ], + copy_magmom=True, + ), + VaspJob( + sys.argv[1:], + output_file=output_filename, + suffix=".relax_2", + final=False, + settings_override=[ + {"file": "CONTCAR", "action": {"_file_copy": {"dest": "POSCAR"}}}, + {"dict": "INCAR", "action": {"_set": {"KSPACING": 0.5, "EDIFF": 1e-5}}}, + ], + copy_magmom=True, + ), + VaspJob( + sys.argv[1:], + output_file=output_filename, + suffix="", + settings_override=[ + { + "dict": "INCAR", + "action": { + "_set": { + "NSW": 0, + "LAECHG": True, + "LCHARGE": True, + "NELM": 500, + "ALGO": "VeryFast", + } + }, + } + ], + ), +] # Number of elements to get from the end of the list n = {STAGE} diff --git a/utils/jobscript_templates/CustodianScripts/template_Static.py b/utils/jobscript_templates/CustodianScripts/template_Static.py index 36a97de..e3ebcd8 100644 --- a/utils/jobscript_templates/CustodianScripts/template_Static.py +++ b/utils/jobscript_templates/CustodianScripts/template_Static.py @@ -1,10 +1,10 @@ import sys from custodian.custodian import Custodian from custodian.vasp.handlers import ( - VaspErrorHandler, + VaspErrorHandler, NonConvergingErrorHandler, - PositiveEnergyErrorHandler, - FrozenJobErrorHandler + PositiveEnergyErrorHandler, + FrozenJobErrorHandler, ) from utils.custom_custodian_handlers import Han_CustomVaspErrorHandler from custodian.vasp.jobs import VaspJob @@ -16,13 +16,31 @@ Han_CustomVaspErrorHandler(), NonConvergingErrorHandler(), PositiveEnergyErrorHandler(), - FrozenJobErrorHandler(output_filename=output_filename) + FrozenJobErrorHandler(output_filename=output_filename), ] -jobs = [VaspJob(sys.argv[1:], output_file=output_filename, suffix = "", - settings_override = [{"dict": "INCAR", - "action": {"_set":{"NSW": 0, "LAECHG": True, "LCHARGE": True, "NELM": 500, "ALGO": "VeryFast", "EDIFF": 1E-5}}}] - )] +jobs = [ + VaspJob( + sys.argv[1:], + 
output_file=output_filename, + suffix="", + settings_override=[ + { + "dict": "INCAR", + "action": { + "_set": { + "NSW": 0, + "LAECHG": True, + "LCHARGE": True, + "NELM": 500, + "ALGO": "VeryFast", + "EDIFF": 1e-5, + } + }, + } + ], + ) +] c = Custodian(handlers, jobs, max_errors={MAXCUSTODIANERRORS}) c.run() diff --git a/utils/parallel.py b/utils/parallel.py index 951207d..4463668 100644 --- a/utils/parallel.py +++ b/utils/parallel.py @@ -1,5 +1,6 @@ from multiprocessing import Pool, cpu_count + def parallelise(func, args_list, **kwargs_list): """ Executes the given function in parallel by applying it to multiple sets of arguments, @@ -40,13 +41,15 @@ def sample_function(x, flag=False): """ if not args_list: return [] - - max_workers = kwargs_list.pop('max_workers', None) + + max_workers = kwargs_list.pop("max_workers", None) if isinstance(max_workers, int): max_workers = max_workers else: - max_workers = cpu_count() # Use default CPU count if max_workers not specified or not an int - + max_workers = ( + cpu_count() + ) # Use default CPU count if max_workers not specified or not an int + # Replicate kwargs handling special cases replicated_kwargs = {} for key, value in kwargs_list.items(): @@ -61,13 +64,16 @@ def sample_function(x, flag=False): # Combine args and kwargs for each function call combined_args = [ - (list(args) if isinstance(args, tuple) else [args]) + [replicated_kwargs[key][i] for key in replicated_kwargs] + (list(args) if isinstance(args, tuple) else [args]) + + [replicated_kwargs[key][i] for key in replicated_kwargs] for i, args in enumerate(args_list) ] # Determine the number of processors to use num_processors = min(len(args_list), max_workers or cpu_count()) - print(f"# Processes: {len(args_list)}, Processors available: {cpu_count()}, CPUs used: {num_processors}") + print( + f"# Processes: {len(args_list)}, Processors available: {cpu_count()}, CPUs used: {num_processors}" + ) # Execute the function in parallel with Pool(processes=num_processors) as pool: results = pool.starmap(func, tuple(combined_args)) diff --git a/utils/periodic_table.py b/utils/periodic_table.py index bc4b697..9184133 100644 --- a/utils/periodic_table.py +++ b/utils/periodic_table.py @@ -1,7 +1,7 @@ import numpy as np import pandas as pd -import os +import os import matplotlib.pyplot as plt import matplotlib.patches as patches @@ -15,13 +15,15 @@ module_path = os.path.dirname(os.path.abspath(__file__)) ptable = pd.read_csv(os.path.join(module_path, "periodic_table.csv")) + def get_element_number(symbol): try: return Element(symbol).Z except ValueError: warnings.warn(f"Warning: Symbol '{symbol}' was not found.") return np.nan - + + def get_element_symbol(element_number): row = ptable[ptable["Z"] == element_number] if not row.empty: @@ -29,75 +31,153 @@ def get_element_symbol(element_number): else: warnings.warn(f"Warning: Element with Z:{element_number} was not found.") return np.nan - + + def classify_elements(element): # Define the properties of the different groups of elements in a dictionary element_groups = { - 'Actinoids': ['Ac', 'Th', 'Pa', 'U', 'Np', 'Pu', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm', 'Md', 'No', 'Lr'], - 'Noble gases': ['He', 'Ne', 'Ar', 'Kr', 'Xe', 'Rn', 'Og'], - 'Rare earths': ['La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu'], - 'Transition metals': ['Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg'], - 'Alkali metals': 
['Li', 'Na', 'K', 'Rb', 'Cs', 'Fr'], - 'Alkaline earths': ['Be', 'Mg', 'Ca', 'Sr', 'Ba', 'Ra'], - 'Halogens': ['F', 'Cl', 'Br', 'I', 'At'], - 'Metalloids': ['B', 'Si', 'Ge', 'As', 'Sb', 'Te', 'Po'], - 'Reactive nonmetals': ['H', 'C', 'N', 'O', 'P', 'S', 'Se'], # Excluding Halogens as they're classified separately - 'Post-transition metals': ['Al', 'Ga', 'In', 'Sn', 'Tl', 'Pb', 'Bi'] + "Actinoids": [ + "Ac", + "Th", + "Pa", + "U", + "Np", + "Pu", + "Am", + "Cm", + "Bk", + "Cf", + "Es", + "Fm", + "Md", + "No", + "Lr", + ], + "Noble gases": ["He", "Ne", "Ar", "Kr", "Xe", "Rn", "Og"], + "Rare earths": [ + "La", + "Ce", + "Pr", + "Nd", + "Pm", + "Sm", + "Eu", + "Gd", + "Tb", + "Dy", + "Ho", + "Er", + "Tm", + "Yb", + "Lu", + ], + "Transition metals": [ + "Sc", + "Ti", + "V", + "Cr", + "Mn", + "Fe", + "Co", + "Ni", + "Cu", + "Zn", + "Y", + "Zr", + "Nb", + "Mo", + "Tc", + "Ru", + "Rh", + "Pd", + "Ag", + "Cd", + "Hf", + "Ta", + "W", + "Re", + "Os", + "Ir", + "Pt", + "Au", + "Hg", + ], + "Alkali metals": ["Li", "Na", "K", "Rb", "Cs", "Fr"], + "Alkaline earths": ["Be", "Mg", "Ca", "Sr", "Ba", "Ra"], + "Halogens": ["F", "Cl", "Br", "I", "At"], + "Metalloids": ["B", "Si", "Ge", "As", "Sb", "Te", "Po"], + "Reactive nonmetals": [ + "H", + "C", + "N", + "O", + "P", + "S", + "Se", + ], # Excluding Halogens as they're classified separately + "Post-transition metals": ["Al", "Ga", "In", "Sn", "Tl", "Pb", "Bi"], } - + # Check which group the element belongs to for group, elements in element_groups.items(): if element in elements: return group - + # If the element doesn't match any group, return 'Others' - return 'Others' + return "Others" + def get_colour_element(element): # Define the color map inside the function - color_map = {'Actinoids': 'r', - 'Noble gases': 'royalblue', - 'Rare earths': 'm', - 'Transition metals': 'purple', - 'Alkali metals': 'gold', - 'Alkaline earths': "moccasin", - 'Halogens': 'mediumspringgreen', - 'Metalloids': 'darkcyan', - 'Others': 'slategray'} + color_map = { + "Actinoids": "r", + "Noble gases": "royalblue", + "Rare earths": "m", + "Transition metals": "purple", + "Alkali metals": "gold", + "Alkaline earths": "moccasin", + "Halogens": "mediumspringgreen", + "Metalloids": "darkcyan", + "Others": "slategray", + } # Classify the element using the classify_elements function element_group = classify_elements(element) - + # Assign color based on the classification using the color_map dictionary - colour = color_map.get(element_group, 'slategray') # Default to 'slategray' if not found in color_map - + colour = color_map.get( + element_group, "slategray" + ) # Default to 'slategray' if not found in color_map + return colour -def periodic_table_plot(plot_df, - property="Eseg_min", - count_min=None, - count_max=None, - center_cm_zero=False, - center_point=None, # New parameter for arbitrary centering - property_name=None, - cmap=cm.Blues, - element_font_color = "darkgoldenrod" + +def periodic_table_plot( + plot_df, + property="Eseg_min", + count_min=None, + count_max=None, + center_cm_zero=False, + center_point=None, # New parameter for arbitrary centering + property_name=None, + cmap=cm.Blues, + element_font_color="darkgoldenrod", ): module_path = os.path.dirname(os.path.abspath(__file__)) - ptable = pd.read_csv(os.path.join(module_path, 'periodic_table.csv')) - ptable.index = ptable['symbol'].values - elem_tracker = ptable['count'] - ptable = ptable[ptable['Z'] <= 92] # Cap at element 92 + ptable = pd.read_csv(os.path.join(module_path, "periodic_table.csv")) + ptable.index = 
ptable["symbol"].values + elem_tracker = ptable["count"] + ptable = ptable[ptable["Z"] <= 92] # Cap at element 92 - n_row = ptable['row'].max() - n_column = ptable['column'].max() + n_row = ptable["row"].max() + n_column = ptable["column"].max() fig, ax = plt.subplots(figsize=(n_column, n_row)) - rows = ptable['row'] - columns = ptable['column'] - symbols = ptable['symbol'] + rows = ptable["row"] + columns = ptable["column"] + symbols = ptable["symbol"] rw = 0.9 # rectangle width - rh = rw # rectangle height + rh = rw # rectangle height if count_min is None: count_min = plot_df[property].min() @@ -116,127 +196,170 @@ def periodic_table_plot(plot_df, norm = Normalize(vmin=count_min, vmax=count_max) for row, column, symbol in zip(rows, columns, symbols): - row = ptable['row'].max() - row + row = ptable["row"].max() - row if symbol in plot_df.element.unique(): count = plot_df[plot_df["element"] == symbol][property].values[0] # Check for NaN and adjust color and skip text accordingly if pd.isna(count): - color = 'grey' # Set color to none for NaN values - count = '' # Avoid displaying text for NaN values + color = "grey" # Set color to none for NaN values + count = "" # Avoid displaying text for NaN values else: color = cmap(norm(count)) else: - count = '' - color = 'none' + count = "" + color = "none" if row < 3: row += 0.5 - rect = patches.Rectangle((column, row), rw, rh, - linewidth=1.5, - edgecolor='gray', - facecolor=color, - alpha=1) + rect = patches.Rectangle( + (column, row), + rw, + rh, + linewidth=1.5, + edgecolor="gray", + facecolor=color, + alpha=1, + ) # Element symbol - plt.text(column + rw / 2, row + rh / 2 + 0.2, symbol, - horizontalalignment='center', - verticalalignment='center', - fontsize=22, # Adjusted for visibility - fontweight='semibold', - color=element_font_color) + plt.text( + column + rw / 2, + row + rh / 2 + 0.2, + symbol, + horizontalalignment="center", + verticalalignment="center", + fontsize=22, # Adjusted for visibility + fontweight="semibold", + color=element_font_color, + ) # Property value - Added below the symbol if count: # Only display if count is not empty (including not NaN) - plt.text(column + rw / 2, row + rh / 2 - 0.25, f"{count:.2f}", # Formatting count to 2 decimal places - horizontalalignment='center', - verticalalignment='center', - fontsize=14, # Smaller font size for the count value - fontweight='semibold', - color=element_font_color) + plt.text( + column + rw / 2, + row + rh / 2 - 0.25, + f"{count:.2f}", # Formatting count to 2 decimal places + horizontalalignment="center", + verticalalignment="center", + fontsize=14, # Smaller font size for the count value + fontweight="semibold", + color=element_font_color, + ) ax.add_patch(rect) # Generate the color bar granularity = 20 - colormap_array = np.linspace(norm.vmin, norm.vmax, granularity) if center_point is None else np.linspace(center_point - max_diff, center_point + max_diff, granularity) - + colormap_array = ( + np.linspace(norm.vmin, norm.vmax, granularity) + if center_point is None + else np.linspace(center_point - max_diff, center_point + max_diff, granularity) + ) + for i, value in enumerate(colormap_array): color = cmap(norm(value)) - color = 'silver' if value == 0 else color + color = "silver" if value == 0 else color length = 9 x_offset = 3.5 y_offset = 7.8 x_loc = i / granularity * length + x_offset width = length / granularity height = 0.35 - rect = patches.Rectangle((x_loc, y_offset), width, height, - linewidth=1.5, - edgecolor='gray', - facecolor=color, - alpha=1) - - if i in [0, 
granularity//4, granularity//2, 3*granularity//4, granularity-1]: - plt.text(x_loc + width / 2, y_offset - 0.4, f'{value:.1f}', - horizontalalignment='center', - verticalalignment='center', - fontweight='semibold', - fontsize=20, color='k') + rect = patches.Rectangle( + (x_loc, y_offset), + width, + height, + linewidth=1.5, + edgecolor="gray", + facecolor=color, + alpha=1, + ) + + if i in [ + 0, + granularity // 4, + granularity // 2, + 3 * granularity // 4, + granularity - 1, + ]: + plt.text( + x_loc + width / 2, + y_offset - 0.4, + f"{value:.1f}", + horizontalalignment="center", + verticalalignment="center", + fontweight="semibold", + fontsize=20, + color="k", + ) ax.add_patch(rect) if property_name is None: property_name = property - plt.text(x_offset + length / 2, y_offset + 1.0, - property_name, - horizontalalignment='center', - verticalalignment='center', - fontweight='semibold', - fontsize=20, color='k') - ax.set_ylim(-0.15, n_row + .1) + plt.text( + x_offset + length / 2, + y_offset + 1.0, + property_name, + horizontalalignment="center", + verticalalignment="center", + fontweight="semibold", + fontsize=20, + color="k", + ) + ax.set_ylim(-0.15, n_row + 0.1) ax.set_xlim(0.85, n_column + 1.1) - ax.axis('off') + ax.axis("off") plt.draw() plt.pause(0.001) plt.close() return fig, ax -def periodic_table_dual_plot(plot_df, - property1="Eseg_min1", - property2="Eseg_min2", # New property - count_min1=None, - count_max1=None, - count_min2=None, - count_max2=None, - center_cm_zero1=False, - center_cm_zero2=False, - center_point1=None, # New parameter for arbitrary centering - center_point2=None, - property_name1=None, - property_name2=None, - cmap1=plt.cm.Blues, # Colormap for the first property - cmap2=plt.cm.Reds, # Colormap for the second property - element_font_color="darkgoldenrod"): + +def periodic_table_dual_plot( + plot_df, + property1="Eseg_min1", + property2="Eseg_min2", # New property + count_min1=None, + count_max1=None, + count_min2=None, + count_max2=None, + center_cm_zero1=False, + center_cm_zero2=False, + center_point1=None, # New parameter for arbitrary centering + center_point2=None, + property_name1=None, + property_name2=None, + cmap1=plt.cm.Blues, # Colormap for the first property + cmap2=plt.cm.Reds, # Colormap for the second property + element_font_color="darkgoldenrod", +): module_path = os.path.dirname(os.path.abspath(__file__)) - ptable = pd.read_csv(os.path.join(module_path, 'periodic_table.csv')) - ptable.index = ptable['symbol'].values - elem_tracker = ptable['count'] - ptable = ptable[ptable['Z'] <= 92] # Cap at element 92 + ptable = pd.read_csv(os.path.join(module_path, "periodic_table.csv")) + ptable.index = ptable["symbol"].values + elem_tracker = ptable["count"] + ptable = ptable[ptable["Z"] <= 92] # Cap at element 92 - n_row = ptable['row'].max() - n_column = ptable['column'].max() + n_row = ptable["row"].max() + n_column = ptable["column"].max() fig, ax = plt.subplots(figsize=(n_column, n_row)) - rows = ptable['row'] - columns = ptable['column'] - symbols = ptable['symbol'] + rows = ptable["row"] + columns = ptable["column"] + symbols = ptable["symbol"] rw = 0.9 # rectangle width - rh = rw # rectangle height - - if count_min1 is None or count_min2 is None or count_max1 is None or count_max2 is None: + rh = rw # rectangle height + + if ( + count_min1 is None + or count_min2 is None + or count_max1 is None + or count_max2 is None + ): show_symbols = False else: show_symbols = True - + if count_min1 is None: count_min1 = plot_df[property1].min() if count_max1 
is None: @@ -270,9 +393,9 @@ def periodic_table_dual_plot(plot_df, norm2 = Normalize(vmin=count_min2, vmax=count_max2) for row, column, symbol in zip(rows, columns, symbols): - row = ptable['row'].max() - row + row = ptable["row"].max() - row # Initial color set to 'none' for both properties - color1, color2 = 'none', 'none' + color1, color2 = "none", "none" if symbol in plot_df.element.unique(): element_data = plot_df[plot_df["element"] == symbol] @@ -284,41 +407,58 @@ def periodic_table_dual_plot(plot_df, color2 = cmap2(norm2(value2)) # Draw upper right triangle for property1 - triangle1 = patches.Polygon([(column, row), (column + rw, row), (column + rw, row + rh)], - closed=True, color=color1) + triangle1 = patches.Polygon( + [(column, row), (column + rw, row), (column + rw, row + rh)], + closed=True, + color=color1, + ) ax.add_patch(triangle1) - + # Draw lower left triangle for property2 - triangle2 = patches.Polygon([(column, row), (column, row + rh), (column + rw, row + rh)], - closed=True, color=color2) + triangle2 = patches.Polygon( + [(column, row), (column, row + rh), (column + rw, row + rh)], + closed=True, + color=color2, + ) ax.add_patch(triangle2) # Element symbol - plt.text(column + rw / 2, row + rh / 2, symbol, - horizontalalignment='center', - verticalalignment='center', - fontsize=22, # Adjusted for visibility - fontweight='semibold', - color=element_font_color) + plt.text( + column + rw / 2, + row + rh / 2, + symbol, + horizontalalignment="center", + verticalalignment="center", + fontsize=22, # Adjusted for visibility + fontweight="semibold", + color=element_font_color, + ) position1 = 3.5, 7.8 position2 = 3.5, 9.4 # draw_color_bar(fig, ax, norm1, cmap1, property_name1, position1, granularity=20) # draw_color_bar(fig, ax, norm2, cmap2, property_name2, position2, granularity=20) - draw_color_bar(fig, ax, norm1, cmap1, property_name1, position1, show_symbols, granularity=20) - draw_color_bar(fig, ax, norm2, cmap2, property_name2, position2, show_symbols, granularity=20) - - ax.set_ylim(-0.15, n_row + .1) + draw_color_bar( + fig, ax, norm1, cmap1, property_name1, position1, show_symbols, granularity=20 + ) + draw_color_bar( + fig, ax, norm2, cmap2, property_name2, position2, show_symbols, granularity=20 + ) + + ax.set_ylim(-0.15, n_row + 0.1) ax.set_xlim(0.85, n_column + 1.1) - ax.axis('off') - + ax.axis("off") + plt.draw() plt.pause(0.001) plt.close() return fig, ax -def draw_color_bar(fig, ax, norm, cmap, property_name, position, show_symbols=True, granularity=20): + +def draw_color_bar( + fig, ax, norm, cmap, property_name, position, show_symbols=True, granularity=20 +): colormap_array = np.linspace(norm.vmin, norm.vmax, granularity) - + length = 9 width = length / granularity height = 0.35 @@ -326,37 +466,55 @@ def draw_color_bar(fig, ax, norm, cmap, property_name, position, show_symbols=Tr for i, value in enumerate(colormap_array): color = cmap(norm(value)) - color = 'silver' if value == 0 and not norm.vmin <= 0 <= norm.vmax else color + color = "silver" if value == 0 and not norm.vmin <= 0 <= norm.vmax else color x_loc = i / granularity * length + x_offset - - rect = patches.Rectangle((x_loc, y_offset), width, height, - linewidth=1.5, - edgecolor='gray', - facecolor=color, - alpha=1) + + rect = patches.Rectangle( + (x_loc, y_offset), + width, + height, + linewidth=1.5, + edgecolor="gray", + facecolor=color, + alpha=1, + ) ax.add_patch(rect) - if i in [0, granularity//4, granularity//2, 3*granularity//4, granularity-1]: - label = f'{value:.1f}' + if i in [ + 0, + 
granularity // 4, + granularity // 2, + 3 * granularity // 4, + granularity - 1, + ]: + label = f"{value:.1f}" if show_symbols: if i == 0: label = "<" + label elif i == granularity - 1: label = ">" + label - - plt.text(x_loc + width / 2, y_offset - 0.4, label, - horizontalalignment='center', - verticalalignment='center', - fontweight='semibold', - fontsize=20, color='k') - - plt.text(x_offset + length / 2, y_offset + 0.75, - property_name, - horizontalalignment='center', - verticalalignment='center', - fontweight='semibold', - fontsize=24, color='k') + plt.text( + x_loc + width / 2, + y_offset - 0.4, + label, + horizontalalignment="center", + verticalalignment="center", + fontweight="semibold", + fontsize=20, + color="k", + ) + + plt.text( + x_offset + length / 2, + y_offset + 0.75, + property_name, + horizontalalignment="center", + verticalalignment="center", + fontweight="semibold", + fontsize=24, + color="k", + ) # Example of how to use the function diff --git a/utils/plotters/grid_plots.py b/utils/plotters/grid_plots.py index 4a3c2b6..009b803 100644 --- a/utils/plotters/grid_plots.py +++ b/utils/plotters/grid_plots.py @@ -2,18 +2,21 @@ import matplotlib.ticker as ticker import numpy as np -def plot_pivot_table(df, - colormap_thresholds=[None, None], - figsize=(18, 30), - colormap='bwr', - colormap_label='E$_{\\rm{seg}}$ (eV)', - color_label_fontsize=20, - colormap_tick_fontsize=12, - xtick_fontsize=18, - ytick_fontsize=12, - threshold_low=None, - threshold_high=None, - transpose_axes=False): + +def plot_pivot_table( + df, + colormap_thresholds=[None, None], + figsize=(18, 30), + colormap="bwr", + colormap_label="E$_{\\rm{seg}}$ (eV)", + color_label_fontsize=20, + colormap_tick_fontsize=12, + xtick_fontsize=18, + ytick_fontsize=12, + threshold_low=None, + threshold_high=None, + transpose_axes=False, +): """ Plot a heatmap with custom parameters. 
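For orientation, a minimal usage sketch of the reformatted `plot_pivot_table` above (not part of this patch): it assumes the repository root is importable and feeds the function a small placeholder pivot table of segregation energies, with illustrative site labels, solute columns, and thresholds.

import numpy as np
import pandas as pd

from utils.plotters.grid_plots import plot_pivot_table

rng = np.random.default_rng(0)
eseg = pd.DataFrame(
    rng.normal(0.0, 0.5, size=(10, 4)),
    index=[f"site_{i}" for i in range(10)],  # placeholder segregation sites
    columns=["Al", "Cu", "Ni", "Ti"],  # placeholder solute elements
)

fig, axs = plot_pivot_table(
    eseg,
    figsize=(8, 6),
    colormap="bwr",
    threshold_low=-2.0,  # values outside (-2, 2) eV are masked to NaN and drawn black
    threshold_high=2.0,
)
fig.savefig("eseg_grid.png", dpi=300)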
@@ -33,32 +36,47 @@ def plot_pivot_table(df, if threshold_low is not None or threshold_high is not None: df = df.copy() df[(df < threshold_low) | (df > threshold_high)] = np.nan - + if transpose_axes: df = df.T fig, axs = plt.subplots(nrows=1, ncols=1, figsize=figsize) cmap = plt.get_cmap(colormap) - cmap.set_bad('k') + cmap.set_bad("k") if colormap_thresholds == [None, None]: vmax = max(abs(np.nanmin(df.max())), abs(np.nanmin(df.min()))) vmin = -vmax else: vmin, vmax = colormap_thresholds im = axs.imshow(df, cmap=cmap, vmax=vmax, vmin=vmin) - cm = plt.colorbar(im, ax=axs, shrink=0.3, location='right', pad=0.01) - cm.set_label(colormap_label, rotation=270, labelpad=15, fontsize=color_label_fontsize) + cm = plt.colorbar(im, ax=axs, shrink=0.3, location="right", pad=0.01) + cm.set_label( + colormap_label, rotation=270, labelpad=15, fontsize=color_label_fontsize + ) # cm.ax.tick_params(labelsize=colormap_tick_fontsize) # Set colorbar tick label size if colormap_thresholds != [None, None]: ticks = cm.get_ticks() if len(ticks) > 1: # Check to ensure there are ticks to modify - tick_labels = [f"$<{vmin}$" if i == 0 else f"$>{vmax}$" if i == len(ticks)-1 else str(tick) for i, tick in enumerate(ticks)] + tick_labels = [ + ( + f"$<{vmin}$" + if i == 0 + else f"$>{vmax}$" if i == len(ticks) - 1 else str(tick) + ) + for i, tick in enumerate(ticks) + ] cm.set_ticks(ticks) # Set the ticks back if they were changed - cm.set_ticklabels(tick_labels, fontsize=colormap_tick_fontsize) # Set the modified tick labels + cm.set_ticklabels( + tick_labels, fontsize=colormap_tick_fontsize + ) # Set the modified tick labels else: - cm.set_ticklabels(cm.get_ticks(), fontsize=colormap_tick_fontsize) # Set the modified tick labels + cm.set_ticklabels( + cm.get_ticks(), fontsize=colormap_tick_fontsize + ) # Set the modified tick labels - plt.xticks(np.arange(len(df.columns)), df.columns, rotation=0, fontsize=xtick_fontsize) + plt.xticks( + np.arange(len(df.columns)), df.columns, rotation=0, fontsize=xtick_fontsize + ) plt.yticks(np.arange(len(df.index)), df.index, fontsize=ytick_fontsize) axs.xaxis.set_major_locator(ticker.MultipleLocator(1)) @@ -66,7 +84,7 @@ def plot_pivot_table(df, axs.xaxis.set_minor_locator(ticker.MultipleLocator(0.5)) axs.yaxis.set_minor_locator(ticker.MultipleLocator(0.5)) - axs.tick_params(axis='both', which='major', width=1.5, length=4) - axs.grid(which='minor', color='black', linestyle='-', linewidth=1) - - return fig, axs \ No newline at end of file + axs.tick_params(axis="both", which="major", width=1.5, length=4) + axs.grid(which="minor", color="black", linestyle="-", linewidth=1) + + return fig, axs diff --git a/utils/plotters/structure_plots.py b/utils/plotters/structure_plots.py index e7e72f0..9ebd91d 100644 --- a/utils/plotters/structure_plots.py +++ b/utils/plotters/structure_plots.py @@ -3,26 +3,29 @@ from matplotlib.cm import ScalarMappable from matplotlib.colors import Normalize, TwoSlopeNorm -def plot_structure_projection(structure, - projection_axis=[1, 2], - bond_matrix=None, - atom_size=250, - figsize=(8, 6), - cell_border_colour="r", - no_fill_elements=["Fe"], - fill_color="red", - atom_size_dict={}, - fontsize=16, - values_list=None, - title=None, - cmap='viridis', - colorbar_label=r"$\rm{E}_{seg}$", - xlabel_fontsize=None, - ylabel_fontsize=None, - title_fontsize=None, - colorbar_fontsize=None, - colorbar_ticks_fontsize=None, - center_colorbar_at_zero=True): + +def plot_structure_projection( + structure, + projection_axis=[1, 2], + bond_matrix=None, + atom_size=250, + 
figsize=(8, 6), + cell_border_colour="r", + no_fill_elements=["Fe"], + fill_color="red", + atom_size_dict={}, + fontsize=16, + values_list=None, + title=None, + cmap="viridis", + colorbar_label=r"$\rm{E}_{seg}$", + xlabel_fontsize=None, + ylabel_fontsize=None, + title_fontsize=None, + colorbar_fontsize=None, + colorbar_ticks_fontsize=None, + center_colorbar_at_zero=True, +): """ Plots the projection of a pymatgen structure on a 2D plane based on the specified projection axis. @@ -60,19 +63,19 @@ def plot_structure_projection(structure, if values_list is not None: # Adjust vmin and vmax based on the absolute maximum value for symmetry max_abs_value = max(abs(min(values_list)), abs(max(values_list))) - + if center_colorbar_at_zero: norm = TwoSlopeNorm(vmin=-max_abs_value, vcenter=0, vmax=max_abs_value) else: norm = Normalize(vmin=min(values_list), vmax=max(values_list)) - + sm = ScalarMappable(cmap=cmap, norm=norm) sm.set_array([]) # Required for ScalarMappable to work for i, site in enumerate(structure): species = site.species_string if species in no_fill_elements: - color = 'none' # No fill for specified elements + color = "none" # No fill for specified elements else: if values_list is not None: color = sm.to_rgba(values_list[i]) @@ -81,8 +84,13 @@ def plot_structure_projection(structure, # Use custom size if available, otherwise use the default size size = atom_size_dict.get(species, atom_size) - plt.scatter(site.coords[projection_axis[0]], site.coords[projection_axis[1]], color=color, s=size, - edgecolors='black') + plt.scatter( + site.coords[projection_axis[0]], + site.coords[projection_axis[1]], + color=color, + s=size, + edgecolors="black", + ) # Set plot title and labels if title is not None: @@ -97,43 +105,57 @@ def plot_structure_projection(structure, plt.ylim(y_min - 1, y_max + 1) if bond_matrix is not None: - relevant_plot_bonds = bond_matrix[(bond_matrix['repeata'] == 0) & (bond_matrix['repeatb'] == 0)] + relevant_plot_bonds = bond_matrix[ + (bond_matrix["repeata"] == 0) & (bond_matrix["repeatb"] == 0) + ] for idx, bonds in relevant_plot_bonds.iterrows(): atom1 = int(bonds["atom1"]) - 1 atom2 = int(bonds["atom2"]) - 1 bondstrength = np.round(bonds["final_bond_order"], 2) if bondstrength < 0.28: - c = 'r' + c = "r" else: - c = 'k' + c = "k" c = "k" - plt.plot([structure[atom1].coords[projection_axis[0]], structure[atom2].coords[projection_axis[0]]], - [structure[atom1].coords[projection_axis[1]], structure[atom2].coords[projection_axis[1]]], - '-', - color=c, - linewidth=bondstrength / 0.56 * 5) + plt.plot( + [ + structure[atom1].coords[projection_axis[0]], + structure[atom2].coords[projection_axis[0]], + ], + [ + structure[atom1].coords[projection_axis[1]], + structure[atom2].coords[projection_axis[1]], + ], + "-", + color=c, + linewidth=bondstrength / 0.56 * 5, + ) # Draw the cell with a black border based on the projection_axis lattice_vectors = structure.lattice.matrix[projection_axis] # Draw the cell with a border based on the projection_ax|is - rect = plt.Rectangle((0, 0), - structure.lattice.abc[projection_axis[0]], - structure.lattice.abc[projection_axis[1]], - edgecolor=cell_border_colour, - linewidth=3, - fill=False, - linestyle='--') + rect = plt.Rectangle( + (0, 0), + structure.lattice.abc[projection_axis[0]], + structure.lattice.abc[projection_axis[1]], + edgecolor=cell_border_colour, + linewidth=3, + fill=False, + linestyle="--", + ) plt.gca().add_patch(rect) - plt.gca().set_aspect('equal') + plt.gca().set_aspect("equal") plt.grid() # Add colorbar if 
colorbar_fontsize is not None and values_list is not None: - cbar_ax = fig.add_axes([0.55, 0.1, 0.005, 0.8]) # Adjust these values to position the colorbar as needed + cbar_ax = fig.add_axes( + [0.55, 0.1, 0.005, 0.8] + ) # Adjust these values to position the colorbar as needed cbar = plt.colorbar(sm, cax=cbar_ax, label=colorbar_label) cbar.set_label(colorbar_label, fontsize=colorbar_fontsize) if colorbar_ticks_fontsize is not None: cbar.ax.tick_params(labelsize=colorbar_ticks_fontsize) - + plt.show() diff --git a/utils/structure_featuriser.py b/utils/structure_featuriser.py index 709dbd1..8f8bda3 100644 --- a/utils/structure_featuriser.py +++ b/utils/structure_featuriser.py @@ -3,44 +3,66 @@ from sklearn.decomposition import PCA from sklearn.preprocessing import StandardScaler -#from maml.describers import SmoothOverlapAtomicPosition + +# from maml.describers import SmoothOverlapAtomicPosition from pymatgen.analysis.local_env import VoronoiNN from pymatgen.core import Structure + def get_stats(property_list, property_str): - return [f"{property_str}_std",f"{property_str}_mean",f"{property_str}_min",f"{property_str}_max"],\ - [np.std(property_list), np.mean(property_list), np.min(property_list), np.max(property_list)] - + return [ + f"{property_str}_std", + f"{property_str}_mean", + f"{property_str}_min", + f"{property_str}_max", + ], [ + np.std(property_list), + np.mean(property_list), + np.min(property_list), + np.max(property_list), + ] + + def VoronoiSiteFeaturiser(structure, site): - coord_no = VoronoiNN().get_cn(structure = structure, n = site) + coord_no = VoronoiNN().get_cn(structure=structure, n=site) site_info_dict = VoronoiNN().get_voronoi_polyhedra(structure, site) - volumes = [site_info_dict[polyhedra]["volume"] for polyhedra in list(site_info_dict.keys())] - vertices = [site_info_dict[polyhedra]["n_verts"] for polyhedra in list(site_info_dict.keys())] - distances = [site_info_dict[polyhedra]["face_dist"] for polyhedra in list(site_info_dict.keys())] - areas = [site_info_dict[polyhedra]["area"] for polyhedra in list(site_info_dict.keys())] - + volumes = [ + site_info_dict[polyhedra]["volume"] for polyhedra in list(site_info_dict.keys()) + ] + vertices = [ + site_info_dict[polyhedra]["n_verts"] + for polyhedra in list(site_info_dict.keys()) + ] + distances = [ + site_info_dict[polyhedra]["face_dist"] + for polyhedra in list(site_info_dict.keys()) + ] + areas = [ + site_info_dict[polyhedra]["area"] for polyhedra in list(site_info_dict.keys()) + ] + total_area = np.sum(areas) total_volume = np.sum(volumes) - + df_str_list = ["VorNN_CoordNo", "VorNN_tot_vol", "VorNN_tot_area"] df_prop_list = [coord_no, total_volume, total_area] - + data_str_list = ["volumes", "vertices", "areas", "distances"] for i, value_list in enumerate([volumes, vertices, areas, distances]): - property_str_list, property_stats_list = get_stats(value_list, f"VorNN_{data_str_list[i]}") + property_str_list, property_stats_list = get_stats( + value_list, f"VorNN_{data_str_list[i]}" + ) df_str_list += property_str_list df_prop_list += property_stats_list - + return df_str_list, df_prop_list -def get_per_site_SOAP_descriptor(structure, - cutoff=3, - l_max=10, - n_max=10, - atom_sigma=0.5, - verbose=False): + +def get_per_site_SOAP_descriptor( + structure, cutoff=3, l_max=10, n_max=10, atom_sigma=0.5, verbose=False +): """ Process a list of pymatgen structures using the Smooth Overlap of Atomic Positions (SOAP) method and organize the results into a list of DataFrames per structure, @@ -59,9 +81,16 @@ def 
get_per_site_SOAP_descriptor(structure, list: List of DataFrames, each DataFrame containing the SOAP descriptors for each site in the structure. """ # Change n_jobs to the number of cores you have available - s = SmoothOverlapAtomicPosition(cutoff=cutoff, l_max=l_max, n_max=n_max, atom_sigma=atom_sigma, verbose=verbose, n_jobs=1) + s = SmoothOverlapAtomicPosition( + cutoff=cutoff, + l_max=l_max, + n_max=n_max, + atom_sigma=atom_sigma, + verbose=verbose, + n_jobs=1, + ) # Create a DataFrame with the list of structures - df = pd.DataFrame({'structure': [structure]}) + df = pd.DataFrame({"structure": [structure]}) # Transform the structures using SOAP a = s.transform(df["structure"]) # Copy and reset the index of the transformed DataFrame @@ -69,10 +98,16 @@ def get_per_site_SOAP_descriptor(structure, # Rename the "level_1" column to "site" soap_df.rename(columns={"level_1": "site"}, inplace=True) # Group the DataFrame by "input_index" and drop the "input_index" column from each group - df_list = [soap_df.reset_index(drop=True).drop(columns='input_index') for _, soap_df in soap_df.groupby(["input_index"])] + df_list = [ + soap_df.reset_index(drop=True).drop(columns="input_index") + for _, soap_df in soap_df.groupby(["input_index"]) + ] return df -def get_per_site_SOAP_dfs(struct_list, cutoff=3, l_max=10, n_max=10, atom_sigma=0.5, verbose=False, n_jobs=16): + +def get_per_site_SOAP_dfs( + struct_list, cutoff=3, l_max=10, n_max=10, atom_sigma=0.5, verbose=False, n_jobs=16 +): """ Process a list of pymatgen structures using the Smooth Overlap of Atomic Positions (SOAP) method and organize the results into a list of DataFrames per structure, @@ -91,9 +126,16 @@ def get_per_site_SOAP_dfs(struct_list, cutoff=3, l_max=10, n_max=10, atom_sigma= list: List of DataFrames, each DataFrame containing the SOAP descriptors for each site in the structure. """ # Change n_jobs to the number of cores you have available - s = SmoothOverlapAtomicPosition(cutoff=cutoff, l_max=l_max, n_max=n_max, atom_sigma=atom_sigma, verbose=verbose, n_jobs=n_jobs) + s = SmoothOverlapAtomicPosition( + cutoff=cutoff, + l_max=l_max, + n_max=n_max, + atom_sigma=atom_sigma, + verbose=verbose, + n_jobs=n_jobs, + ) # Create a DataFrame with the list of structures - df = pd.DataFrame({'structure': struct_list}) + df = pd.DataFrame({"structure": struct_list}) # Transform the structures using SOAP a = s.transform(df["structure"]) # Copy and reset the index of the transformed DataFrame @@ -101,11 +143,15 @@ def get_per_site_SOAP_dfs(struct_list, cutoff=3, l_max=10, n_max=10, atom_sigma= # Rename the "level_1" column to "site" soap_df.rename(columns={"level_1": "site"}, inplace=True) # Group the DataFrame by "input_index" and drop the "input_index" column from each group - df_list = [soap_df.reset_index(drop=True).drop(columns='input_index') for _, soap_df in soap_df.groupby(["input_index"])] + df_list = [ + soap_df.reset_index(drop=True).drop(columns="input_index") + for _, soap_df in soap_df.groupby(["input_index"]) + ] return df_list -def get_SOAP_PCA_df(struct_list, PCA_comp = 30, write_df = False, filename=None): + +def get_SOAP_PCA_df(struct_list, PCA_comp=30, write_df=False, filename=None): """ Perform Principal Component Analysis (PCA) on Smooth Overlap of Atomic Positions (SOAP) descriptors for a list of structures and return a DataFrame with PCA-transformed SOAP descriptors. 
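Note that the SOAP helpers in this file rely on `SmoothOverlapAtomicPosition`, whose `maml` import is commented out above, so a short sketch (not part of this patch) of the Voronoi featuriser defined earlier in the file may be more useful; the bcc Fe cell and 2.86 Å lattice parameter are illustrative only, and the repository root is assumed to be on the import path.

import pandas as pd
from pymatgen.core import Lattice, Structure

from utils.structure_featuriser import VoronoiSiteFeaturiser

# Conventional bcc Fe cell; element and lattice parameter are placeholders.
structure = Structure.from_spacegroup(
    "Im-3m", Lattice.cubic(2.86), ["Fe"], [[0, 0, 0]]
)

rows = []
for site_index in range(len(structure)):
    names, values = VoronoiSiteFeaturiser(structure, site_index)
    rows.append(dict(zip(names, values)))

site_features = pd.DataFrame(rows)  # one row of VorNN_* descriptors per site
print(site_features[["VorNN_CoordNo", "VorNN_tot_vol", "VorNN_tot_area"]].head())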
@@ -126,7 +172,7 @@ def get_SOAP_PCA_df(struct_list, PCA_comp = 30, write_df = False, filename=None) """ # Compute SOAP descriptors for each site in the structures struct_SOAP_df_list = get_per_site_SOAP_dfs(struct_list) - + # Concatenate all SOAP descriptors and perform standard scaling df_soap = pd.concat(struct_SOAP_df_list) df_soap.columns = df_soap.columns.astype(str) @@ -137,10 +183,12 @@ def get_SOAP_PCA_df(struct_list, PCA_comp = 30, write_df = False, filename=None) # Perform PCA with the specified number of principal components pca = PCA(n_components=PCA_comp) PCA_soap = pca.fit_transform(df_soap) - + # Create a DataFrame for PCA-transformed SOAP descriptors - PCA_soap_df = pd.DataFrame(data=PCA_soap, columns=[f'SOAP_PCA_{i}' for i in np.arange(0, PCA_comp)]) - + PCA_soap_df = pd.DataFrame( + data=PCA_soap, columns=[f"SOAP_PCA_{i}" for i in np.arange(0, PCA_comp)] + ) + # Save the DataFrame as a pickle file if write_df is True if write_df: if filename: @@ -148,9 +196,12 @@ def get_SOAP_PCA_df(struct_list, PCA_comp = 30, write_df = False, filename=None) else: filename = f"SOAP_PCA_{PCA_comp}_segsite.pkl" PCA_soap_df.to_pickle(filename) - - print(f'Explained variation at {PCA_comp} principal components: {np.sum(pca.explained_variance_ratio_)}') - + + print( + f"Explained variation at {PCA_comp} principal components: {np.sum(pca.explained_variance_ratio_)}" + ) + return PCA_soap_df + # def ACE_featuriser(): diff --git a/utils/training_data_nequip.py b/utils/training_data_nequip.py index 745ac94..f893433 100644 --- a/utils/training_data_nequip.py +++ b/utils/training_data_nequip.py @@ -3,6 +3,8 @@ import numpy as np import pandas as pd import time + + def process_list(my_list, n): # if list has length 1 or 2, return the list as is if len(my_list) <= 2: @@ -13,26 +15,38 @@ def process_list(my_list, n): last = my_list[-1] # get every nth element of the list, excluding first and lasat images - new_list = [my_list[i] for i in range(0, len(my_list), n) if i != 0 and i != len(my_list)-1] + new_list = [ + my_list[i] + for i in range(0, len(my_list), n) + if i != 0 and i != len(my_list) - 1 + ] # return the result return [first] + new_list + [last] -def extract_allegro_extxyz(filepath, max_electronic_steps = 120, every_nth_image=4, scf_steps = [], output_filepath = "allegro_training_data.extxyz"): + +def extract_allegro_extxyz( + filepath, + max_electronic_steps=120, + every_nth_image=4, + scf_steps=[], + output_filepath="allegro_training_data.extxyz", +): filtered_list = [] print(filepath) - ase_outcar = read(filepath, format = "vasp-out", index = ":") + ase_outcar = read(filepath, format="vasp-out", index=":") if scf_steps: for j, n_electronic_steps in enumerate(scf_steps): if n_electronic_steps != max_electronic_steps: filtered_list.append(ase_outcar[j]) else: filtered_list = ase_outcar - + every_n_list = process_list(filtered_list, every_nth_image) for _, atoms_obj in enumerate(every_n_list): write(output_filepath, atoms_obj, append=True, format="extxyz") - + + import glob df_pickles_filelist = [] @@ -41,26 +55,33 @@ def extract_allegro_extxyz(filepath, max_electronic_steps = 120, every_nth_image continue print(file) df_pickles_filelist.append(file) - + import multiprocessing + start_time = time.time() num_processors = multiprocessing.cpu_count() if len(df_pickles_filelist) < num_processors: processes = len(df_pickles_filelist) else: - processes = num_processors + processes = num_processors print(f"Number of processors: {num_processors}, used: {processes}") + def 
allegro_data_setup_from_df(df_pickle_filepath): df = pd.read_pickle(df_pickle_filepath) for _, row in df.iterrows(): output_file = os.path.basename(df_pickle_filepath).split(sep=".pkl")[0] - extract_allegro_extxyz(row.filepath, scf_steps = row.scf_steps, output_filepath = f"{output_file}-AllegroNequip.extxyz") - + extract_allegro_extxyz( + row.filepath, + scf_steps=row.scf_steps, + output_filepath=f"{output_file}-AllegroNequip.extxyz", + ) + + with multiprocessing.Pool(processes=processes) as pool: pool.map(allegro_data_setup_from_df, df_pickles_filelist) end_time = time.time() elapsed_time = end_time - start_time -print("Elapsed time:", np.round(elapsed_time,3), "seconds") +print("Elapsed time:", np.round(elapsed_time, 3), "seconds") diff --git a/utils/vasp/database.py b/utils/vasp/database.py index b27e1cc..b4c75e7 100644 --- a/utils/vasp/database.py +++ b/utils/vasp/database.py @@ -13,12 +13,22 @@ from utils.vasp.parser.outcar import Outcar from utils.vasp.parser.output import parse_vasp_directory -def find_vasp_directories(parent_dir, - filenames=["vasp.log", "INCAR", "POTCAR", "CONTCAR", "KPOINTS", "OUTCAR", "vasprun.xml"], - all_present=False, - extract_tarballs=True, - tarball_extensions=(".tar.gz"), - ): + +def find_vasp_directories( + parent_dir, + filenames=[ + "vasp.log", + "INCAR", + "POTCAR", + "CONTCAR", + "KPOINTS", + "OUTCAR", + "vasprun.xml", + ], + all_present=False, + extract_tarballs=True, + tarball_extensions=(".tar.gz"), +): """ Finds directories in a parent directory that contain specified files. @@ -45,22 +55,22 @@ def find_vasp_directories(parent_dir, - The function returns a list of directories that meet the specified conditions. """ if extract_tarballs: - gen_tools.find_and_extract_files_from_tarballs_parallel(parent_dir=parent_dir, - extension=tarball_extensions, - filenames=filenames, - suffix=None, - prefix=None) - - directories = gen_tools.find_directories_with_files(parent_dir=parent_dir, - filenames=filenames, - all_present=all_present) + gen_tools.find_and_extract_files_from_tarballs_parallel( + parent_dir=parent_dir, + extension=tarball_extensions, + filenames=filenames, + suffix=None, + prefix=None, + ) + + directories = gen_tools.find_directories_with_files( + parent_dir=parent_dir, filenames=filenames, all_present=all_present + ) return directories -def read_OUTCAR(filename="OUTCAR", - free_energy=True, - energy_zero=True, - structures=True): + +def read_OUTCAR(filename="OUTCAR", free_energy=True, energy_zero=True, structures=True): """ Read information from the OUTCAR file and related VASP structure files. @@ -86,15 +96,15 @@ def read_OUTCAR(filename="OUTCAR", - If any part of the parsing encounters an error, the corresponding DataFrame entry will have NaN values. 
""" outcar = Outcar() - outcar.from_file(filename = filename) + outcar.from_file(filename=filename) structure_name = os.path.basename(os.path.dirname(filename)) - + try: energies = outcar.parse_dict["energies"] except: energies = np.nan - + # create a list of file extensions to search for extensions = [".vasp", "CONTCAR", "POSCAR"] # create an empty list to store matching files @@ -110,69 +120,89 @@ def read_OUTCAR(filename="OUTCAR", break except: pass - + try: - ionic_step_structures = np.array([Structure(cell, structure.species, outcar.parse_dict["positions"][i], coords_are_cartesian=True).to_json() - for i, cell in enumerate(outcar.parse_dict["cells"])]) + ionic_step_structures = np.array( + [ + Structure( + cell, + structure.species, + outcar.parse_dict["positions"][i], + coords_are_cartesian=True, + ).to_json() + for i, cell in enumerate(outcar.parse_dict["cells"]) + ] + ) except: ionic_step_structures = np.nan - + try: - energies_zero = outcar.parse_dict["energies_zero"] + energies_zero = outcar.parse_dict["energies_zero"] except: energies_zero = np.nan - + try: forces = outcar.parse_dict["forces"] except: forces = np.nan - + try: stresses = outcar.parse_dict["stresses"] except: stresses = np.nan - + try: magmoms = np.array(outcar.parse_dict["final_magmoms"]) except: magmoms = np.nan - + try: scf_steps = [len(i) for i in outcar.parse_dict["scf_energies"]] except: scf_steps = np.nan - - df = pd.DataFrame([[structure_name, - filename, - ionic_step_structures, - energies, - energies_zero, - forces, - stresses, - magmoms, - scf_steps]], - columns = ["job_name", - "filepath", - "structures", - "energy", - "energy_zero", - "forces", - "stresses", - "magmoms", - "scf_steps"]) + + df = pd.DataFrame( + [ + [ + structure_name, + filename, + ionic_step_structures, + energies, + energies_zero, + forces, + stresses, + magmoms, + scf_steps, + ] + ], + columns=[ + "job_name", + "filepath", + "structures", + "energy", + "energy_zero", + "forces", + "stresses", + "magmoms", + "scf_steps", + ], + ) return df -def parse_VASP_directory(directory, - INCAR_filename="INCAR", - KPOINTS_filename="KPOINTS", - POTCAR_filename="POTCAR", - OUTCAR_filename="OUTCAR", - vasprunxml_filename="vasprun.xml", - vasplog_filename="vasp.log"): - + +def parse_VASP_directory( + directory, + INCAR_filename="INCAR", + KPOINTS_filename="KPOINTS", + POTCAR_filename="POTCAR", + OUTCAR_filename="OUTCAR", + vasprunxml_filename="vasprun.xml", + vasplog_filename="vasp.log", +): + # Find file matching pattern structure_files = glob.glob(os.path.join(directory, "starter*.vasp")) - + if len(structure_files) > 0: init_structure = Structure.from_file(structure_files[0]) else: @@ -182,7 +212,10 @@ def parse_VASP_directory(directory, try: df = read_OUTCAR(filename=os.path.join(directory, OUTCAR_filename)) except: - df = pd.DataFrame([[os.path.basename(directory), + df = pd.DataFrame( + [ + [ + os.path.basename(directory), directory, np.nan, np.nan, @@ -190,26 +223,33 @@ def parse_VASP_directory(directory, np.nan, np.nan, np.nan, - np.nan]], - columns = ["job_name", - "filepath", - "structures", - "energy", - "energy_zero", - "forces", - "stresses", - "magmoms", - "scf_steps"]) - - convergence = check_convergence(directory=directory, - filename_vasprun=vasprunxml_filename, - filename_vasplog=vasplog_filename) + np.nan, + ] + ], + columns=[ + "job_name", + "filepath", + "structures", + "energy", + "energy_zero", + "forces", + "stresses", + "magmoms", + "scf_steps", + ], + ) + + convergence = check_convergence( + directory=directory, + 
filename_vasprun=vasprunxml_filename, + filename_vasplog=vasplog_filename, + ) # INCAR try: incar = Incar.from_file(os.path.join(directory, INCAR_filename)).as_dict() except: incar = np.nan - + try: # KPOINTS kpoints = Kpoints.from_file(os.path.join(directory, KPOINTS_filename)).as_dict() @@ -221,32 +261,37 @@ def parse_VASP_directory(directory, kpoints = np.nan try: - element_list, element_count, electron_of_potcar = grab_electron_info(directory_path=directory, - potcar_filename=POTCAR_filename) + element_list, element_count, electron_of_potcar = grab_electron_info( + directory_path=directory, potcar_filename=POTCAR_filename + ) except: element_list = np.nan element_count = np.nan electron_of_potcar = np.nan - try: electron_count = get_total_electron_count(directory_path=directory) except: electron_count = np.nan - + df["element_list"] = [element_list] df["element_count"] = [element_count] df["potcar_electron_count"] = [electron_of_potcar] df["total_electron_count"] = [electron_count] df["convergence"] = [convergence] - + df["kpoints"] = [kpoints] df["incar"] = [incar] return df -def check_convergence(directory, filename_vasprun="vasprun.xml", filename_vasplog="vasp.log", backup_vasplog = "error.out"): +def check_convergence( + directory, + filename_vasprun="vasprun.xml", + filename_vasplog="vasp.log", + backup_vasplog="error.out", +): """ Check the convergence status of a VASP calculation. @@ -272,21 +317,28 @@ def check_convergence(directory, filename_vasprun="vasprun.xml", filename_vasplo vr = Vasprun(filename=os.path.join(directory, filename_vasprun)) return vr.converged except: - line_converged = "reached required accuracy - stopping structural energy minimisation" + line_converged = ( + "reached required accuracy - stopping structural energy minimisation" + ) try: - converged = gen_tools.is_line_in_file(filepath=os.path.join(directory, filename_vasplog), - line=line_converged, - exact_match=False) + converged = gen_tools.is_line_in_file( + filepath=os.path.join(directory, filename_vasplog), + line=line_converged, + exact_match=False, + ) return converged except: try: - converged = gen_tools.is_line_in_file(filepath=os.path.join(directory, backup_vasplog), - line=line_converged, - exact_match=False) + converged = gen_tools.is_line_in_file( + filepath=os.path.join(directory, backup_vasplog), + line=line_converged, + exact_match=False, + ) return converged except: return False + def element_count_ordered(structure): site_element_list = [site.species_string for site in structure] past_element = site_element_list[0] @@ -302,9 +354,12 @@ def element_count_ordered(structure): count = 1 past_element = element element_count.append(count) - return element_list, element_count + return element_list, element_count + -def _try_read_structure(directory_path, structure_filenames = ["CONTCAR", ".vasp", "POSCAR"]): +def _try_read_structure( + directory_path, structure_filenames=["CONTCAR", ".vasp", "POSCAR"] +): structure_files = [] # walk through the directory and check each file's name for root, dirs, files in os.walk(directory_path): @@ -323,15 +378,18 @@ def _try_read_structure(directory_path, structure_filenames = ["CONTCAR", ".vasp structure = np.nan return structure -def grab_electron_info(directory_path, line_before_elec_str="PAW_PBE", potcar_filename = "POTCAR"): - + +def grab_electron_info( + directory_path, line_before_elec_str="PAW_PBE", potcar_filename="POTCAR" +): + structure = _try_read_structure(directory_path=directory_path) if structure != None: element_list, element_count = 
element_count_ordered(structure) - + electron_of_potcar = [] - - with open(os.path.join(directory_path, potcar_filename), 'r') as file: + + with open(os.path.join(directory_path, potcar_filename), "r") as file: lines = file.readlines() # Read the lines from the file should_append = False # Flag to determine if the next line should be appended for line in lines: @@ -341,35 +399,50 @@ def grab_electron_info(directory_path, line_before_elec_str="PAW_PBE", potcar_fi should_append = False # Reset the flag if stripped_line.startswith(line_before_elec_str): should_append = True # Set the flag to append the next line - + return element_list, element_count, electron_of_potcar -def get_total_electron_count(directory_path, line_before_elec_str="PAW_PBE", potcar_filename = "POTCAR"): - ele_list, ele_count, electron_of_potcar = grab_electron_info(directory_path=directory_path, line_before_elec_str=line_before_elec_str, potcar_filename=potcar_filename) + +def get_total_electron_count( + directory_path, line_before_elec_str="PAW_PBE", potcar_filename="POTCAR" +): + ele_list, ele_count, electron_of_potcar = grab_electron_info( + directory_path=directory_path, + line_before_elec_str=line_before_elec_str, + potcar_filename=potcar_filename, + ) total_electron_count = np.dot(ele_count, electron_of_potcar) return total_electron_count + def _check_convergence(directory): return directory, check_convergence(directory) + def find_converged_dirs(parent_dir): dirs = find_vasp_directories(parent_dir=parent_dir, extract_tarballs=False) # Filter the directories where convergence is True dir_and_convergence = parallelise(_check_convergence, dirs) - - converged_dirs = [directory for directory, convergence in dir_and_convergence if convergence] + + converged_dirs = [ + directory for directory, convergence in dir_and_convergence if convergence + ] return converged_dirs + def flatten_all_iterables(input_list): flat_list = [] for item in input_list: - if isinstance(item, (list, tuple, np.ndarray)): # Now also checks for numpy arrays + if isinstance( + item, (list, tuple, np.ndarray) + ): # Now also checks for numpy arrays flat_list.extend(item) # Extend the flat list with elements of the iterable else: flat_list.append(item) # Add the item directly if it's not an iterable return flat_list + def find_significantly_different_indices_threshold(values, threshold): if not values: return [] @@ -381,161 +454,291 @@ def find_significantly_different_indices_threshold(values, threshold): last_significant_value = current_value return significant_indices + def exclude_non_converged_data(df, columns_to_exclude_data): def process_row(row): - non_converged_indices = [i for i, conv in enumerate(row["scf_convergence"]) if not conv] + non_converged_indices = [ + i for i, conv in enumerate(row["scf_convergence"]) if not conv + ] for column in columns_to_exclude_data: if column in row: # Check if column is in row to avoid KeyError - row[column] = [value for i, value in enumerate(row[column]) if i not in non_converged_indices] + row[column] = [ + value + for i, value in enumerate(row[column]) + if i not in non_converged_indices + ] return row processed_df = df.apply(process_row, axis=1) return processed_df -def get_flattened_df(df, - groupby="filepath", - columns_to_process=["energy", "energy_zero", "structures", "forces", "magmoms", "stresses", "scf_steps", "scf_convergence"]): - processed_df = df.sort_values("calc_start_time").groupby(groupby).agg(lambda x: x.tolist()).reset_index().copy() + +def get_flattened_df( + df, + groupby="filepath", 
+ columns_to_process=[ + "energy", + "energy_zero", + "structures", + "forces", + "magmoms", + "stresses", + "scf_steps", + "scf_convergence", + ], +): + processed_df = ( + df.sort_values("calc_start_time") + .groupby(groupby) + .agg(lambda x: x.tolist()) + .reset_index() + .copy() + ) for column in columns_to_process: processed_df[column] = processed_df[column].apply(flatten_all_iterables) return processed_df -def get_filtered_df(df, - energy_threshold=0.05, - columns=["energy", "energy_zero", "structures", "forces", "magmoms", "stresses", "scf_steps", "scf_convergence"]): + +def get_filtered_df( + df, + energy_threshold=0.05, + columns=[ + "energy", + "energy_zero", + "structures", + "forces", + "magmoms", + "stresses", + "scf_steps", + "scf_convergence", + ], +): def process_row(row, column="energy", columns_to_flatten=columns): - indices = find_significantly_different_indices_threshold(row[column], energy_threshold) - processed_row = {col: (row[col] if col not in columns_to_flatten else [row[col][i] for i in indices if i < len(row[col])]) for col in df.columns} + indices = find_significantly_different_indices_threshold( + row[column], energy_threshold + ) + processed_row = { + col: ( + row[col] + if col not in columns_to_flatten + else [row[col][i] for i in indices if i < len(row[col])] + ) + for col in df.columns + } return processed_row + significant_changes = df.apply(process_row, axis=1) significant_changes_df = pd.DataFrame(list(significant_changes)) - if 'job_name' in significant_changes_df.columns: - significant_changes_df["job_name"] = [row.job_name[0] for _, row in significant_changes_df.iterrows()] + if "job_name" in significant_changes_df.columns: + significant_changes_df["job_name"] = [ + row.job_name[0] for _, row in significant_changes_df.iterrows() + ] return significant_changes_df -def get_potential_data_df(df, - energy_threshold=0.05, - columns_to_process=["energy", "energy_zero", "structures", "forces", "magmoms", "stresses", "scf_steps", "scf_convergence"], - ): + +def get_potential_data_df( + df, + energy_threshold=0.05, + columns_to_process=[ + "energy", + "energy_zero", + "structures", + "forces", + "magmoms", + "stresses", + "scf_steps", + "scf_convergence", + ], +): processed_df = get_flattened_df(df) - processed_df = get_filtered_df(processed_df, - energy_threshold=energy_threshold, - columns=["energy", "energy_zero", "structures", "forces", "magmoms", "stresses", "scf_steps", "scf_convergence"]) + processed_df = get_filtered_df( + processed_df, + energy_threshold=energy_threshold, + columns=[ + "energy", + "energy_zero", + "structures", + "forces", + "magmoms", + "stresses", + "scf_steps", + "scf_convergence", + ], + ) non_corr_df = exclude_non_converged_data(processed_df, columns_to_process) return non_corr_df -class DatabaseGenerator(): - - def __init__(self, - parent_dir, - max_workers=16): + +class DatabaseGenerator: + + def __init__(self, parent_dir, max_workers=16): self.parent_dir = parent_dir self.max_workers = max_workers - def build_database(self, - target_directory = None, - extract_directories = False, - tarball_extensions = (".tar.gz", "tar.bz2"), - read_error_dirs = False, - read_multiple_runs_in_dir = False, - cleanup = False, - keep_filenames_after_cleanup = [], - keep_filename_patterns_after_cleanup = [], - max_dir_count = None, - filenames_to_qualify=["vasp.log", "INCAR", "POTCAR", "CONTCAR", "KPOINTS", "OUTCAR", "vasprun.xml"], - all_present=False, - df_filename = None, - df_compression=True): # Added database_compression flag with default 
True + def build_database( + self, + target_directory=None, + extract_directories=False, + tarball_extensions=(".tar.gz", "tar.bz2"), + read_error_dirs=False, + read_multiple_runs_in_dir=False, + cleanup=False, + keep_filenames_after_cleanup=[], + keep_filename_patterns_after_cleanup=[], + max_dir_count=None, + filenames_to_qualify=[ + "vasp.log", + "INCAR", + "POTCAR", + "CONTCAR", + "KPOINTS", + "OUTCAR", + "vasprun.xml", + ], + all_present=False, + df_filename=None, + df_compression=True, + ): # Added database_compression flag with default True start_time = time.time() - + if target_directory: - dirs = find_vasp_directories(parent_dir = target_directory, - extract_tarballs = extract_directories, - all_present = all_present, - filenames = filenames_to_qualify, - tarball_extensions = tarball_extensions) + dirs = find_vasp_directories( + parent_dir=target_directory, + extract_tarballs=extract_directories, + all_present=all_present, + filenames=filenames_to_qualify, + tarball_extensions=tarball_extensions, + ) else: - dirs = find_vasp_directories(parent_dir = self.parent_dir, - extract_tarballs = extract_directories, - all_present = all_present, - filenames = filenames_to_qualify, - tarball_extensions = tarball_extensions) - print(f"The total number of vasp directories that we are building the database out of is {len(dirs)}") - - compression_option = 'gzip' if df_compression else None - compression_extension = '.gz' if df_compression else '' - + dirs = find_vasp_directories( + parent_dir=self.parent_dir, + extract_tarballs=extract_directories, + all_present=all_present, + filenames=filenames_to_qualify, + tarball_extensions=tarball_extensions, + ) + print( + f"The total number of vasp directories that we are building the database out of is {len(dirs)}" + ) + + compression_option = "gzip" if df_compression else None + compression_extension = ".gz" if df_compression else "" + if max_dir_count: pkl_filenames = [] for i, chunks in enumerate(gen_tools.chunk_list(dirs, max_dir_count)): step_time = time.time() - df = pd.concat(parallelise(parse_vasp_directory, - [(chunk,) for chunk in chunks], - max_workers=self.max_workers, - extract_error_dirs=read_error_dirs, - parse_all_in_dir=read_multiple_runs_in_dir)) + df = pd.concat( + parallelise( + parse_vasp_directory, + [(chunk,) for chunk in chunks], + max_workers=self.max_workers, + extract_error_dirs=read_error_dirs, + parse_all_in_dir=read_multiple_runs_in_dir, + ) + ) if df_filename: db_filename = f"{i}_{df_filename}.pkl{compression_extension}" else: db_filename = f"{i}.pkl{compression_extension}" pkl_filenames.append(os.path.join(self.parent_dir, db_filename)) - df.to_pickle(os.path.join(self.parent_dir, db_filename), compression=compression_option) + df.to_pickle( + os.path.join(self.parent_dir, db_filename), + compression=compression_option, + ) step_taken_time = np.round(time.time() - step_time, 3) - print(f"Step {i}: {step_taken_time} seconds taken for {len(chunks)} parse steps") - - df = pd.concat([pd.read_pickle(partial_df, compression=compression_option) for partial_df in pkl_filenames]) - final_db_filename = os.path.join(self.parent_dir, f"vasp_database.pkl{compression_extension}") + print( + f"Step {i}: {step_taken_time} seconds taken for {len(chunks)} parse steps" + ) + + df = pd.concat( + [ + pd.read_pickle(partial_df, compression=compression_option) + for partial_df in pkl_filenames + ] + ) + final_db_filename = os.path.join( + self.parent_dir, f"vasp_database.pkl{compression_extension}" + ) df.to_pickle(final_db_filename, 
compression=compression_option) else: - df = pd.concat(parallelise(parse_vasp_directory, - [(chunk,) for chunk in chunks], - max_workers=self.max_workers, - extract_error_dirs=read_error_dirs, - parse_all_in_dir=read_multiple_runs_in_dir)) - df.to_pickle(os.path.join(self.parent_dir, f"vasp_database.pkl{compression_extension}"), compression=compression_option) - + df = pd.concat( + parallelise( + parse_vasp_directory, + [(chunk,) for chunk in chunks], + max_workers=self.max_workers, + extract_error_dirs=read_error_dirs, + parse_all_in_dir=read_multiple_runs_in_dir, + ) + ) + df.to_pickle( + os.path.join( + self.parent_dir, f"vasp_database.pkl{compression_extension}" + ), + compression=compression_option, + ) + end_time = time.time() elapsed_time = end_time - start_time - + # not optional - keep the tarballs/zips.. keep_filename_patterns_after_cleanup += ".tar.gz" keep_filename_patterns_after_cleanup += ".tar.bz2" keep_filename_patterns_after_cleanup += ".zip" if cleanup: - gen_tools.cleanup_dir(directory_path=dirs, keep=True, files=[], file_patterns=[]) - parallelise(gen_tools.cleanup_dir, dirs, [True] * len(dirs), keep_filenames_after_cleanup*len(dirs), keep_filename_patterns_after_cleanup*len(dirs)) - - print("Elapsed time:", np.round(elapsed_time,3), "seconds") + gen_tools.cleanup_dir( + directory_path=dirs, keep=True, files=[], file_patterns=[] + ) + parallelise( + gen_tools.cleanup_dir, + dirs, + [True] * len(dirs), + keep_filenames_after_cleanup * len(dirs), + keep_filename_patterns_after_cleanup * len(dirs), + ) + + print("Elapsed time:", np.round(elapsed_time, 3), "seconds") return df - - def update_failed_jobs_in_database(self, df_path=None, read_error_dirs=False, read_multiple_runs_in_dir=False, max_dir_count=None, df_compression=True): - compression_option = 'gzip' if df_compression else None - compression_extension = '.gz' if df_compression else '' - + + def update_failed_jobs_in_database( + self, + df_path=None, + read_error_dirs=False, + read_multiple_runs_in_dir=False, + max_dir_count=None, + df_compression=True, + ): + compression_option = "gzip" if df_compression else None + compression_extension = ".gz" if df_compression else "" + if df_path is None: - df_path = os.path.join(self.parent_dir, f"vasp_database.pkl{compression_extension}") - + df_path = os.path.join( + self.parent_dir, f"vasp_database.pkl{compression_extension}" + ) + if os.path.isdir(df_path): potential_files = [ os.path.join(df_path, "vasp_database.pkl.gz"), - os.path.join(df_path, "vasp_database.pkl") + os.path.join(df_path, "vasp_database.pkl"), ] - output_path = os.path.join(df_path, f"vasp_database.pkl{compression_extension}") + output_path = os.path.join( + df_path, f"vasp_database.pkl{compression_extension}" + ) else: potential_files = [df_path] output_path = df_path - + df = None for file in potential_files: try: if file.endswith(".gz"): - df = pd.read_pickle(file, compression='gzip') + df = pd.read_pickle(file, compression="gzip") else: df = pd.read_pickle(file, compression=None) print(f"Successfully read database from {file}") @@ -544,33 +747,56 @@ def update_failed_jobs_in_database(self, df_path=None, read_error_dirs=False, re print(f"Failed to read database from {file}") if df is None: - raise ValueError("Invalid path or filename - please check! Attempted paths: " + ", ".join(potential_files)) - - failed_dirs = df[df['convergence'] == False]['filepath'].tolist() + raise ValueError( + "Invalid path or filename - please check! 
Attempted paths: " + + ", ".join(potential_files) + ) + + failed_dirs = df[df["convergence"] == False]["filepath"].tolist() print(f"Reparsing {len(failed_dirs)} directories where convergence is False") if max_dir_count: pkl_filenames = [] - for i, chunks in enumerate(gen_tools.chunk_list(failed_dirs, max_dir_count)): + for i, chunks in enumerate( + gen_tools.chunk_list(failed_dirs, max_dir_count) + ): step_time = time.time() - failed_df = pd.concat(parallelise(parse_vasp_directory, - [(chunk,) for chunk in chunks], - max_workers=self.max_workers, - extract_error_dirs=read_error_dirs, - parse_all_in_dir=read_multiple_runs_in_dir)) + failed_df = pd.concat( + parallelise( + parse_vasp_directory, + [(chunk,) for chunk in chunks], + max_workers=self.max_workers, + extract_error_dirs=read_error_dirs, + parse_all_in_dir=read_multiple_runs_in_dir, + ) + ) db_filename = f"update_{i}.pkl{compression_extension}" pkl_filenames.append(os.path.join(self.parent_dir, db_filename)) - failed_df.to_pickle(os.path.join(self.parent_dir, db_filename), compression=compression_option) + failed_df.to_pickle( + os.path.join(self.parent_dir, db_filename), + compression=compression_option, + ) step_taken_time = np.round(time.time() - step_time, 3) - print(f"Step {i}: {step_taken_time} seconds taken for {len(chunks)} parse steps") - - failed_df = pd.concat([pd.read_pickle(partial_df, compression=compression_option) for partial_df in pkl_filenames]) + print( + f"Step {i}: {step_taken_time} seconds taken for {len(chunks)} parse steps" + ) + + failed_df = pd.concat( + [ + pd.read_pickle(partial_df, compression=compression_option) + for partial_df in pkl_filenames + ] + ) else: - failed_df = pd.concat(parallelise(parse_vasp_directory, - [(chunk,) for chunk in failed_dirs], - max_workers=self.max_workers, - extract_error_dirs=read_error_dirs, - parse_all_in_dir=read_multiple_runs_in_dir)) + failed_df = pd.concat( + parallelise( + parse_vasp_directory, + [(chunk,) for chunk in failed_dirs], + max_workers=self.max_workers, + extract_error_dirs=read_error_dirs, + parse_all_in_dir=read_multiple_runs_in_dir, + ) + ) # Use a different method to merge the DataFrames df.update(failed_df, overwrite=True) @@ -578,6 +804,7 @@ def update_failed_jobs_in_database(self, df_path=None, read_error_dirs=False, re df.to_pickle(output_path, compression=compression_option) print(f"Updated dataframe saved to {output_path}") return df + # def update_database(self, # new_calculation_directory, # existing_database_filename = "vasp_database.pkl", @@ -587,7 +814,7 @@ def update_failed_jobs_in_database(self, df_path=None, read_error_dirs=False, re # keep_filename_patterns_after_cleanup = [], # max_dir_count = None, # df_filename = None): - + # update_df = self.build_database(target_directory = existing_database_filename, # extract_directories = extract_directories, # cleanup=cleanup, @@ -597,12 +824,12 @@ def update_failed_jobs_in_database(self, df_path=None, read_error_dirs=False, re # df_filename = df_filename) # def _get_job_dir(filepath): # return os.path.basename(filepath.rstrip("/OUTCAR")) - + # update_df["job_dir"] = [_get_job_dir(row.filepath) for _, row in update_df.iterrows()] # base_df["job_dir"] = [_get_job_dir(row.filepath) for _, row in base_df.iterrows()] # base_df = pd.read_pickle(existing_database_filename) - + # # Merge df1 and df2 based on the common dirname # interm_df = base_df.merge(update_df, on='job_dir', suffixes=('_df1', '_df2'), how='left') @@ -612,11 +839,12 @@ def update_failed_jobs_in_database(self, df_path=None, 
read_error_dirs=False, re # # Check if the column with suffix '_df2' exists # if (f'{column}_df2' in interm_df.columns): # base_df[column].update(interm_df[column + '_df2'].combine_first(interm_df[column + '_df1'])) - + # base_df.drop(columns=['job_dir'], inplace=True) - + # return base_df + def update_database(df_base, df_update): # Get the unique job names from df2 df_update_jobs = set(df_update["job_name"]) @@ -627,6 +855,8 @@ def update_database(df_base, df_update): # Append df2 to the filtered df1 merged_df = pd.concat([df_base, df_update_jobs], ignore_index=True) return merged_df + + def robust_append_last(clist, value): try: clist.append(value[-1]) @@ -634,18 +864,19 @@ def robust_append_last(clist, value): clist.append(np.nan) return clist + def create_summary(database_df): energies = [] magmoms = [] structures = [] - + for i, row in database_df.iterrows(): energies = robust_append_last(energies, row.energy_zero) magmoms = robust_append_last(magmoms, row.magmoms) structures = robust_append_last(structures, row.structures) - + df = database_df[["job_name", "convergence"]].copy() df["total_energy"] = energies df["magmoms"] = magmoms df["structures"] = structures - return df \ No newline at end of file + return df diff --git a/utils/vasp/job.py b/utils/vasp/job.py index 4f268ca..3fdc240 100644 --- a/utils/vasp/job.py +++ b/utils/vasp/job.py @@ -5,13 +5,16 @@ potcar_library_path = "/root/POTCAR_Library/GGA" potcar_library_path = "/cmmc/u/hmai/pyiron-resources-cmmc/vasp/potentials/potpaw_PBE" -def createFolder(directory, delete_folder='no'): - import os; import shutil + +def createFolder(directory, delete_folder="no"): + import os + import shutil + if not os.path.exists(directory): os.makedirs(directory) else: - if delete_folder == 'no': - #print('no replacement/deletion created due to folder existing') + if delete_folder == "no": + # print('no replacement/deletion created due to folder existing') x = 1 else: print("removing directory...") @@ -22,11 +25,17 @@ def createFolder(directory, delete_folder='no'): else: print("given path is a special file - manually remove") + def get_immediate_subdirectories(a_dir): - return [f.path for f in os.scandir(a_dir) if f.is_dir() and os.path.basename(f) != ".ipynb_checkpoints"] + return [ + f.path + for f in os.scandir(a_dir) + if f.is_dir() and os.path.basename(f) != ".ipynb_checkpoints" + ] + class jobfile: - ''' + """ Class for jobfile object for passing into createJobFolder Attributes: @@ -48,15 +57,18 @@ class jobfile: RAM: RAM to be allocated - this is only specified in the case of Gadi, Setonix + magnus do not need specification. 
walltime: INTEGER ONLY The walltime of the job in hours - ''' - def __init__(self, - file_path, - HPC = "Gadi", - VASP_version = "5.4.4", - CPU = 192, - RAM = 64, - walltime = 999, - max_resubmissions = 999): + """ + + def __init__( + self, + file_path, + HPC="Gadi", + VASP_version="5.4.4", + CPU=192, + RAM=64, + walltime=999, + max_resubmissions=999, + ): self.file_path = file_path self.HPC = HPC self.VASP_version = VASP_version @@ -65,9 +77,9 @@ def __init__(self, self.walltime = walltime self.max_resubmissions = max_resubmissions - def to_file(self,\ - case_name = 'template_job',\ - output_path = os.path.join(os.getcwd(), "test")): + def to_file( + self, case_name="template_job", output_path=os.path.join(os.getcwd(), "test") + ): """ Writes KPOINTS file with MP gamma centred grid: @@ -78,7 +90,7 @@ def to_file(self,\ createFolder(output_path) - with open("%s" % (self.file_path), 'r') as fin : + with open("%s" % (self.file_path), "r") as fin: filedata = fin.read() if self.HPC == "Gadi": fin = open("%s" % (self.file_path), "rt", newline="\n") @@ -87,7 +99,9 @@ def to_file(self,\ # Replace the target string filedata = filedata.replace("{WALLTIMESTRING}", "%s:00:00" % self.walltime) filedata = filedata.replace("{CPUSTRING}", str(self.CPU)) - filedata = filedata.replace("{MAXCONVITERATIONS}", str(self.max_resubmissions-1)) + filedata = filedata.replace( + "{MAXCONVITERATIONS}", str(self.max_resubmissions - 1) + ) # Only on GADI filedata = filedata.replace("{MEMORYSTRING}", "%sGB" % self.RAM) @@ -99,33 +113,43 @@ def to_file(self,\ max_cpu_count = 128 elif self.HPC == "Garching": max_cpu_count = 40 - if self.CPU <= max_cpu_count: + if self.CPU <= max_cpu_count: filedata = filedata.replace("{NODESTRING}", "1") else: - filedata = filedata.replace("{NODESTRING}", "%s" % int(self.CPU/max_cpu_count)) - + filedata = filedata.replace( + "{NODESTRING}", "%s" % int(self.CPU / max_cpu_count) + ) + filedata = filedata.replace("{CASESTRING}", "%s" % case_name) if self.VASP_version == "5.4.4": - filedata = filedata.replace("{VASPMODULELOADSTRING}", 'module load vasp/%s' % self.VASP_version) + filedata = filedata.replace( + "{VASPMODULELOADSTRING}", "module load vasp/%s" % self.VASP_version + ) else: if self.HPC == "Setonix" and self.VASP_version in ["6.3.0", "6.2.1"]: - filedata = filedata.replace("{VASPMODULELOADSTRING}", 'module load vasp6/%s' % self.VASP_version) + filedata = filedata.replace( + "{VASPMODULELOADSTRING}", "module load vasp6/%s" % self.VASP_version + ) else: - filedata = filedata.replace("{VASPMODULELOADSTRING}", 'module load vasp/%s' % self.VASP_version) + filedata = filedata.replace( + "{VASPMODULELOADSTRING}", "module load vasp/%s" % self.VASP_version + ) if self.HPC == "Garching": # vasp/5.3-constrainedcollinearmagnetism vasp/5.4.4-buildFeb20 vasp/5.4.4-elphon vasp/5.4.4-python vasp/6.4.0-buildMar23 # vasp/5.4.4 vasp/5.4.4-Dudarev vasp/5.4.4-potentiostat vasp/6.4.0 vasp/6.4.0-python - filedata = filedata.replace("{VASPMODULELOADSTRING}", 'module load vasp/%s' % self.VASP_version) - + filedata = filedata.replace( + "{VASPMODULELOADSTRING}", "module load vasp/%s" % self.VASP_version + ) # Write the file out again - with open(os.path.join(output_path, case_name), 'w') as fout: + with open(os.path.join(output_path, case_name), "w") as fout: fout.write(filedata) fin.close() fout.close() + def stackElementString(structure): site_element_list = [site.species_string for site in structure] past_element = site_element_list[0] @@ -143,52 +167,54 @@ def stackElementString(structure): 
element_count.append(count) return element_list, element_count -def createPOTCAR(structure, path = os.getcwd()): + +def createPOTCAR(structure, path=os.getcwd()): element_list = stackElementString(structure)[0] potcar_paths = [] for element in element_list: if element == "Nb": - element = "Nb_sv" # Use 13 electron - element = "Nb_pv" # Use 11 electron + element = "Nb_sv" # Use 13 electron + element = "Nb_pv" # Use 11 electron elif element == "K": - element = "K_sv" # 9 electron - element = "K_pv" # 7 electron + element = "K_sv" # 9 electron + element = "K_pv" # 7 electron elif element == "Ca": - element = "Ca_sv" # 9 electron - element = "Ca_pv" # 7 electron + element = "Ca_sv" # 9 electron + element = "Ca_pv" # 7 electron elif element == "Rb": - element = "Rb_sv" # 9 electron - element = "Rb_pv" # 7 electron + element = "Rb_sv" # 9 electron + element = "Rb_pv" # 7 electron elif element == "Sr": - element = "Sr_sv" # 9 electron + element = "Sr_sv" # 9 electron elif element == "Cs": - element = "Cs_sv" # 9 electron + element = "Cs_sv" # 9 electron elif element == "Ba": - element = "Ba_sv" # 10 electron + element = "Ba_sv" # 10 electron elif element == "Fr": - element = "Fr_sv" # 9 electron + element = "Fr_sv" # 9 electron elif element == "Ra": - element = "Ra_sv" # 9 electron + element = "Ra_sv" # 9 electron elif element == "Y": - element = "Y_sv" # 9 electron + element = "Y_sv" # 9 electron elif element == "Zr": - element = "Zr_sv" # 10 electron + element = "Zr_sv" # 10 electron elif element == "Fr": - element = "Fr_sv" # 9 electron + element = "Fr_sv" # 9 electron elif element == "Ra": - element = "Ra_sv" # 9 electron + element = "Ra_sv" # 9 electron elif element == "Y": - element = "Y_sv" # 9 electron + element = "Y_sv" # 9 electron potcar_paths.append(os.path.join(potcar_library_path, element, "POTCAR")) - with open(os.path.join(path, "POTCAR"),'wb') as wfd: + with open(os.path.join(path, "POTCAR"), "wb") as wfd: for f in potcar_paths: - with open(f,'rb') as fd: + with open(f, "rb") as fd: shutil.copyfileobj(fd, wfd) - + + class KPOINTS: """ Class for KPOINTS object for passing into createJobFolder @@ -200,13 +226,12 @@ class KPOINTS: shift: optional shift of mesh, input as list e.g. 
[0, 0, 0] """ + def __init__(self, subdivs, shift): self.subdivs = subdivs self.shift = shift - def to_file(self,\ - case_name = 'KPOINTS',\ - filepath = os.getcwd()): + def to_file(self, case_name="KPOINTS", filepath=os.getcwd()): """ Writes KPOINTS file with MP gamma centred grid: @@ -215,48 +240,58 @@ def to_file(self,\ """ createFolder(filepath) - f = io.open(os.path.join(filepath, "KPOINTS"), 'w', newline='\n') - with open(os.path.join(filepath, "KPOINTS"), 'a', newline='\n') as f: + f = io.open(os.path.join(filepath, "KPOINTS"), "w", newline="\n") + with open(os.path.join(filepath, "KPOINTS"), "a", newline="\n") as f: # File name (just string on first line of KPOINTS) - f.write('%s\n' % case_name) + f.write("%s\n" % case_name) # Use automatic generation "0" - f.write('0\n') + f.write("0\n") # Monkhorst-Pack Gamma centred grid - f.write('Gamma\n') + f.write("Gamma\n") # Subdivisions along reciprocal lattice vectors - subdiv_string = '' + subdiv_string = "" for i in self.subdivs: subdiv_string += "%s " % str(i) - f.write('%s\n' % subdiv_string) + f.write("%s\n" % subdiv_string) # optional shift of the mesh (s_1, s_2, s_3) - shift_string = '' + shift_string = "" for i in self.shift: shift_string += "%s " % str(i) - f.write('%s\n' % shift_string) + f.write("%s\n" % shift_string) f.close() - -def createJobFolder(structure,\ - KPOINT = None,\ - folder_path = os.path.join(os.getcwd(), "jobfolder"),\ - INCAR = None,\ - jobfile = None,\ - quiet=True): + + +def createJobFolder( + structure, + KPOINT=None, + folder_path=os.path.join(os.getcwd(), "jobfolder"), + INCAR=None, + jobfile=None, + quiet=True, +): # This assumes that incar file base is present already, please adjust this function to adjust the incar flags # creates a subdirectory of chosen name in current directory parent_folder = os.getcwd() createFolder(folder_path) - structure.to(fmt="poscar", filename = os.path.join(folder_path, f"starter-{os.path.basename(folder_path)}.vasp")) - structure.to(fmt="poscar", filename = os.path.join(folder_path, "POSCAR")) + structure.to( + fmt="poscar", + filename=os.path.join( + folder_path, f"starter-{os.path.basename(folder_path)}.vasp" + ), + ) + structure.to(fmt="poscar", filename=os.path.join(folder_path, "POSCAR")) - createPOTCAR(structure, path = "%s" % folder_path) + createPOTCAR(structure, path="%s" % folder_path) INCAR.write_file(os.path.join(folder_path, "INCAR")) if KPOINT: - KPOINT.to_file(filepath = folder_path) + KPOINT.to_file(filepath=folder_path) - jobfile.to_file(case_name = '%s.sh' % os.path.basename(folder_path),\ - output_path = "%s" % (folder_path)) + jobfile.to_file( + case_name="%s.sh" % os.path.basename(folder_path), + output_path="%s" % (folder_path), + ) if not quiet: - print("Generating jobfolder, name %s" % (os.path.basename(folder_path))) \ No newline at end of file + print("Generating jobfolder, name %s" % (os.path.basename(folder_path))) diff --git a/utils/vasp/parser/outcar.py b/utils/vasp/parser/outcar.py index b024a1c..3e75a1e 100644 --- a/utils/vasp/parser/outcar.py +++ b/utils/vasp/parser/outcar.py @@ -17,6 +17,7 @@ scipy.constants.physical_constants["joule-electron volt relationship"][0] / 1e22 ) + class Outcar(object): """ This module is used to parse VASP OUTCAR files. 
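As a quick reference, a minimal sketch (not part of this patch) of driving the Outcar parser directly, mirroring how `read_OUTCAR` in utils/vasp/database.py consumes it; the OUTCAR path is a placeholder, and the `parse_dict` keys are the ones used there.

from utils.vasp.parser.outcar import Outcar

outcar = Outcar()
outcar.from_file(filename="some_run/OUTCAR")  # placeholder path

energies = outcar.parse_dict["energies"]  # energy per ionic step
forces = outcar.parse_dict["forces"]  # per-step force arrays
scf_per_step = [len(scf) for scf in outcar.parse_dict["scf_energies"]]
print(f"{len(energies)} ionic steps; SCF iterations per step: {scf_per_step}")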
@@ -120,10 +121,18 @@ def from_file(self, filename="OUTCAR"): "elapsed_time": elapsed_time, "memory_used": memory_used, } - self.parse_dict["ionic_stop_criteria"] = self.get_ionic_stop_criteria(filename=filename) - self.parse_dict["electronic_stop_criteria"] = self.get_electronic_stop_criteria(filename=filename) - self.parse_dict["max_electronic_steps"] = self.get_electronic_stop_criteria(filename=filename) - self.parse_dict["max_ionic_steps"] = self.get_electronic_stop_criteria(filename=filename) + self.parse_dict["ionic_stop_criteria"] = self.get_ionic_stop_criteria( + filename=filename + ) + self.parse_dict["electronic_stop_criteria"] = self.get_electronic_stop_criteria( + filename=filename + ) + self.parse_dict["max_electronic_steps"] = self.get_electronic_stop_criteria( + filename=filename + ) + self.parse_dict["max_ionic_steps"] = self.get_electronic_stop_criteria( + filename=filename + ) try: self.parse_dict["pressures"] = ( @@ -177,30 +186,34 @@ def from_hdf(self, hdf, group_name="outcar"): """ with hdf.open(group_name) as hdf5_output: for key in hdf5_output.list_nodes(): - self.parse_dict[key] = hdf5_output[key] - - def extract_value_from_line(self, line, position = 1, split = "="): + self.parse_dict[key] = hdf5_output[key] + + def extract_value_from_line(self, line, position=1, split="="): parts = line.split(split) if len(parts) > 1: return float(parts[position].strip().split()[0].strip(";")) return None - + def find_and_extract_value_from_matched_line(self, filename, search_term): - with open(filename, 'r') as file: + with open(filename, "r") as file: for line in file: if search_term in line: value = self.extract_value_from_line(line) - return value - + return value + def get_ionic_stop_criteria(self, filename="OUTCAR"): - return self.find_and_extract_value_from_matched_line(filename, "stopping-criterion for IOM") - + return self.find_and_extract_value_from_matched_line( + filename, "stopping-criterion for IOM" + ) + def get_electronic_stop_criteria(self, filename="OUTCAR"): - return self.find_and_extract_value_from_matched_line(filename, "stopping-criterion for ELM") - + return self.find_and_extract_value_from_matched_line( + filename, "stopping-criterion for ELM" + ) + def get_max_electronic_steps(self, filename="OUTCAR"): return self.find_and_extract_value_from_matched_line(filename, "NELM") - + def get_vasp_version(self, filename="OUTCAR", lines=None): return lines[0].lstrip().split(sep=" ")[0] @@ -218,11 +231,11 @@ def get_datetime(self, filename="OUTCAR", lines=None): if match: date_str, time_str = match.groups() # Combining the date and time strings - datetime_str = date_str + ' ' + time_str + datetime_str = date_str + " " + time_str # Converting to datetime object - datetime_obj = datetime.strptime(datetime_str, '%Y.%m.%d %H:%M:%S') + datetime_obj = datetime.strptime(datetime_str, "%Y.%m.%d %H:%M:%S") return datetime_obj - + def get_positions_and_forces(self, filename="OUTCAR", lines=None, n_atoms=None): """ Gets the forces and positions for every ionic step from the OUTCAR file @@ -253,7 +266,6 @@ def get_positions_and_forces(self, filename="OUTCAR", lines=None, n_atoms=None): ) def get_positions(self, filename="OUTCAR", lines=None, n_atoms=None): - """ Gets the positions for every ionic step from the OUTCAR file @@ -1187,12 +1199,14 @@ def get_energy_components(filename="OUTCAR", lines=None): [ np.hstack( [ - float(lines[ind + i].split()[-1]) - if i != 7 - else [ - float(lines[ind_lst[-1] + 7].split()[-2]), - float(lines[ind_lst[-1] + 7].split()[-1]), - ] + ( + 
float(lines[ind + i].split()[-1]) + if i != 7 + else [ + float(lines[ind_lst[-1] + 7].split()[-2]), + float(lines[ind_lst[-1] + 7].split()[-1]), + ] + ) for i in range(2, 12) ] ) @@ -1242,9 +1256,11 @@ def _split_indices(ind_ionic_lst, ind_elec_lst): """ ind_elec_array = np.array(ind_elec_lst) return [ - ind_elec_array[(ind_elec_array < j2) & (j1 < ind_elec_array)] - if j1 < j2 - else ind_elec_array[(ind_elec_array < j2)] + ( + ind_elec_array[(ind_elec_array < j2) & (j1 < ind_elec_array)] + if j1 < j2 + else ind_elec_array[(ind_elec_array < j2)] + ) for j1, j2 in zip(np.roll(ind_ionic_lst, 1), ind_ionic_lst) ] @@ -1263,4 +1279,4 @@ def _get_lines_from_file(filename, lines=None): if lines is None: with open(filename, "r") as f: lines = f.readlines() - return lines \ No newline at end of file + return lines diff --git a/utils/vasp/parser/output.py b/utils/vasp/parser/output.py index b864d23..c8a223d 100644 --- a/utils/vasp/parser/output.py +++ b/utils/vasp/parser/output.py @@ -13,7 +13,13 @@ from utils.vasp.parser.outcar import Outcar import utils.generic as gen_tools -def check_convergence(directory, filename_vasprun="vasprun.xml", filename_vasplog="vasp.log", backup_vasplog="error.out"): + +def check_convergence( + directory, + filename_vasprun="vasprun.xml", + filename_vasplog="vasp.log", + backup_vasplog="error.out", +): """ Check the convergence status of a VASP calculation. @@ -29,15 +35,26 @@ def check_convergence(directory, filename_vasprun="vasprun.xml", filename_vasplo vr = Vasprun(filename=os.path.join(directory, filename_vasprun)) return vr.converged except: - line_converged = "reached required accuracy - stopping structural energy minimisation" + line_converged = ( + "reached required accuracy - stopping structural energy minimisation" + ) try: - return gen_tools.is_line_in_file(os.path.join(directory, filename_vasplog), line=line_converged, exact_match=False) + return gen_tools.is_line_in_file( + os.path.join(directory, filename_vasplog), + line=line_converged, + exact_match=False, + ) except: try: - return gen_tools.is_line_in_file(os.path.join(directory, backup_vasplog), line=line_converged, exact_match=False) + return gen_tools.is_line_in_file( + os.path.join(directory, backup_vasplog), + line=line_converged, + exact_match=False, + ) except: return False + def process_error_archives(directory): """ Processes all tar or tar.gz files starting with 'error' in the specified directory and its subdirectories. @@ -48,10 +65,14 @@ def process_error_archives(directory): Returns: pd.DataFrame: DataFrame containing the processed VASP outputs from error archives. 
""" - error_files = [os.path.join(root, file) - for root, dirs, files in os.walk(directory) - for file in files if file.startswith('error') and (file.endswith('.tar') or file.endswith('.tar.gz'))] - + error_files = [ + os.path.join(root, file) + for root, dirs, files in os.walk(directory) + for file in files + if file.startswith("error") + and (file.endswith(".tar") or file.endswith(".tar.gz")) + ] + df_list = [] for error_file in error_files: with tempfile.TemporaryDirectory() as temp_dir: @@ -67,12 +88,15 @@ def process_error_archives(directory): print(f"Processing error dirs in {directory} complete.") return pd.concat(df_list) if df_list else pd.DataFrame() -def _get_vasp_outputs_from_files(structure, outcar_path="OUTCAR", incar_path="INCAR", kpoints_path="KPOINTS"): + +def _get_vasp_outputs_from_files( + structure, outcar_path="OUTCAR", incar_path="INCAR", kpoints_path="KPOINTS" +): file_data = { "POSCAR": [structure], "OUTCAR": [np.nan], "INCAR": [np.nan], - "KPOINTS": [np.nan] + "KPOINTS": [np.nan], } if os.path.isfile(outcar_path): @@ -96,15 +120,20 @@ def _get_vasp_outputs_from_files(structure, outcar_path="OUTCAR", incar_path="IN file_data["KPOINTS"] = [kpoints] except Exception as e: pass - + return pd.DataFrame(file_data) + def _get_vasp_outputs(directory, structure=None, parse_all_in_dir=True): - outcar_files = glob.glob(os.path.join(directory, "OUTCAR*")) if parse_all_in_dir else glob.glob(os.path.join(directory, "OUTCAR")) - + outcar_files = ( + glob.glob(os.path.join(directory, "OUTCAR*")) + if parse_all_in_dir + else glob.glob(os.path.join(directory, "OUTCAR")) + ) + if structure is None: structure = get_structure(directory) - + if outcar_files: data = [] for outcar_file in outcar_files: @@ -112,22 +141,36 @@ def _get_vasp_outputs(directory, structure=None, parse_all_in_dir=True): incar_file = os.path.join(directory, f"INCAR{suffix}") kpoints_file = os.path.join(directory, f"KPOINTS{suffix}") - output_df = _get_vasp_outputs_from_files(structure, outcar_path=outcar_file, incar_path=incar_file, kpoints_path=kpoints_file) + output_df = _get_vasp_outputs_from_files( + structure, + outcar_path=outcar_file, + incar_path=incar_file, + kpoints_path=kpoints_file, + ) data.append(output_df) data = pd.concat(data) else: - data = pd.DataFrame({"POSCAR": [structure], "OUTCAR": [np.nan], "INCAR": [np.nan], "KPOINTS": [np.nan]}) - + data = pd.DataFrame( + { + "POSCAR": [structure], + "OUTCAR": [np.nan], + "INCAR": [np.nan], + "KPOINTS": [np.nan], + } + ) + return data + def get_SCF_cycle_convergence(outcar_scf_arrays, threshold=1e-5): diff = outcar_scf_arrays[-1] - outcar_scf_arrays[-2] return abs(diff) < threshold + def _get_KPOINTS_info(KPOINTS, INCAR): try: if np.isnan(KPOINTS): - kpoints_key = 'KSPACING' + kpoints_key = "KSPACING" return f"KSPACING: {INCAR.get(kpoints_key, 0.5)}" else: return KPOINTS @@ -135,89 +178,117 @@ def _get_KPOINTS_info(KPOINTS, INCAR): print(e) return np.nan + def process_outcar(outcar, structure): if pd.isna(outcar) or pd.isna(structure): - warning_message = ("Both OUTCAR and structure data are missing. Returning DataFrame with np.nan values." - if pd.isna(outcar) and pd.isna(structure) else - "OUTCAR data is missing. Returning DataFrame with np.nan values for OUTCAR-related fields." - if pd.isna(outcar) else - "Structure data is missing. Returning DataFrame with np.nan values for structure-related fields.") + warning_message = ( + "Both OUTCAR and structure data are missing. Returning DataFrame with np.nan values." 
+ if pd.isna(outcar) and pd.isna(structure) + else ( + "OUTCAR data is missing. Returning DataFrame with np.nan values for OUTCAR-related fields." + if pd.isna(outcar) + else "Structure data is missing. Returning DataFrame with np.nan values for structure-related fields." + ) + ) warnings.warn(warning_message) - - return pd.DataFrame([{ - "calc_start_time": np.nan, - "consumed_time": np.nan, - "structures": np.nan, - "energy": np.nan, - "energy_zero": np.nan, - "forces": np.nan, - "stresses": np.nan, - "magmoms": np.nan, - "scf_steps": np.nan, - "scf_convergence": np.nan - }]) + + return pd.DataFrame( + [ + { + "calc_start_time": np.nan, + "consumed_time": np.nan, + "structures": np.nan, + "energy": np.nan, + "energy_zero": np.nan, + "forces": np.nan, + "stresses": np.nan, + "magmoms": np.nan, + "scf_steps": np.nan, + "scf_convergence": np.nan, + } + ] + ) try: energies = outcar.parse_dict["energies"] except: energies = np.nan - + try: - ionic_step_structures = np.array([Structure(cell, structure.species, outcar.parse_dict["positions"][i], coords_are_cartesian=True).to_json() - for i, cell in enumerate(outcar.parse_dict["cells"])]) + ionic_step_structures = np.array( + [ + Structure( + cell, + structure.species, + outcar.parse_dict["positions"][i], + coords_are_cartesian=True, + ).to_json() + for i, cell in enumerate(outcar.parse_dict["cells"]) + ] + ) except: ionic_step_structures = np.nan - + try: - energies_zero = outcar.parse_dict["energies_zero"] + energies_zero = outcar.parse_dict["energies_zero"] except: energies_zero = np.nan - + try: forces = outcar.parse_dict["forces"] except: forces = np.nan - + try: stresses = outcar.parse_dict["stresses"] except: stresses = np.nan - + try: magmoms = np.array(outcar.parse_dict["final_magmoms"]) except: magmoms = np.nan - + try: scf_steps = [len(i) for i in outcar.parse_dict["scf_energies"]] - scf_conv_list = [get_SCF_cycle_convergence(d, threshold=outcar.parse_dict["electronic_stop_criteria"]) for d in outcar.parse_dict["scf_energies"]] + scf_conv_list = [ + get_SCF_cycle_convergence( + d, threshold=outcar.parse_dict["electronic_stop_criteria"] + ) + for d in outcar.parse_dict["scf_energies"] + ] except Exception as e: print(e) scf_steps = np.nan scf_conv_list = np.nan - + try: calc_start_time = outcar.parse_dict["execution_datetime"] except: calc_start_time = np.nan - + try: consumed_time = outcar.parse_dict["resources"] except: consumed_time = np.nan - - return pd.DataFrame([{ - "calc_start_time": calc_start_time, - "consumed_time": consumed_time, - "structures": ionic_step_structures, - "energy": energies, - "energy_zero": energies_zero, - "forces": forces, - "stresses": stresses, - "magmoms": magmoms, - "scf_steps": scf_steps, - "scf_convergence": scf_conv_list - }]) + + return pd.DataFrame( + [ + { + "calc_start_time": calc_start_time, + "consumed_time": consumed_time, + "structures": ionic_step_structures, + "energy": energies, + "energy_zero": energies_zero, + "forces": forces, + "stresses": stresses, + "magmoms": magmoms, + "scf_steps": scf_steps, + "scf_convergence": scf_conv_list, + } + ] + ) + def get_structure(directory): """ @@ -229,29 +300,37 @@ def get_structure(directory): Returns: pymatgen.core.Structure: The structure object if successful, None otherwise. 
""" - structure_filenames = ["CONTCAR", "POSCAR"] + glob.glob(os.path.join(directory, "starter*.vasp")) + structure_filenames = ["CONTCAR", "POSCAR"] + glob.glob( + os.path.join(directory, "starter*.vasp") + ) for filename in structure_filenames: try: return Structure.from_file(os.path.join(directory, filename)) except Exception as e: - #print(f"Failed to parse structure file {filename}: {e}") + # print(f"Failed to parse structure file {filename}: {e}") pass print("Failed to parse appropriate structure file completely") return np.nan + def get_vasp_outputs(directory, extract_error_dirs=True, parse_all_in_dir=True): df_direct_outputs = _get_vasp_outputs(directory, parse_all_in_dir=parse_all_in_dir) - df_error_outputs = process_error_archives(directory) if extract_error_dirs else pd.DataFrame() + df_error_outputs = ( + process_error_archives(directory) if extract_error_dirs else pd.DataFrame() + ) return pd.concat([df_direct_outputs, df_error_outputs]) -def grab_electron_info(directory_path, line_before_elec_str="PAW_PBE", potcar_filename="POTCAR"): + +def grab_electron_info( + directory_path, line_before_elec_str="PAW_PBE", potcar_filename="POTCAR" +): structure = get_structure(directory_path) if structure: element_list, element_count = element_count_ordered(structure) - + electron_of_potcar = [] - with open(os.path.join(directory_path, potcar_filename), 'r') as file: + with open(os.path.join(directory_path, potcar_filename), "r") as file: lines = file.readlines() should_append = False for line in lines: @@ -261,13 +340,19 @@ def grab_electron_info(directory_path, line_before_elec_str="PAW_PBE", potcar_fi should_append = False if stripped_line.startswith(line_before_elec_str): should_append = True - + return element_list, element_count, electron_of_potcar -def get_total_electron_count(directory_path, line_before_elec_str="PAW_PBE", potcar_filename="POTCAR"): - ele_list, ele_count, electron_of_potcar = grab_electron_info(directory_path, line_before_elec_str, potcar_filename) + +def get_total_electron_count( + directory_path, line_before_elec_str="PAW_PBE", potcar_filename="POTCAR" +): + ele_list, ele_count, electron_of_potcar = grab_electron_info( + directory_path, line_before_elec_str, potcar_filename + ) return np.dot(ele_count, electron_of_potcar) + def element_count_ordered(structure): site_element_list = [site.species_string for site in structure] past_element = site_element_list[0] @@ -283,10 +368,15 @@ def element_count_ordered(structure): count = 1 past_element = element element_count.append(count) - return element_list, element_count + return element_list, element_count + def parse_vasp_directory(directory, extract_error_dirs=True, parse_all_in_dir=True): - df = get_vasp_outputs(directory, extract_error_dirs=extract_error_dirs, parse_all_in_dir=parse_all_in_dir) + df = get_vasp_outputs( + directory, + extract_error_dirs=extract_error_dirs, + parse_all_in_dir=parse_all_in_dir, + ) results_df = [] kpoints_list = [] for _, row in df.iterrows(): @@ -296,9 +386,11 @@ def parse_vasp_directory(directory, extract_error_dirs=True, parse_all_in_dir=Tr results_df = pd.concat(results_df).sort_values(by="calc_start_time") results_df["KPOINTS"] = kpoints_list results_df["INCAR"] = df["INCAR"].tolist() - + try: - element_list, element_count, electron_of_potcar = grab_electron_info(directory_path=directory, potcar_filename="POTCAR") + element_list, element_count, electron_of_potcar = grab_electron_info( + directory_path=directory, potcar_filename="POTCAR" + ) except: element_list = np.nan 
element_count = np.nan diff --git a/utils/vasp/resubmitter.py b/utils/vasp/resubmitter.py index 104827f..e92691d 100644 --- a/utils/vasp/resubmitter.py +++ b/utils/vasp/resubmitter.py @@ -8,97 +8,163 @@ from utils.generic import get_latest_file_iteration from utils.jobfile import jobfile + def get_slurm_jobs_working_directories(username="hmai"): - command = f"squeue -u {username} -o \"%i %Z\"" + command = f'squeue -u {username} -o "%i %Z"' result = subprocess.run(command, shell=True, capture_output=True, text=True) output_lines = result.stdout.strip().split("\n")[1:] # Remove the header line - + # Parse the output lines into a list of tuples (job_id, working_directory) data = [line.split() for line in output_lines] - + # Create a Pandas DataFrame from the data df = pd.DataFrame(data, columns=["Job ID", "Working Directory"]) - + return df -class CalculationConverger(): - - def __init__(self, parent_dir, script_template_dir, max_submissions=1000, submission_command="sbatch", username="hmai"): + +class CalculationConverger: + + def __init__( + self, + parent_dir, + script_template_dir, + max_submissions=1000, + submission_command="sbatch", + username="hmai", + ): self.parent_dir = parent_dir self.max_submissions = max_submissions self.submission_command = submission_command - self.vasp_dirs = find_vasp_directories(parent_dir, filenames=["INCAR", "POTCAR"], all_present=True, extract_tarballs=False) + self.vasp_dirs = find_vasp_directories( + parent_dir, + filenames=["INCAR", "POTCAR"], + all_present=True, + extract_tarballs=False, + ) self.script_template_dir = script_template_dir self.user = username def submit_to_queue(self, dirpath, script_name): os.system(f"cd {dirpath} && {self.submission_command} {script_name}") - - def reconverge_all(self, calc_type="DRS", HPC="Setonix", VASP_version="5.4.4", CPU=128, walltime=24, cpu_per_node=128, from_dataframe_path=None): + + def reconverge_all( + self, + calc_type="DRS", + HPC="Setonix", + VASP_version="5.4.4", + CPU=128, + walltime=24, + cpu_per_node=128, + from_dataframe_path=None, + ): non_converged = self.load_non_converged_paths(from_dataframe_path) running_jobs_df = get_slurm_jobs_working_directories(self.user) running_queued_job_directories = running_jobs_df["Working Directory"].to_list() dirs_to_search_next_time, leftover_calcs_exceeding_queue_limit = [], [] - dirs_to_apply_reconverge = set(non_converged or self.vasp_dirs) - set(running_queued_job_directories) + dirs_to_apply_reconverge = set(non_converged or self.vasp_dirs) - set( + running_queued_job_directories + ) for i, dir in enumerate(dirs_to_apply_reconverge): if not check_convergence(dir): if i + len(running_queued_job_directories) > self.max_submissions: leftover_calcs_exceeding_queue_limit.append(dir) else: - self.reconverge(dir, calc_type, HPC, VASP_version, CPU, walltime, cpu_per_node) + self.reconverge( + dir, calc_type, HPC, VASP_version, CPU, walltime, cpu_per_node + ) dirs_to_search_next_time.append(dir) else: print(f"CONVERGED: {dir}") - self.update_resubmit_log(dirs_to_search_next_time + running_queued_job_directories + leftover_calcs_exceeding_queue_limit) + self.update_resubmit_log( + dirs_to_search_next_time + + running_queued_job_directories + + leftover_calcs_exceeding_queue_limit + ) return dirs_to_search_next_time def load_non_converged_paths(self, from_dataframe_path): if from_dataframe_path: df = pd.read_pickle(from_dataframe_path) - return [path.rstrip(os.sep + "OUTCAR") if path.endswith(os.sep + "OUTCAR") else path for path in df['filepath'].tolist()] + return [ 
+ ( + path.rstrip(os.sep + "OUTCAR") + if path.endswith(os.sep + "OUTCAR") + else path + ) + for path in df["filepath"].tolist() + ] return self.reconverge_from_log_file() - + def update_resubmit_log(self, dirs_to_search_next_time): with open(os.path.join(self.parent_dir, "resubmit.log"), "w") as log_file: for dir_path in dirs_to_search_next_time: log_file.write(dir_path + "\n") - def reconverge(self, dirpath, calc_type="SDRS", HPC="Setonix", VASP_version="5.4.4", CPU=128, walltime=24, cpu_per_node=128): + def reconverge( + self, + dirpath, + calc_type="SDRS", + HPC="Setonix", + VASP_version="5.4.4", + CPU=128, + walltime=24, + cpu_per_node=128, + ): self.handle_error_run_files(dirpath) reconverge_methods = { "static": self.reconverge_static, "SDRS": self.reconverge_SDRS, "DRS": self.reconverge_DRS, - "base": self.reconverge_base + "base": self.reconverge_base, } reconverge_method = reconverge_methods.get(calc_type, self.reconverge_base) reconverge_method(dirpath, HPC, VASP_version, CPU, walltime, cpu_per_node) - + def handle_error_run_files(self, dirpath): - error_tar_files_exist = any("error" in f and "tar" in f for f in os.listdir(dirpath)) + error_tar_files_exist = any( + "error" in f and "tar" in f for f in os.listdir(dirpath) + ) if error_tar_files_exist: latest_error_run_index = self.find_latest_error_run_index(dirpath) - error_run_folder_path = os.path.join(dirpath, f"error_run_{latest_error_run_index + 1}") + error_run_folder_path = os.path.join( + dirpath, f"error_run_{latest_error_run_index + 1}" + ) os.makedirs(error_run_folder_path) self.move_files_to_error_run_folder(dirpath, error_run_folder_path) def move_files_to_error_run_folder(self, dirpath, error_run_folder_path): for f in os.listdir(dirpath): if ("error" in f and "tar" in f) or f.endswith(".sh"): - shutil.move(os.path.join(dirpath, f), os.path.join(error_run_folder_path, f)) + shutil.move( + os.path.join(dirpath, f), os.path.join(error_run_folder_path, f) + ) for og_file in ["INCAR.orig", "POSCAR.orig", "KPOINTS.orig", "custodian.json"]: if os.path.exists(os.path.join(dirpath, og_file)): - shutil.move(os.path.join(dirpath, og_file), os.path.join(error_run_folder_path, og_file)) - - for current_run in ["INCAR", "POSCAR", "POTCAR", "OUTCAR", "vasprun.xml", "vasp.log"]: + shutil.move( + os.path.join(dirpath, og_file), + os.path.join(error_run_folder_path, og_file), + ) + + for current_run in [ + "INCAR", + "POSCAR", + "POTCAR", + "OUTCAR", + "vasprun.xml", + "vasp.log", + ]: if os.path.exists(os.path.join(dirpath, current_run)): - shutil.copy(os.path.join(dirpath, current_run), os.path.join(error_run_folder_path, current_run)) - + shutil.copy( + os.path.join(dirpath, current_run), + os.path.join(error_run_folder_path, current_run), + ) + def find_latest_error_run_index(self, dirpath): error_run_indices = [0] for f in os.listdir(dirpath): @@ -109,24 +175,56 @@ def find_latest_error_run_index(self, dirpath): except ValueError as e: print(f"Exception occurred at {dirpath}: {e}") return max(error_run_indices) - + def generate_custodian_string(self, template_filename, user_inputs): template_path = os.path.join(self.script_template_dir, template_filename) return jobfile._replace_fields(template_path, user_inputs) def reconverge_base(self, dirpath, HPC, VASP_version, CPU, walltime, cpu_per_node): - self.reconverge_generic(dirpath, "template_BASE.py", HPC, VASP_version, CPU, walltime, cpu_per_node) + self.reconverge_generic( + dirpath, "template_BASE.py", HPC, VASP_version, CPU, walltime, cpu_per_node + ) - def 
reconverge_static(self, dirpath, HPC, VASP_version, CPU, walltime, cpu_per_node): - self.reconverge_generic(dirpath, "template_Static.py", HPC, VASP_version, CPU, walltime, cpu_per_node) + def reconverge_static( + self, dirpath, HPC, VASP_version, CPU, walltime, cpu_per_node + ): + self.reconverge_generic( + dirpath, + "template_Static.py", + HPC, + VASP_version, + CPU, + walltime, + cpu_per_node, + ) def reconverge_DRS(self, dirpath, HPC, VASP_version, CPU, walltime, cpu_per_node): stages_left = self.get_stages_left(dirpath, ["relax_1", "relax_2"], 3) - self.reconverge_generic(dirpath, "template_DRS.py", HPC, VASP_version, CPU, walltime, cpu_per_node, {"{STAGES_LEFT}": str(stages_left)}) + self.reconverge_generic( + dirpath, + "template_DRS.py", + HPC, + VASP_version, + CPU, + walltime, + cpu_per_node, + {"{STAGES_LEFT}": str(stages_left)}, + ) def reconverge_SDRS(self, dirpath, HPC, VASP_version, CPU, walltime, cpu_per_node): - stages_left = self.get_stages_left(dirpath, ["static_1", "relax_1", "relax_2"], 4) - self.reconverge_generic(dirpath, "template_SDRS.py", HPC, VASP_version, CPU, walltime, cpu_per_node, {"{STAGES_LEFT}": str(stages_left)}) + stages_left = self.get_stages_left( + dirpath, ["static_1", "relax_1", "relax_2"], 4 + ) + self.reconverge_generic( + dirpath, + "template_SDRS.py", + HPC, + VASP_version, + CPU, + walltime, + cpu_per_node, + {"{STAGES_LEFT}": str(stages_left)}, + ) def get_stages_left(self, dirpath, stage_markers, default_stages_left): for i, marker in enumerate(reversed(stage_markers)): @@ -134,17 +232,41 @@ def get_stages_left(self, dirpath, stage_markers, default_stages_left): return i + 1 return default_stages_left - def reconverge_generic(self, dirpath, template_filename, HPC, VASP_version, CPU, walltime, cpu_per_node, extra_inputs=None): + def reconverge_generic( + self, + dirpath, + template_filename, + HPC, + VASP_version, + CPU, + walltime, + cpu_per_node, + extra_inputs=None, + ): user_inputs = { - '{VASPOUTPUTFILENAME}': '"vasp.log"', - '{MAXCUSTODIANERRORS}': "20" + "{VASPOUTPUTFILENAME}": '"vasp.log"', + "{MAXCUSTODIANERRORS}": "20", } if extra_inputs: user_inputs.update(extra_inputs) - custodian_string = self.generate_custodian_string(template_filename, user_inputs) - script_name = os.path.join(self.script_template_dir, f"{template_filename.split('_')[0]}_Custodian_{HPC}.sh") - job = jobfile(file_path=script_name, HPC=HPC, VASP_version=VASP_version, CPU=CPU, walltime=walltime, cpu_per_node=cpu_per_node, generic_insert_field=["{CUSTODIANSTRING}"], generic_insert=[custodian_string]) + custodian_string = self.generate_custodian_string( + template_filename, user_inputs + ) + script_name = os.path.join( + self.script_template_dir, + f"{template_filename.split('_')[0]}_Custodian_{HPC}.sh", + ) + job = jobfile( + file_path=script_name, + HPC=HPC, + VASP_version=VASP_version, + CPU=CPU, + walltime=walltime, + cpu_per_node=cpu_per_node, + generic_insert_field=["{CUSTODIANSTRING}"], + generic_insert=[custodian_string], + ) target_script_name = f"{os.path.basename(dirpath)}.sh" job.to_file(job_name=target_script_name, output_path=dirpath) self.submit_to_queue(dirpath, target_script_name) @@ -156,7 +278,10 @@ def reconverge_from_log_file(self): non_converged_dirs = [line.strip() for line in log_file.readlines()] largest_n = get_latest_file_iteration(self.parent_dir, "resubmit.log_") - os.rename(resubmit_log_file, os.path.join(self.parent_dir, f"resubmit.log_{largest_n + 1}")) + os.rename( + resubmit_log_file, + os.path.join(self.parent_dir, 
f"resubmit.log_{largest_n + 1}"), + ) return non_converged_dirs else: diff --git a/utils/vasp/vasp_potential_training_database.py b/utils/vasp/vasp_potential_training_database.py index a4b6498..e69de29 100644 --- a/utils/vasp/vasp_potential_training_database.py +++ b/utils/vasp/vasp_potential_training_database.py @@ -1 +0,0 @@ -def \ No newline at end of file