diff --git a/.codespell/ignore_lines.txt b/.codespell/ignore_lines.txt new file mode 100644 index 0000000..2d86940 --- /dev/null +++ b/.codespell/ignore_lines.txt @@ -0,0 +1,22 @@ +;; Please include filenames and explanations for each ignored line. +;; See https://docs.openverse.org/meta/codespell.html for docs. + +;; .github/workflows/draft_releases.yml +;; The release-drafter/release-drafter configuration variable name cannot be changed + commitish: main + +;; frontend/src/locales/scripts/en.json5 +;; Prettier insists we escape a single quote rather than the double quotes and codespell +;; does not understand the escaped `\'t` as "couldn't". It instead just sees "couldn". + heading: 'We couldn\'t find anything for "{query}".', + +;; catalog/tests/dags/providers/provider_api_scripts/test_wikimedia_commons.py +;; "Titel" matches "title", but the phrase is in Dutch, not English, so "titel" +;; is actually correct. Similarly, "als" incorrectly matches to "also" in the same +;; block of Dutch text. + "Identificatie Titel(s): Allegorie op kunstenaar Francesco Mazzoli, " + "bekend als Parmigianino" + +;; packages/js/eslint-plugin/configs/vue.ts +;; `te` gets matched with `the` and others +const i18nDestructureRules = ["t", "tc", "te", "td", "d", "n"].map( diff --git a/.codespell/ignore_words.txt b/.codespell/ignore_words.txt new file mode 100644 index 0000000..3ce2ff1 --- /dev/null +++ b/.codespell/ignore_words.txt @@ -0,0 +1,11 @@ +;; Please include explanations for each ignored word (lowercase) using full sentences. +;; See https://docs.openverse.org/meta/codespell.html for docs. + +;; `nd` is a chemical element. +nd + +;; `te` is a chemical element. +te + +;; `Indide` refers to the keywords in the tyvanchuk_crystal_2024 paper. 
+indide diff --git a/.flake8 b/.flake8 index 5536ba2..e3c9707 100644 --- a/.flake8 +++ b/.flake8 @@ -5,7 +5,7 @@ exclude = build, dist, doc/source/conf.py -max-line-length = 115 +max-line-length = 90 # https://black.readthedocs.io/en/stable/guides/using_black_with_other_tools.html#labels-why-pycodestyle-warnings extend-ignore = E203 diff --git a/README.md b/README.md index d7ea5ba..a4b0bb0 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ variables for handling large datasets, on the order of tens of thousands, of `cifkit` provides higher-level functions in just a few lines of code. -- **Coordination geometry** - `cifkit` provides fuctions for visualing +- **Coordination geometry** - `cifkit` provides functions for visualizing coordination geometry from each site and extracts physics-based features like volume and packing efficiency in each polyhedron. - **Atomic mixing** - `cifkit` extracts atomic mixing information at the bond @@ -79,7 +79,7 @@ ensemble.filter_by_structures(["Co1.75Ge", "CoIn2"]) ensemble.filter_by_structures("CeAl2Ga2") ``` -To learn more, please read the official documention here: +To learn more, please read the official documentation here: https://bobleesj.github.io/cifkit. ## Quotes @@ -122,3 +122,10 @@ Here is how you can contribute to the `cifkit` project if you found it helpful: - If you have any suggestions or need further clarification on how to use `cifkit`, please reach out to Bob Lee ([@bobleesj](https://github.com/bobleesj)). + +## To render documentation + +```bash +pip install -r requirements/docs.txt +mkdocs serve +``` diff --git a/docs/index.md b/docs/index.md index ccdeb9c..dc7b06d 100644 --- a/docs/index.md +++ b/docs/index.md @@ -29,7 +29,7 @@ mixing, among other parameters. `cifkit` provides higher-level functions in just a few lines of code. 
-- **Coordination geometry** - `cifkit` provides fuctions for visualing +- **Coordination geometry** - `cifkit` provides functions for visualizing coordination geometry from each site and extracts physics-based features like volume and packing efficiency in each polyhedron. - **Atomic mixing** - `cifkit` extracts atomic mixing information at the bond diff --git a/docs/notebooks/01_cif.ipynb b/docs/notebooks/01_cif.ipynb index bc2134f..8b9b767 100644 --- a/docs/notebooks/01_cif.ipynb +++ b/docs/notebooks/01_cif.ipynb @@ -45,7 +45,7 @@ "from cifkit import Example\n", "from cifkit import Cif\n", "\n", - "# Initalize with the example file provided\n", + "# Initialize with the example file provided\n", "cif = Cif(Example.Er10Co9In20_file_path)\n", "\n", "# Print attributes\n", @@ -60,7 +60,7 @@ "source": [ "## Get instant properties - parsed information\n", "\n", - "The `Cif` class provides a set of accessible properties that can be accessed. Each object is intialized with the `file_path` to the `.cif` file." + "The `Cif` class provides a set of accessible properties that can be accessed. Each object is initialized with the `file_path` to the `.cif` file." ] }, { @@ -83,7 +83,7 @@ "print(\"Formula:\", cif.formula)\n", "print(\"Structure:\", cif.structure)\n", "print(\"Unique elements:\", cif.unique_elements)\n", - "print(\"Unitcell lenghts:\", cif.unitcell_lengths)\n", + "print(\"Unitcell lengths:\", cif.unitcell_lengths)\n", "print(\"Unitcell angles:\", cif.unitcell_angles)\n", "print(\"Site labels:\", cif.site_labels)\n", "print(\"Weight:\", cif.weight)\n", @@ -102,7 +102,7 @@ "source": [ "### How tag is parsed\n", "\n", - "Tag is parsed from the thrid line of each `.cif` file. Some databases such as Pearson's Crystal Data offers tags for each file.\n", + "Tag is parsed from the third line of each `.cif` file. 
Some databases such as Pearson's Crystal Data offers tags for each file.\n", "\n" ] }, diff --git a/mkdocs.yml b/mkdocs.yml index ad26fd0..6eeff87 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -61,4 +61,5 @@ plugins: options: show_source: false show_root_heading: true # Show module names as headings + docstring_style: numpy - mkdocs-jupyter diff --git a/src/cifkit/coordination/bond_distance.py b/src/cifkit/coordination/bond_distance.py index 5ddfe7c..8b7a630 100644 --- a/src/cifkit/coordination/bond_distance.py +++ b/src/cifkit/coordination/bond_distance.py @@ -1,9 +1,7 @@ def get_shortest_distance_per_bond_pair( flattened_connections: list[tuple[tuple[str, str], float]], ) -> dict[tuple[str, str], float]: - """ - Determine the minimum distance for all possible unique pair of elements. - """ + """Determine the min distance for all possible unique pair of elements.""" # Initialize the dictionary with a specific type min_dist_per_element_pair: dict[tuple[str, str], float] = {} diff --git a/src/cifkit/coordination/composition.py b/src/cifkit/coordination/composition.py index 3c04d00..6665a99 100644 --- a/src/cifkit/coordination/composition.py +++ b/src/cifkit/coordination/composition.py @@ -9,9 +9,7 @@ def get_bond_counts( connections: dict[str, list], sorted_by_mendeleev=False, ) -> dict: - """ - Return a dictionary containing bond pairs and counts per label site. - """ + """Return a dictionary containing bond pairs and counts per label site.""" if sorted_by_mendeleev: bond_pairs = bond_pair.get_pairs_sorted_by_mendeleev(elements) else: @@ -58,9 +56,7 @@ def get_bond_counts( def get_bond_fractions(bond_pair_data: dict) -> dict[tuple[str, str], float]: - """ - Calculate the fraction of each bond type across all labels. 
- """ + """Calculate the fraction of each bond type across all labels.""" total_bond_counts: dict[tuple[str, str], float] = {} total_bonds = 0 @@ -83,9 +79,7 @@ def get_bond_fractions(bond_pair_data: dict) -> dict[tuple[str, str], float]: def count_connections_per_site(connections: dict) -> dict[str, int]: - """ - Calculate the coordination number for each atom site. - """ + """Calculate the coordination number for each atom site.""" neighbor_count = {} for label, connection_data in connections.items(): neighbor_count[label] = len(connection_data) @@ -94,9 +88,7 @@ def count_connections_per_site(connections: dict) -> dict[str, int]: def compute_avg_CN(connections: dict[str, int]) -> float: - """ - Calculate the average coordination number across all sites. - """ + """Calculate the average coordination number across all sites.""" coordination_numbers = count_connections_per_site(connections) total = 0 for _, value in coordination_numbers.items(): @@ -105,9 +97,7 @@ def compute_avg_CN(connections: dict[str, int]) -> float: def get_unique_CN_values(connections: dict) -> set[int]: - """ - Return unique coordination numbers from all sites. - """ + """Return unique coordination numbers from all sites.""" coordination_numbers = count_connections_per_site(connections) unique_numbers = set(coordination_numbers.values()) return unique_numbers diff --git a/src/cifkit/coordination/connection.py b/src/cifkit/coordination/connection.py index cc729bf..e82fd02 100644 --- a/src/cifkit/coordination/connection.py +++ b/src/cifkit/coordination/connection.py @@ -1,10 +1,8 @@ def get_CN_connections_by_best_methods( - best_methods, conncetions: dict + best_methods, connections: dict ) -> dict: - """ - Retrieve connections limited by the number of vertices (CN_value) - for each label. 
- """ + """Retrieve connections limited by the number of vertices (CN) for each + label.""" CN_connections = {} for label, data in best_methods.items(): @@ -12,6 +10,6 @@ def get_CN_connections_by_best_methods( "number_of_vertices" ] # Extract the limit for the number of vertices # Limit the connections for this label using CN_value - CN_connections[label] = conncetions[label][:CN_value] + CN_connections[label] = connections[label][:CN_value] return CN_connections diff --git a/src/cifkit/coordination/filter.py b/src/cifkit/coordination/filter.py index 3b08501..e3b2789 100644 --- a/src/cifkit/coordination/filter.py +++ b/src/cifkit/coordination/filter.py @@ -4,11 +4,8 @@ def find_best_polyhedron(max_gaps_per_label, connections): - """ - Find the best polyhedron for each label based on the minimum - distance between the reference atom to the average position of - connected atoms. - """ + """Find the best polyhedron for each label based on the minimum distance + between the reference atom to the average position of connected atoms.""" best_polyhedrons = {} for label, CN_data_per_method in max_gaps_per_label.items(): @@ -38,16 +35,17 @@ def find_best_polyhedron(max_gaps_per_label, connections): except Exception: print( - f"Error in determining polyhedron for {label} using {method} - skipped" + f"Error in polyhedron calculation for" + f"{label} using {method} - Skip" ) continue # Move to the next method - # Returns non if ther eis any error + # Returns non if there is any error polyhedron_metrics = compute_polyhedron_metrics( polyhedron_points, hull ) - # If there is no metrics, then skip the mthod + # If there is no metrics, then skip the method if polyhedron_metrics is None: continue diff --git a/src/cifkit/coordination/geometry.py b/src/cifkit/coordination/geometry.py index 4bf2303..b788069 100644 --- a/src/cifkit/coordination/geometry.py +++ b/src/cifkit/coordination/geometry.py @@ -6,9 +6,10 @@ def get_polyhedron_coordinates_labels( connections: dict, label: str ) 
-> tuple[list[list[float]], list[str]]: - """ - Return a list of Cartesian coordinates and labels. The central atom is - the last index. + """Return a list of Cartesian coordinates and labels. + + The central atom is the last index. + """ conn_data = connections[label] polyhedron_points = [conn[3] for conn in conn_data] @@ -24,9 +25,7 @@ def get_polyhedron_coordinates_labels( def compute_polyhedron_metrics(polyhedron_points, hull): - """ - Compute various metrics related to a given polyhedron. - """ + """Compute various metrics related to a given polyhedron.""" try: central_atom_coord = np.array(polyhedron_points[-1]) @@ -98,10 +97,8 @@ def compute_polyhedron_metrics(polyhedron_points, hull): def compute_center_of_mass_and_distance( polyhedron_points, hull, central_atom_coord ): - """ - Calculate the center of mass of a polyhedron and the distance - from the center of mass to a given point. - """ + """Calculate the center of mass of a polyhedron and the distance from the + center of mass to a given point.""" center_of_mass = np.mean(polyhedron_points[hull.vertices, :], axis=0) vector_to_center_of_mass = center_of_mass - central_atom_coord distance_to_center = np.linalg.norm(vector_to_center_of_mass) diff --git a/src/cifkit/coordination/method.py b/src/cifkit/coordination/method.py index 9cad77b..49255b6 100644 --- a/src/cifkit/coordination/method.py +++ b/src/cifkit/coordination/method.py @@ -64,7 +64,7 @@ def compute_CN_max_gap_per_site( norm_dist_by_CIF_radius_sum = compute_normalized_value( pair_dist, CIF_radius_sum_norm_value ) - norm_dist_by_CIF_radius_refined_sum = compute_normalized_value( + norm_dist_by_CIF_rad_ref_sum = compute_normalized_value( pair_dist, CIF_radius_sum_refined_norm_value ) norm_dist_by_Pauling_radius_sum = compute_normalized_value( @@ -80,7 +80,7 @@ def compute_CN_max_gap_per_site( distances = { "dist_by_shortest_dist": norm_dist_by_min_dist, "dist_by_CIF_radius_sum": norm_dist_by_CIF_radius_sum, - "dist_by_CIF_radius_refined_sum": 
norm_dist_by_CIF_radius_refined_sum, + "dist_by_CIF_radius_refined_sum": norm_dist_by_CIF_rad_ref_sum, "dist_by_Pauling_radius_sum": norm_dist_by_Pauling_radius_sum, } else: @@ -118,10 +118,8 @@ def compute_normalized_value(number: float, ref_number: float) -> float: def get_rad_sum_value( rad_sum_data, method_name: str, ref_label: str, other_label: str ) -> float: - """ - Return the sum of radii value for a given pair of elements, - ensuring the pair is alphabetically sorted. - """ + """Return the sum of radii value for a given pair of elements, ensuring the + pair is alphabetically sorted.""" # Extract the element types from the labels ref_element = get_atom_type_from_label(ref_label) diff --git a/src/cifkit/coordination/site.py b/src/cifkit/coordination/site.py index 55899fd..eb4e983 100644 --- a/src/cifkit/coordination/site.py +++ b/src/cifkit/coordination/site.py @@ -4,10 +4,8 @@ def get_min_distance_pair( connections: dict, ) -> tuple[tuple[str, str], float]: - """ - Return an alphabetically sorted element pair with the global minimum - distance in the entire supercell. - """ + """Return an alphabetically sorted element pair with the global minimum + distance in the entire supercell.""" sorted_tuples = get_min_distance_pair_per_site_label(connections) min_dist_tuple = sorted_tuples[0] return min_dist_tuple @@ -16,10 +14,8 @@ def get_min_distance_pair( def get_min_distance_pair_per_site_label( connections: dict, ) -> list[tuple[tuple[str, str], float]]: - """ - Return a list of tuples containing element pairs - and the minimum distance from each site label in the loop. 
- """ + """Return a list of tuples containing element pairs and the minimum distance + from each site label in the loop.""" element_pairs = [] # Iterate over each pair and their list of distances for ref_label, pair_data in connections.items(): diff --git a/src/cifkit/coordination/site_distance.py b/src/cifkit/coordination/site_distance.py index 154357b..2c73e7c 100644 --- a/src/cifkit/coordination/site_distance.py +++ b/src/cifkit/coordination/site_distance.py @@ -1,7 +1,5 @@ def get_shortest_distance(connections: dict) -> float: - """ - Return the shortest distance in the supercell. - """ + """Return the shortest distance in the supercell.""" min_dist = float("inf") # Iterate over each site's connections in the dictionary @@ -16,9 +14,7 @@ def get_shortest_distance(connections: dict) -> float: def get_shortest_distance_per_site( connections: dict, ) -> dict[str, tuple[str, float]]: - """ - Calculate the shortest distance for each label. - """ + """Calculate the shortest distance for each label.""" shortest_dist_info: dict[str, tuple[str, float]] = {} for label, connections in connections.items(): diff --git a/src/cifkit/data/example.py b/src/cifkit/data/example.py index 3ed04e1..6bfd3b4 100644 --- a/src/cifkit/data/example.py +++ b/src/cifkit/data/example.py @@ -3,9 +3,13 @@ class Example(str, Enum): + # Define 'get_path' as a static method + @staticmethod def get_path(*args): + # Use '__file__' to get the directory of the current file and join paths return os.path.join(os.path.dirname(__file__), *args) + # Now use 'get_path' to define paths relative to this file ErCoIn_folder_path = get_path("ErCoIn") ErCoIn_big_folder_path = get_path("ErCoIn_big") Er10Co9In20_file_path = get_path("ErCoIn", "Er10Co9In20.cif") diff --git a/src/cifkit/data/mendeleeve_handler.py b/src/cifkit/data/mendeleeve_handler.py index 20501a2..c80a67d 100644 --- a/src/cifkit/data/mendeleeve_handler.py +++ b/src/cifkit/data/mendeleeve_handler.py @@ -5,9 +5,10 @@ def 
get_mendeleev_nums_from_pair_tuple( label_pair_tuple: tuple[str, str], ) -> tuple[int, int]: - """ - Parse Mendeleev number for each label in the tuple. + """Parse Mendeleev number for each label in the tuple. + If no number is found, default to 0 for that element. + """ # Parse the first and second elements first_element = string_parser.get_atom_type_from_label(label_pair_tuple[0]) diff --git a/src/cifkit/data/radius.py b/src/cifkit/data/radius.py index 511c26c..9275a2c 100644 --- a/src/cifkit/data/radius.py +++ b/src/cifkit/data/radius.py @@ -1,7 +1,5 @@ def get_radius_data() -> dict: - """ - Return a dictionary of element radii data. - """ + """Return a dictionary of element radii data.""" rad_data = { "Si": [1.176, 1.316], "Sc": [1.641, 1.620], diff --git a/src/cifkit/data/radius_handler.py b/src/cifkit/data/radius_handler.py index 0491722..483cb56 100644 --- a/src/cifkit/data/radius_handler.py +++ b/src/cifkit/data/radius_handler.py @@ -2,13 +2,12 @@ from cifkit.data.radius import get_radius_data from cifkit.data.radius_optimization import get_refined_CIF_radius +from cifkit.utils.unit import round_dict_values def get_is_radius_data_available(elements: list[str]) -> bool: - """ - Check if both CIF and Pauling radius data are available - for each element in the list. 
- """ + """Check if both CIF and Pauling radius data are available for each element in + the list.""" data = get_radius_data() for element in elements: element_data = data.get(element, {}) @@ -21,9 +20,7 @@ def get_is_radius_data_available(elements: list[str]) -> bool: def get_CIF_pauling_radius(elements: list[str]) -> dict: - """ - Return CIF and Pualing data for a list of elements - """ + """Return CIF and Pualing data for a list of elements.""" data = get_radius_data() radii = {} for atom in elements: @@ -35,10 +32,10 @@ def get_CIF_pauling_radius(elements: list[str]) -> dict: return radii -def get_radius_values_per_element(elements, shortest_bond_distances) -> dict: - """ - Merge CIF and Pauling radius data with CIF refined radius data. - """ +def get_radius_values_per_element( + elements: list[str], shortest_bond_distances +) -> dict: + """Merge CIF and Pauling radius data with CIF refined radius data.""" is_radius_data_available = get_is_radius_data_available(elements) if not is_radius_data_available: @@ -59,13 +56,13 @@ def get_radius_values_per_element(elements, shortest_bond_distances) -> dict: ], } - return combined_radii + return round_dict_values(combined_radii) -def compute_radius_sum(radius_values, is_radius_data_available: bool): - """ - Compute the sum of two radii. - """ +def compute_radius_sum( + radius_values: dict[str : dict[str:float]], is_radius_data_available: bool +): + """Compute the sum of two radii.""" if not is_radius_data_available: return None diff --git a/src/cifkit/data/radius_optimization.py b/src/cifkit/data/radius_optimization.py index 26bfc98..cf9be11 100644 --- a/src/cifkit/data/radius_optimization.py +++ b/src/cifkit/data/radius_optimization.py @@ -9,10 +9,8 @@ def generate_adjacent_pairs( elements: list[str], ) -> list[tuple[str, str]]: - """ - Generate a list of tuples, where each tuple is - a pair of adjacent atom labels. 
- """ + """Generate a list of tuples, where each tuple is a pair of adjacent atom + labels.""" # Binary -> [('In', 'Rh')] # Ternary -> [('In', 'Rh'), ('Rh', 'U')] @@ -23,19 +21,15 @@ def generate_adjacent_pairs( def objective(params, original_radii: list[float]) -> list[float]: - """ - Calculate the objective function value,which is the sum of - squared percent differences between original and refined radii. - """ + """Calculate the objective function value, which is the sum of squared percent + differences between original and refined radii.""" return np.sum(((original_radii - params) / original_radii) ** 2) def constraint(params, index_pair: tuple[int, int], shortest_distance: dict): - """ - Enforce that the sum of the radii of the pair does not - exceed the shortest allowed distance between them. - """ + """Enforce that the sum of the radii of the pair does not exceed the shortest + allowed distance between them.""" i, j = index_pair i, j = index_pair return shortest_distance - (params[i] + params[j]) @@ -44,10 +38,8 @@ def constraint(params, index_pair: tuple[int, int], shortest_distance: dict): def get_refined_CIF_radius( elements: list[str], shortest_distances: dict ) -> dict[str, float]: - """ - Optimize CIF radii given atom labels and their - shortest pair distance constraints. 
- """ + """Optimize CIF radii given atom labels and their shortest pair distance + constraints.""" sorted_elements = sorted(elements) radii_data = get_radius_data() original_radii = np.array( diff --git a/src/cifkit/figures/histogram.py b/src/cifkit/figures/histogram.py index c563d8f..25d0177 100644 --- a/src/cifkit/figures/histogram.py +++ b/src/cifkit/figures/histogram.py @@ -1,6 +1,4 @@ -""" -Histgoram for supercell size, minimum distances -""" +"""Histogram for supercell size, minimum distances.""" import os @@ -137,14 +135,12 @@ def plot_histogram(attribute, stats, dir_path, display, output_dir): histogram["data"], histogram["settings"], display, output_dir ) - # Make a deafult folder if the output folder is not provided= + # Make a default folder if the output folder is not provided def generate_histogram(data, settings, display, output_dir: str) -> None: - """ - Generate a histogram from a dictionary of data and save - it to a specified directory. - """ + """Generate a histogram from a dictionary of data and save it to a specified + directory.""" plt.figure(figsize=(10, 6)) # Create a new figure for each histogram diff --git a/src/cifkit/figures/polyhedron.py b/src/cifkit/figures/polyhedron.py index 41b6ae8..33a02ac 100644 --- a/src/cifkit/figures/polyhedron.py +++ b/src/cifkit/figures/polyhedron.py @@ -42,9 +42,7 @@ def plot( is_displayed, output_dir=None, ): - """ - Generate and save a 3D plot of a molecular structure. 
- """ + """Generate and save a 3D plot of a molecular structure.""" plotter = pv.Plotter(off_screen=not is_displayed, window_size=(1600, 1200)) label_colors = generate_color_mapping(vertex_labels) @@ -56,7 +54,11 @@ def plot( coordination_number = len(points) - 1 # Title - title = f"Formula: {formula}, Central atom: {central_atom_label}, CN: {coordination_number},\n{file_path}" + title = ( + f"Formula: {formula}, Central atom: {central_atom_label}, " + f"CN: {coordination_number},\n{file_path}" + ) + plotter.add_title(title, font="arial") # Constructing title and subtitle @@ -124,7 +126,6 @@ def plot( plotter.add_mesh(poly_data, color="aqua", opacity=0.5, show_edges=True) plotter.show() - """ Output """ @@ -145,7 +146,6 @@ def plot( + ".png" ) save_path = os.path.join(output_dir, plot_filename) - """ Save """ diff --git a/src/cifkit/models/cif.py b/src/cifkit/models/cif.py index e015a8f..7352431 100644 --- a/src/cifkit/models/cif.py +++ b/src/cifkit/models/cif.py @@ -59,8 +59,9 @@ # Identify .cif database source from cifkit.utils.cif_sourcer import get_cif_db_source + +# Utility from cifkit.utils.log_messages import CifLog -from cifkit.utils.unit import round_dict_values def ensure_connections(func): @@ -85,29 +86,104 @@ class Cif: def __init__( self, file_path: str, is_formatted=False, logging_enabled=False ) -> None: - """_summary_ + """Initializes an object from a .cif file. Parameters ---------- file_path : str - _description_ + Path to the .cif file. is_formatted : bool, optional - _description_, by default False + If False, preprocess the .cif file to ensure compatibility with the + gemmi library. Default is False. logging_enabled : bool, optional - _description_, by default False + Enables detailed logging during initialization and for distance + calculations. Default is False. + + Attributes + ---------- + file_path : str + Path to the CIF file from which data is loaded. 
+ logging_enabled : bool + Enables detailed logging for initialization and distance + calculations if set to True. + file_name : str + Base name of the CIF file, extracted from `file_path`. + file_name_without_ext : str + File name without its extension, useful for referencing or + generating derivative files. + db_source : str + Source database (e.g., ICSD, MP, CCDC, PCD) from which the CIF file + originates, determined at runtime. + unitcell_lengths : list[float] + List of unit cell lengths for the crystal structure, typically in + Angstroms. + unitcell_angles : list[float] + List of unit cell angles in radians, ordered by alpha, beta, gamma. + site_labels : list[str] + Lists all unique atomic site labels. + unique_elements : set[str] + Set of unique chemical elements present in the CIF file. + atom_site_info : dict[str, Any] + Dictionary containing detailed information about each atomic site + including element, site occupancy, + fractional coordinates, symmetry, and multiplicity. + composition_type : int + Number of unique elements present in the .cif file, e.g., 1 for + unary, 2 for binary, etc. + tag : str + Additional tag associated with the CIF data, parsed from the third + line of PCD .cif files. + bond_pairs : set[tuple[str, str]] + Set of tuples representing bonded pairs of elements. + site_label_pairs : set[tuple[str, str]] + Set of tuples representing pairs of atomic site labels. + bond_pairs_sorted_by_mendeleev : set[tuple[str, str]] + Set of bonded pairs sorted according to Mendeleev Numbers. + site_label_pairs_sorted_by_mendeleev : set[tuple[str, str]] + Set of site label pairs sorted by Mendeleev Numbers. + site_mixing_type : str + Descriptor of the mixing type, categorized into four types: + deficiency_atomic_mixing, + full_occupancy_atomic_mixing, deficiency_without_atomic_mixing, + full_occupancy. + is_radius_data_available : bool + Indicates whether Pauling and CIF atomic radii are available for + all elements in the .cif file. 
+ mixing_info_per_label_pair : dict + Dictionary mapping pairs of labels to their mixing information. + mixing_info_per_label_pair_sorted_by_mendeleev : dict + Same as `mixing_info_per_label_pair`, but sorted according to + Mendeleev numbers. + unitcell_points : list[list[tuple[float, float, float, str]]] + List of points defining the unit cell; each point contains + fractional coordinates and a site label. + supercell_points : list[list[tuple[float, float, float, str]]] + List of points defining the supercell of the cell, with + translations of ±1, ±1, ±1 from the unit cell. + unitcell_atom_count : int + Total count of atoms within the unit cell. + supercell_atom_count : int + Total count of atoms within the generated supercell + incorporating ±1, ±1, ±1 translations. + connections : None or dict + Initially None, intended to store connection data related to + the crystal structure. Connections are computed lazily and are + only calculated when first needed by a method or property requiring them. """ self.file_path = file_path self.logging_enabled = logging_enabled - """Initialize the Cif object with the file path.""" + # Initialize the Cif object with the file path. self.file_name = os.path.basename(file_path) self.file_name_without_ext = os.path.splitext(self.file_name)[0] self.db_source = get_cif_db_source(self.file_path) - self.connections = None # Private attribute to store connections + + # Private attribute to store connections + self.connections = None self._shortest_pair_distance = None - # If it is not previously formatted + # Pre-process if .cif has not been formatted if not is_formatted: self._preprocess() @@ -181,9 +257,6 @@ def _generate_supercell(self) -> None: This method calculates the supercell points and atom counts based on the unit cell data. It uses the `get_supercell_points` and `get_cell_atom_count` functions to perform the calculations. 
- - Returns: - None """ # Method implementation goes here self.unitcell_points = get_supercell_points(self._block, 1) @@ -191,11 +264,18 @@ def _generate_supercell(self) -> None: self.unitcell_atom_count = get_cell_atom_count(self.unitcell_points) self.supercell_atom_count = get_cell_atom_count(self.supercell_points) - def compute_connections(self, cutoff_radius=10.0): - """_summary_ + def compute_connections(self, cutoff_radius=10.0) -> None: + """Computes various connection parameters for the crystal structure, + including connection network, shortest distances, bond counts, and + coordination numbers (CN). These properties are lazily loaded to avoid + unnecessary computation during the initialization and pre-processing + step. - Args: - cutoff_radius (float, optional): _description_. Defaults to 10.0. + Parameters + ---------- + cutoff_radius : float, optional + The distance threshold in Angstroms used to consider two atoms as connected, + by default 10.0 """ self._log_info(CifLog.COMPUTE_CONNECTIONS.value) self.connections = get_site_connections( @@ -209,10 +289,7 @@ def compute_connections(self, cutoff_radius=10.0): cutoff_radius=cutoff_radius, ) - # Flattened coordinations self._connections_flattened = flat_site_connections(self.connections) - - # Shortest distance self._shortest_distance = get_shortest_distance(self.connections) # Shortest distance per bond pair @@ -226,11 +303,10 @@ def compute_connections(self, cutoff_radius=10.0): ) # Parse individual radii per element - self._radius_values = round_dict_values( - get_radius_values_per_element( - self.unique_elements, self.shortest_bond_pair_distance - ) + self._radius_values = get_radius_values_per_element( + self.unique_elements, self.shortest_bond_pair_distance ) + self._radius_sum = compute_radius_sum( self.radius_values, self.is_radius_data_available ) @@ -341,23 +417,90 @@ def compute_connections(self, cutoff_radius=10.0): @property @ensure_connections def shortest_distance(self): - """Property that 
checks if connections are computed and computes.""" + """Lazily retrieve the shortest atomic distance within the crystal + structure. This property is lazily loaded and ensures all necessary + connections are computed beforehand using the `@ensure_connections` + decorator. The computation calculates the minimum distance between any + pairs of atoms based on the connection data. + + Returns + ------- + float + The shortest distance between any two connected atoms in the + crystal structure, in Angstroms. + """ return self._shortest_distance @property @ensure_connections def connections_flattened(self): - """Property that combine site connections into a single array.""" + """Transform site connections into a sorted list of tuples, each + containing a pair of alphabetically sorted element symbols and the + distance between them. + + Returns + ------- + list[tuple[tuple[str, str], float]] + A sorted list of tuples, each containing a pair of alphabetically + sorted element symbols and the distance between them. + + Examples + -------- + >>> cif = Cif("path/to/cif/file.cif") + >>> cif.connections_flattened + [(("In", "Rh"), 2.697), (("In", "Rh"), 2.697)] + """ return self._connections_flattened @property @ensure_connections def shortest_bond_pair_distance(self): + """Determine the minimum distance for all possible unique pair of + elements. This property uses lazily loaded connections to compute the + distance if they are not already available. + + Returns + ------- + dict[tuple[str, str], float] + + Examples + -------- + >>> cif.shortest_bond_pair_distance + { + ("In", "In"): 3.244, + ("In", "Rh"): 2.697, + ("In", "U"): 3.21, + ("Rh", "Rh"): 3.881, + ("Rh", "U"): 2.983, + ("U", "U"): 3.881, + } + """ return self._shortest_bond_pair_distance @property @ensure_connections def shortest_site_pair_distance(self): + """Retrieves the shortest distance from each unique atomic site in the + crystal structure. 
This property uses lazily loaded connections to + compute these distances if they are not already available. + + Returns + ------- + dict[str, tuple[str, float]] + dictionary where each key is an atomic label and the value is a + tuple containing the label of the closest atomic site and the + shortest distance to it in Angstroms + + Examples + -------- + >>> cif.shortest_site_pair_distance + >>> { + "In1": ("Rh2", 2.697), + "Rh1": ("In1", 2.852), + "Rh2": ("In1", 2.697), + "U1": ("Rh1", 2.984), + } + """ return self._shortest_site_pair_distance @property @@ -498,17 +641,26 @@ def get_polyhedron_labels_by_CN_best_methods( @ensure_connections def plot_polyhedron( - self, site_label, show_labels=True, is_displayed=False, output_dir=None + self, + site_label: str, + show_labels=True, + is_displayed=False, + output_dir=None, ) -> None: - """ - Plots a polyhedron structure and optionally saves it. + """Function to plot a polyhedron structure and optionally saves it. - Args: - site_label (str): Central site label for the polyhedron. - show_labels (bool, optional): Whether to display vertex labels. Defaults to True. - is_displayed (bool, optional): Display plot interactively. Defaults to False. - output_dir (str, optional): Directory to save the plot. Defaults to None. 
+ Parameters + ---------- + site_label : str + Central site label for the polyhedron + show_labels : bool, optional + Whether to display vertex labels, by default True + is_displayed : bool, optional + Display plot interactively, by default False + output_dir : str, optional + Directory to save the plot, by default None """ + coords, vertex_labels = get_polyhedron_coordinates_labels( self.CN_connections_by_best_methods, site_label ) diff --git a/src/cifkit/models/cif_ensemble.py b/src/cifkit/models/cif_ensemble.py index 47e379e..702535f 100644 --- a/src/cifkit/models/cif_ensemble.py +++ b/src/cifkit/models/cif_ensemble.py @@ -125,10 +125,8 @@ def CN_unique_values_by_best_methods(self) -> set[str]: ) def _attribute_stats(self, attribute_name, transform=None): - """ - Helper method to compute the count of each unique value of a given - attribute across all Cif objects. - """ + """Helper method to compute the count of each unique value of a given + attribute across all Cif objects.""" values = [ ( transform(getattr(cif, attribute_name)) diff --git a/src/cifkit/occupancy/mixing.py b/src/cifkit/occupancy/mixing.py index 0148d23..86a77a9 100644 --- a/src/cifkit/occupancy/mixing.py +++ b/src/cifkit/occupancy/mixing.py @@ -2,9 +2,7 @@ def frac_coordinates(atom_site_info: dict, label: str) -> tuple[str, str, str]: - """ - Return a tuple of fractional coordinates - """ + """Return a tuple of fractional coordinates.""" x_frac = atom_site_info[label]["x_frac_coord"] y_frac = atom_site_info[label]["y_frac_coord"] z_frac = atom_site_info[label]["z_frac_coord"] @@ -14,9 +12,7 @@ def frac_coordinates(atom_site_info: dict, label: str) -> tuple[str, str, str]: def compute_coord_occupancy_sum( site_labels: list[str], atom_site_info: dict ) -> dict[tuple[str, str, str], float]: - """ - Compute sum of occupancy per each coordinate - """ + """Compute sum of occupancy per each coordinate.""" coord_occupancy_sum: dict[tuple[str, str, str], float] = {} for label in site_labels: occupancy = 
round( @@ -31,9 +27,7 @@ def compute_coord_occupancy_sum( def get_site_mixing_type(site_labels: list[str], atom_site_info: dict) -> str: - """ - Get file-level atomic site mixing info. - """ + """Get file-level atomic site mixing info.""" is_full_occupancy = True coord_occupancy_sum = compute_coord_occupancy_sum( @@ -67,9 +61,7 @@ def get_site_mixing_type(site_labels: list[str], atom_site_info: dict) -> str: def get_mixing_type_per_pair_dict( site_labels: list[str], label_pairs: list[str], atom_site_info: dict ): - """ - Return a dictionary, alphabetically sorted pair - """ + """Return a dictionary, alphabetically sorted pair.""" coord_occupancy_sum = compute_coord_occupancy_sum( site_labels, atom_site_info ) diff --git a/src/cifkit/preprocessors/environment.py b/src/cifkit/preprocessors/environment.py index 8065f63..12c0379 100644 --- a/src/cifkit/preprocessors/environment.py +++ b/src/cifkit/preprocessors/environment.py @@ -9,9 +9,7 @@ def get_site_connections( supercell_points, cutoff_radius: float, ) -> dict: - """ - Compute all pair distances per site label. - """ + """Compute all pair distances per site label.""" labels, lengths, angles = parsed_data all_labels_connections = {} @@ -101,10 +99,8 @@ def get_nearest_dists_per_site( def get_most_connected_point_per_site( label: str, dist_dict: dict, dist_set: set ): - """ - Identify the reference point with the highest number of connections - within the 50 shortest distances from a set of distances. 
- """ + """Identify the reference point with the highest number of connections within + the 50 shortest distances from a set of distances.""" sorted_unique_dists = sorted(dist_set) # Get the 30 shortest distances shortest_dists = sorted_unique_dists[:50] diff --git a/src/cifkit/preprocessors/environment_util.py b/src/cifkit/preprocessors/environment_util.py index ccccb8d..e4042b6 100644 --- a/src/cifkit/preprocessors/environment_util.py +++ b/src/cifkit/preprocessors/environment_util.py @@ -5,16 +5,14 @@ def flat_site_connections( site_connections: dict, -): - """ - Transform site connections into a sorted list of tuples, - each containing a pair of alphabetically distance. - """ +) -> list[tuple[tuple[str, str], float]]: + """Transform site connections into a sorted list of tuples, each containing a + pair of alphabetically sorted element symbols and the distance between them.""" flattened_points = [] for site_label, connections in site_connections.items(): for connection in connections: other_site_label = connection[0] - distance = connection[1] + distance = float(connection[1]) site_element = get_atom_type_from_label(site_label) other_site_element = get_atom_type_from_label(other_site_label) # Sort the site label and other site label alphabetically @@ -27,9 +25,7 @@ def flat_site_connections( def calculate_normalized_distances(connections): - """ - Calculate normalized distances for each connection - """ + """Calculate normalized distances for each connection.""" min_dist = connections[0][1] normalized_distances = [ float(np.round(dist / min_dist, 3)) for _, dist, _, _ in connections @@ -38,9 +34,7 @@ def calculate_normalized_distances(connections): def calculate_normalized_dist_diffs(normalized_distances): - """ - Calculate differences between consecutive normalized distances. 
- """ + """Calculate differences between consecutive normalized distances.""" normalized_dist_diffs = [ normalized_distances[k + 1] - normalized_distances[k] for k in range(len(normalized_distances) - 1) diff --git a/src/cifkit/preprocessors/error.py b/src/cifkit/preprocessors/error.py index fb614dc..dc57d16 100644 --- a/src/cifkit/preprocessors/error.py +++ b/src/cifkit/preprocessors/error.py @@ -6,9 +6,7 @@ def make_directory_and_move(file_path, dir_path, new_file_path): - """ - Create directory if it doesn't exist and move the file. - """ + """Create directory if it doesn't exist and move the file.""" os.makedirs(dir_path, exist_ok=True) new_file_path = os.path.join(dir_path, new_file_path) os.rename(file_path, new_file_path) diff --git a/src/cifkit/preprocessors/format.py b/src/cifkit/preprocessors/format.py index 1233a19..bfda52d 100644 --- a/src/cifkit/preprocessors/format.py +++ b/src/cifkit/preprocessors/format.py @@ -2,11 +2,12 @@ def preprocess_label_element_loop_values(file_path: str) -> None: - """ - Modify the atomic label site text in a .cif file. .cif files may - have the atomic labels in symbolic forms such as "M1" and some also - have two elements provided such as "In1,Co3B". Each case is handled - with specific examples demonstrated in the source and test code. + """Modify the atomic label site text in a .cif file. + + .cif files may have the atomic labels in symbolic forms such as "M1" and some + also have two elements provided such as "In1,Co3B". Each case is handled with + specific examples demonstrated in the source and test code. + """ is_cif_file_updated = False cif_block = cif_parser.get_cif_block(file_path) @@ -26,7 +27,6 @@ def preprocess_label_element_loop_values(file_path: str) -> None: ) unique_elements = cif_parser.get_unique_elements_from_loop(loop_values) - """ Type 8. 
Ex) 1817279.cif @@ -78,12 +78,12 @@ def preprocess_label_element_loop_values(file_path: str) -> None: is_cif_file_updated = True if atom_type_symbol != atom_type_from_label: - """ - Type 1. - Ex) 250165.cif + """Type 1. Ex) 250165.cif. + M1 Th 4 a 0 0 0 0.99 -> ThM1 Th 4 a 0 0 0 0.99 + """ if ( len(site_label) == 2 diff --git a/src/cifkit/preprocessors/supercell.py b/src/cifkit/preprocessors/supercell.py index 458d031..f1d8884 100644 --- a/src/cifkit/preprocessors/supercell.py +++ b/src/cifkit/preprocessors/supercell.py @@ -9,9 +9,7 @@ def get_supercell_points( block, supercell_generation_method, ) -> list[tuple[float, float, float, str]]: - """ - Return supercell points - """ + """Return supercell points.""" supercell_points = [] loop_values = cif_parser.get_loop_values(block) all_coords_list = get_unitcell_coords_for_all_labels(block) @@ -35,10 +33,8 @@ def get_supercell_points( def get_unitcell_coords_for_all_labels( block: Block, ) -> list[list[tuple[float, float, float, str]]]: - """ - Compute the new coordinates after applying - symmetry operations to the initial coordinates. - """ + """Compute the new coordinates after applying symmetry operations to the + initial coordinates.""" loop_values = cif_parser.get_loop_values(block) loop_length = len(loop_values[0]) @@ -78,10 +74,8 @@ def get_unitcell_coords_after_sym_operations_per_label( atom_site_fracs: tuple[float, float, float], atom_site_label: str, ) -> list[tuple[float, float, float, str]]: - """ - Generate a list of coordinates for each atom - site after applying symmetry operations. - """ + """Generate a list of coordinates for each atom site after applying symmetry + operations.""" symmetry_operations = find_symmetry_operations(block) if symmetry_operations is not None: @@ -125,9 +119,7 @@ def shift_and_append_points( atom_site_label: str, supercell_generation_method: int, ): - """ - Shift and duplicate points to create a supercell. 
- """ + """Shift and duplicate points to create a supercell.""" # Method 1 - No sfhits # Method 2 - +1 +1 +1 shifts diff --git a/src/cifkit/preprocessors/supercell_util.py b/src/cifkit/preprocessors/supercell_util.py index dfbfc81..9e9660c 100644 --- a/src/cifkit/preprocessors/supercell_util.py +++ b/src/cifkit/preprocessors/supercell_util.py @@ -1,5 +1,3 @@ def get_cell_atom_count(supercell_points) -> int: - """ - Count the number of atoms in the cell. - """ + """Count the number of atoms in the cell.""" return len(supercell_points) diff --git a/src/cifkit/utils/bond_pair.py b/src/cifkit/utils/bond_pair.py index 5fda0ec..c8c7631 100644 --- a/src/cifkit/utils/bond_pair.py +++ b/src/cifkit/utils/bond_pair.py @@ -3,11 +3,9 @@ from cifkit.data.mendeleeve_handler import get_mendeleev_nums_from_pair_tuple -def get_bond_pairs(labels: list[str]) -> set[tuple]: - """ - Generate all possible unique pairs, each tuple sorted alphabetically, - including pairs with identical elements. - """ +def get_bond_pairs(labels: list[str]) -> set[tuple[str, str]]: + """Generate all possible unique pairs, each tuple sorted alphabetically, + including pairs with identical elements.""" # Generate all combinations of two labels (this time including identical pairs) possible_pairs = product(labels, repeat=2) @@ -19,9 +17,7 @@ def get_bond_pairs(labels: list[str]) -> set[tuple]: def get_pairs_sorted_by_mendeleev( labels: list[str], ) -> set[tuple[str, str]]: - """ - Generate all unique pairs, each tuple sorted by the Mendeleeve number. - """ + """Generate all unique pairs, each tuple sorted by the Mendeleeve number.""" pairs = get_bond_pairs(labels) sorted_pairs = {order_tuple_pair_by_mendeleev(pair) for pair in pairs} @@ -29,9 +25,7 @@ def get_pairs_sorted_by_mendeleev( def order_tuple_pair_by_mendeleev(label_pair_tuple): - """ - Order a pair of elements based on Mendeleev numbers. 
- """ + """Order a pair of elements based on Mendeleev numbers.""" first_label = label_pair_tuple[0] second_label = label_pair_tuple[1] diff --git a/src/cifkit/utils/cif_editor.py b/src/cifkit/utils/cif_editor.py index fdd8a38..33dfba8 100644 --- a/src/cifkit/utils/cif_editor.py +++ b/src/cifkit/utils/cif_editor.py @@ -9,10 +9,11 @@ def remove_author_loop(file_path: str) -> None: - """ - Remove the author section from a .cif file to prevent parsing problems - caused by a wrongly formatted author block. This is a common issue in - PCD files. + """Remove the author section from a .cif file to prevent parsing problems + caused by a wrongly formatted author block. + + This is a common issue in PCD files. + """ ( start_index, @@ -32,9 +33,10 @@ def remove_author_loop(file_path: str) -> None: def add_hashtag_in_first_line(file_path: str): - """ - ICSD files start with (C) which causes parsing issues with gemmi. + """ICSD files start with (C) which causes parsing issues with gemmi. + If that is the case, add a # before (C) to fix the parsing issue. + """ # First, check if the file exists and is a CIF file if not os.path.exists(file_path) or not file_path.endswith(".cif"): @@ -55,10 +57,11 @@ def add_hashtag_in_first_line(file_path: str): def edit_cif_file_based_on_db(file_path: str): - """ - Edit a CIF file based on the database it is from. + """Edit a CIF file based on the database it is from. + PCD: Remove author loop and preprocess label element loop values ICSD: Add a hashtag in the first line + """ db_source = get_cif_db_source(file_path) if db_source == "ICSD": diff --git a/src/cifkit/utils/cif_parser.py b/src/cifkit/utils/cif_parser.py index 5e06d0d..06d47ec 100644 --- a/src/cifkit/utils/cif_parser.py +++ b/src/cifkit/utils/cif_parser.py @@ -1,6 +1,4 @@ -""" -Parses attributes from a .cif file. -""" +"""Parses attributes from a .cif file.""" from typing import Any @@ -19,9 +17,7 @@ def get_cif_block(file_path: str) -> Block: - """ - Return CIF block from file path. 
- """ + """Return CIF block from file path.""" doc = gemmi.cif.read_file(file_path) block = doc.sole_block() @@ -31,9 +27,7 @@ def get_cif_block(file_path: str) -> Block: def get_unitcell_lengths( block: Block, ) -> list[float]: - """ - Return the unit cell lengths. - """ + """Return the unit cell lengths.""" keys_lengths = [ "_cell_length_a", "_cell_length_b", @@ -51,9 +45,7 @@ def get_unitcell_lengths( def get_unitcell_angles_rad( block: Block, ) -> list[float]: - """ - Return the unit cell angles. - """ + """Return the unit cell angles.""" keys_angles = [ "_cell_angle_alpha", @@ -70,9 +62,7 @@ def get_unitcell_angles_rad( def get_loop_tags() -> list[str]: - """ - Return tags commonly used for atomic description. - """ + """Return tags commonly used for atomic description.""" loop_tags = [ "_atom_site_label", "_atom_site_type_symbol", @@ -88,9 +78,10 @@ def get_loop_tags() -> list[str]: def get_loop_values(block: Block) -> list[Column]: - """ - Retrieve a list of predefined loop tags for atomic site description. + """Retrieve a list of predefined loop tags for atomic site description. + If a tag is not found, None is inserted in its place in the list. + """ loop_tags = get_loop_tags() @@ -104,16 +95,12 @@ def get_loop_values(block: Block) -> list[Column]: def get_unique_label_count(loop_values: list) -> int: - """ - Count the number of labels in the loop. - """ + """Count the number of labels in the loop.""" return len(loop_values[0]) def get_unique_elements_from_loop(loop_values: list) -> set[str]: - """ - Return a list of alphabetically sorted unique elements from loop values. 
- """ + """Return a list of alphabetically sorted unique elements from loop values.""" num_atom_labels = get_unique_label_count(loop_values) unique_elements = set() for i in range(num_atom_labels): @@ -123,9 +110,7 @@ def get_unique_elements_from_loop(loop_values: list) -> set[str]: def get_unique_site_labels(loop_values: list) -> list[str]: - """ - Return a list of atom labels from loop values. - """ + """Return a list of atom labels from loop values.""" num_atom_labels = get_unique_label_count(loop_values) label_list = [] for i in range(num_atom_labels): @@ -138,9 +123,8 @@ def get_unique_site_labels(loop_values: list) -> list[str]: def get_label_occupancy_coordinates( loop_values: list, i ) -> tuple[str, float, tuple[float, float, float]]: - """ - Return atom information (label, occupancy, coordinates) for the i-th atom. - """ + """Return atom information (label, occupancy, coordinates) for the i-th + atom.""" label: str = loop_values[0][i] occupancy: float = get_string_to_formatted_float(loop_values[7][i]) coordinates: tuple[float, float, float] = ( @@ -155,9 +139,7 @@ def get_label_occupancy_coordinates( def get_loop_value_dict( loop_values: list, ) -> dict[str, dict[str, Any]]: - """ - Create a dictionary containing CIF loop values for each label. - """ + """Create a dictionary containing CIF loop values for each label.""" loop_value_dict = {} num_of_atom_labels = get_unique_label_count(loop_values) @@ -178,9 +160,7 @@ def get_loop_value_dict( def get_start_end_line_indexes( file_path: str, start_keyword: str ) -> tuple[int, int]: - """ - Find the starting and ending indexes of the lines in atom_site_loop - """ + """Find the starting and ending indexes of the lines in atom_site_loop.""" with open(file_path, "r") as f: lines = f.readlines() @@ -204,10 +184,11 @@ def get_start_end_line_indexes( def get_line_content_from_tag(file_path: str, start_keyword: str) -> list[str]: - """ - Returns a list containing file content with starting keyword. 
+ """Returns a list containing file content with starting keyword. + This function only appropriate for PCD format for removing the author section. + """ start_index, end_index = get_start_end_line_indexes( file_path, start_keyword @@ -228,9 +209,7 @@ def get_line_content_from_tag(file_path: str, start_keyword: str) -> list[str]: def get_formula_structure_weight_s_group( block: Block, ) -> tuple[str, str, float, int, str]: - """ - Return the unit cell lengths. - """ + """Return the formula, structure type, weight, and space group values.""" keys = [ "_chemical_formula_structural", "_chemical_name_structure_type", @@ -254,10 +233,11 @@ def get_formula_structure_weight_s_group( def get_unique_formulas_structures_weights_s_groups( file_path_list: list[str], ) -> tuple[set[str], set[str], set[float], set[int], set[str]]: - """ - Find all unique structures, formulas, weights, space groups. - This function requires no initialization and should be more efficient - in analyzing and filtering a dataset. + """Find all unique structures, formulas, weights, space groups. + + This function requires no initialization and should be more efficient in + analyzing and filtering a dataset. + """ formulas = set() structures = set() @@ -284,10 +264,8 @@ def get_unique_formulas_structures_weights_s_groups( def get_tag_from_third_line(file_path: str, db_source="PCD") -> str: - """ - Extract the tag from the provided CIF file path - appropriate for PCD db source only. 
- """ + """Extract the tag from the provided CIF file path appropriate for PCD db + source only.""" if not db_source == "PCD": return None @@ -296,7 +274,7 @@ def get_tag_from_third_line(file_path: str, db_source="PCD") -> str: # Read first three lines f.readline() # First line f.readline() # Second line - third_line = f.readline().strip() # Thrid line + third_line = f.readline().strip() # Third line third_line = third_line.replace(",", "") # Split based on '#' and filter out empty strings @@ -315,8 +293,8 @@ def get_tag_from_third_line(file_path: str, db_source="PCD") -> str: def parse_atom_site_occupancy_info(file_path: str) -> dict: - """Parse atom site loop information including element, occupancy, - fractional coordinates, multiplicity, and wyckoff symbol.""" + """Parse atom site loop information including element, occupancy, fractional + coordinates, multiplicity, and wyckoff symbol.""" block = get_cif_block(file_path) loop_vals = get_loop_values(block) label_count = len(loop_vals[0]) diff --git a/src/cifkit/utils/distance.py b/src/cifkit/utils/distance.py index 1be3781..fedfcf0 100644 --- a/src/cifkit/utils/distance.py +++ b/src/cifkit/utils/distance.py @@ -5,10 +5,8 @@ def calc_dist_two_cart_points( point1: list[float], point2: list[float], ) -> float: - """ - Calculate the Euclidean distance between two points - in Cartesian coordinates. - """ + """Calculate the Euclidean distance between two points in Cartesian + coordinates.""" diff = np.array(point2) - np.array(point1) distance = float(np.linalg.norm(diff)) diff --git a/src/cifkit/utils/folder.py b/src/cifkit/utils/folder.py index 970839a..ea9babe 100644 --- a/src/cifkit/utils/folder.py +++ b/src/cifkit/utils/folder.py @@ -6,25 +6,20 @@ def get_file_path(dir_path: str, file_name: str) -> str: - """ - Construct and return the full path for a file within a specified directory. 
- """ + """Construct and return the full path for a file within a specified + directory.""" return os.path.join(dir_path, file_name) def get_file_count(dir_path: str, ext=".cif") -> int: - """ - Count files with a given extension in a directory. - """ + """Count files with a given extension in a directory.""" return len(glob.glob(os.path.join(dir_path, f"*{ext}"))) def get_file_paths( dir_path: str, ext=".cif", add_nested_files=False ) -> list[str]: - """ - Return a list of file paths with a given extension from a directory. - """ + """Return a list of file paths with a given extension from a directory.""" if add_nested_files: # Traverse through directory and subdirectories files_list = [] @@ -43,9 +38,7 @@ def get_file_paths( def make_output_folder(dir_path: str, new_folder_name: str) -> str: - """ - Create an output folder - """ + """Create an output folder.""" full_path = os.path.join(dir_path, new_folder_name) # Check if the directory already exists @@ -79,9 +72,7 @@ def check_file_not_empty(file_path: str) -> bool: def move_files(to_directory: str, file_path_list: list[str]) -> None: - """ - Move files to another folder, creating the folder if it doesn't exist. - """ + """Move files to another folder, creating the folder if it doesn't exist.""" # Ensure the destination directory exists os.makedirs(to_directory, exist_ok=True) @@ -95,9 +86,7 @@ def move_files(to_directory: str, file_path_list: list[str]) -> None: def copy_files(to_directory: str, file_path_list: list[str]) -> None: - """ - Copy files to another folder, creating the folder if it doesn't exist. 
- """ + """Copy files to another folder, creating the folder if it doesn't exist.""" # Ensure the destination directory exists os.makedirs(to_directory, exist_ok=True) diff --git a/src/cifkit/utils/formula.py b/src/cifkit/utils/formula.py index e14d24f..27296e7 100644 --- a/src/cifkit/utils/formula.py +++ b/src/cifkit/utils/formula.py @@ -1,6 +1,4 @@ -""" -Parses a formula -""" +"""Parses a formula.""" import re @@ -19,9 +17,7 @@ def get_validated_formula_label(formula: str) -> str: def get_parsed_formula(formula: str) -> list[tuple[str, str]]: - """ - Return a list of tuples, each tuple containing an element and its index. - """ + """Return a list of tuples, each tuple containing an element and its index.""" trimmed_formula = get_validated_formula_label(formula) pattern = r"([A-Z][a-z]*)(\d*\.?\d*)" elements = re.findall(pattern, trimmed_formula) @@ -29,9 +25,7 @@ def get_parsed_formula(formula: str) -> list[tuple[str, str]]: def get_normalized_formula(formula: str, demical_places=3) -> str: - """ - Return a formula with the stoichiometry coefficient sum of 1 - """ + """Return a formula with the stoichiometry coefficient sum of 1.""" index_sum = 0.0 normalized_formula_parts = [] parsed_formula_set = get_parsed_formula(formula) @@ -59,10 +53,8 @@ def get_normalized_formula(formula: str, demical_places=3) -> str: def get_parsed_norm_formula(formula: str) -> list[tuple[str, str]]: - """ - Return a list of tuples, each tuple containing element - and normalized index. - """ + """Return a list of tuples, each tuple containing element and normalized + index.""" normalized_formula = get_normalized_formula(formula) parsed_normalized_formula = get_parsed_formula(normalized_formula) return parsed_normalized_formula @@ -76,16 +68,12 @@ def get_unique_elements(formula: str) -> list[str]: def get_unique_element_count(formula: str) -> int: - """ - Return the number of unique elements in the chemical formula. 
- """ + """Return the number of unique elements in the chemical formula.""" return len(get_unique_elements(formula)) def get_unique_elements_from_formulas(formulas: list) -> set[str]: - """ - Return unique elements from a list of formulas. - """ + """Return unique elements from a list of formulas.""" unique_elements = set() # Create a set to store unique elements for formula in formulas: @@ -100,9 +88,7 @@ def get_unique_elements_from_formulas(formulas: list) -> set[str]: def get_subscripted_formula(formula: str) -> str: - """ - Return a subscripted formula used for plotting. - """ + """Return a subscripted formula used for plotting.""" validated_formula = get_validated_formula_label(formula) # Use regular expression to find elements and numbers subscripted_formula = re.sub( diff --git a/src/cifkit/utils/prompt.py b/src/cifkit/utils/prompt.py index 916e63b..5b1ec98 100644 --- a/src/cifkit/utils/prompt.py +++ b/src/cifkit/utils/prompt.py @@ -2,9 +2,7 @@ def log_connected_points(all_labels_connections): - """ - Print nearest neightbor information. - """ + """Print nearest neighbor information.""" for label, connections in all_labels_connections.items(): logging.info(f"\nAtom site {label}:") for ( @@ -17,7 +15,5 @@ def log_connected_points(all_labels_connections): def log_save_file_message(file_type: str, file_path: str): - """ - Print when a file is saved. - """ + """Print when a file is saved.""" logging.info(f"{file_type} has been saved in {file_path}.") diff --git a/src/cifkit/utils/random.py b/src/cifkit/utils/random.py index fac86fe..efb8493 100644 --- a/src/cifkit/utils/random.py +++ b/src/cifkit/utils/random.py @@ -5,9 +5,7 @@ def generate_random_numbers( count: int, low: int | float, high: int | float, is_float=True ): random.seed(42) - """ - Generate a list of random numbers (floating-point or integer). 
- """ + """Generate a list of random numbers (floating-point or integer).""" if is_float: return [random.uniform(low, high) for _ in range(count)] else: diff --git a/src/cifkit/utils/sort.py b/src/cifkit/utils/sort.py index e700871..793f74a 100644 --- a/src/cifkit/utils/sort.py +++ b/src/cifkit/utils/sort.py @@ -1,9 +1,7 @@ def sort_element_pair_tuples( element_pair_tuples: list[tuple[tuple[str, str], float]], ) -> list[tuple[tuple[str, str], float]]: - """ - Alphabetically sort the pair tuple of elements - """ + """Alphabetically sort the pair tuple of elements.""" # First, sort the elements within each tuple alp_sorted_tuples = [ ((min(a, b), max(a, b)), distance) diff --git a/src/cifkit/utils/string_parser.py b/src/cifkit/utils/string_parser.py index 187803e..d3fee64 100644 --- a/src/cifkit/utils/string_parser.py +++ b/src/cifkit/utils/string_parser.py @@ -5,9 +5,7 @@ def get_atom_type_from_label(site_label: str) -> str: - """ - Return the element from the given label. - """ + """Return the element from the given label.""" validated_label = formula.get_validated_formula_label(site_label) if not isinstance(validated_label, str): raise TypeError(GeneralError.INVALID_TYPE.value) @@ -26,9 +24,7 @@ def get_atom_type_from_label(site_label: str) -> str: def get_string_to_formatted_float(str_value: str) -> float: - """ - Remove parentheses from a value string and convert to float. - """ + """Remove parentheses from a value string and convert to float.""" str_value = str_value.strip() return ( @@ -39,22 +35,19 @@ def get_string_to_formatted_float(str_value: str) -> float: def trim_string(formula: str) -> str: - """ - Remove "~", " ", and "'" characters from the parsed formula. - """ + """Remove "~", " ", and "'" characters from the parsed formula.""" return formula.replace("~", "").replace(" ", "").replace("'", "") def clean_parsed_structure(structure_type: str) -> str: - """ - Split the parsed structure text and remove "~". 
- """ + """Split the parsed structure text and remove "~".""" return structure_type.split(",")[0].replace("~", "") def strip_numbers_and_symbols(value: str) -> str: - """ - Removes all digits and '+' and '-' characters from the input string. + """Removes all digits and '+' and '-' characters from the input string. + Some ICSD, COD have charges in atomic site element e.g. "Fe0+". + """ return re.sub(r"[\d\+\-]", "", value) diff --git a/src/cifkit/utils/unit.py b/src/cifkit/utils/unit.py index 5bafdb6..1bc6bff 100644 --- a/src/cifkit/utils/unit.py +++ b/src/cifkit/utils/unit.py @@ -2,17 +2,13 @@ def get_radians_from_degrees(angles: list[float]) -> list[float]: - """ - Convert angles from degrees to radians and round to 5 decimal places. - """ + """Convert angles from degrees to radians and round to 5 decimal places.""" radians = [round(float(np.radians(angle)), 5) for angle in angles] return radians def round_float(distance: float, precision: int = 3) -> float: - """ - Round a distance value to a specified precision. - """ + """Round a distance value to a specified precision.""" return round(distance, precision) @@ -21,10 +17,8 @@ def fractional_to_cartesian( cell_lengths: list[float], cell_angles_rad: list[float], ) -> list[float]: - """ - Convert fractional coordinates to Cartesian - coordinates using cell lengths and angles. 
- """ + """Convert fractional coordinates to Cartesian coordinates using cell lengths + and angles.""" alpha, beta, gamma = cell_angles_rad # Calculate the components of the transformation matrix diff --git a/tests/core/models/test_cif.py b/tests/core/models/test_cif.py index d210378..79e99b6 100644 --- a/tests/core/models/test_cif.py +++ b/tests/core/models/test_cif.py @@ -667,7 +667,12 @@ def test_init_without_mendeeleve_number(): ("tests/data/cif/sources/MS/U13Rh4.cif", "MS", {"U", "Fe"}, 2988), ("tests/data/cif/sources/MS/U13Rh4.cif", "MS", {"U", "Fe"}, 2988), ("tests/data/cif/sources/COD/1010581.cif", "COD", {"Cu", "Se"}, 1383), - ("tests/data/cif/sources/CCDC/2294753.cif", "CCDC", {'Er', 'In', 'Co'}, 3844), + ( + "tests/data/cif/sources/CCDC/2294753.cif", + "CCDC", + {"Er", "In", "Co"}, + 3844, + ), ( "tests/data/cif/sources/MP/LiFeP2O7.cif", "MP",