Merge pull request #49 from bobleesj/fix-infra

Apply docstring automatic format, add docstrings to CIF object, set line-width to 90, fix typos
bobleesj · Oct 27, 2024 · 0067986 · 0067986
2 parents bc69d6e + 9534d71
commit 0067986
Show file tree

Hide file tree

Showing 43 changed files with 420 additions and 368 deletions.
diff --git a/.codespell/ignore_lines.txt b/.codespell/ignore_lines.txt
@@ -0,0 +1,22 @@
+;; Please include filenames and explanations for each ignored line.
+;; See https://docs.openverse.org/meta/codespell.html for docs.
+
+;; .github/workflows/draft_releases.yml
+;; The release-drafter/release-drafter configuration variable name cannot be changed
+          commitish: main
+
+;; frontend/src/locales/scripts/en.json5
+;; Prettier insists we escape a single quote rather than the double quotes and codespell
+;; does not understand the escaped `\'t` as "couldn't". It instead just sees "couldn".
+    heading: 'We couldn\'t find anything for "{query}".',
+
+;; catalog/tests/dags/providers/provider_api_scripts/test_wikimedia_commons.py
+;; "Titel" matches "title", but the phrase is in Dutch, not English, so "titel"
+;; is actually correct. Similarly, "als" incorrectly matches to "also" in the same
+;; block of Dutch text.
+        "Identificatie Titel(s):  Allegorie op kunstenaar Francesco Mazzoli, "
+        "bekend als Parmigianino"
+
+;; packages/js/eslint-plugin/configs/vue.ts
+;; `te` gets matched with `the` and others
+const i18nDestructureRules = ["t", "tc", "te", "td", "d", "n"].map(
diff --git a/.codespell/ignore_words.txt b/.codespell/ignore_words.txt
@@ -0,0 +1,11 @@
+;; Please include explanations for each ignored word (lowercase) using full sentences.
+;; See https://docs.openverse.org/meta/codespell.html for docs.
+
+;; `nd` is a chemical element.
+nd
+
+;; `te` is a chemical element.
+te
+
+;; `Indide` refers to the keywords in the tyvanchuk_crystal_2024 paper.
+indide
diff --git a/.flake8 b/.flake8
@@ -5,7 +5,7 @@ exclude =
     build,
     dist,
     doc/source/conf.py
-max-line-length = 115
+max-line-length = 90
 
 # https://black.readthedocs.io/en/stable/guides/using_black_with_other_tools.html#labels-why-pycodestyle-warnings
 extend-ignore = E203
diff --git a/README.md b/README.md
@@ -22,7 +22,7 @@ variables for handling large datasets, on the order of tens of thousands, of
 
 `cifkit` provides higher-level functions in just a few lines of code.
 
-- **Coordination geometry** - `cifkit` provides fuctions for visualing
+- **Coordination geometry** - `cifkit` provides functions for visualizing
   coordination geometry from each site and extracts physics-based features like
   volume and packing efficiency in each polyhedron.
 - **Atomic mixing** - `cifkit` extracts atomic mixing information at the bond
@@ -79,7 +79,7 @@ ensemble.filter_by_structures(["Co1.75Ge", "CoIn2"])
 ensemble.filter_by_structures("CeAl2Ga2")
 ```
 
-To learn more, please read the official documention here:
+To learn more, please read the official documentation here:
 https://bobleesj.github.io/cifkit.
 
 ## Quotes
@@ -122,3 +122,10 @@ Here is how you can contribute to the `cifkit` project if you found it helpful:
 - If you have any suggestions or need further clarification on how to use
   `cifkit`, please reach out to Bob Lee
   ([@bobleesj](https://github.com/bobleesj)).
+
+## To render documentation
+
+```bash
+pip install -r requirements/docs.txt
+mkdocs serve
+```
diff --git a/docs/index.md b/docs/index.md
@@ -29,7 +29,7 @@ mixing, among other parameters.
 
 `cifkit` provides higher-level functions in just a few lines of code.
 
-- **Coordination geometry** - `cifkit` provides fuctions for visualing
+- **Coordination geometry** - `cifkit` provides functions for visualizing
   coordination geometry from each site and extracts physics-based features like
   volume and packing efficiency in each polyhedron.
 - **Atomic mixing** - `cifkit` extracts atomic mixing information at the bond

diff --git a/docs/notebooks/01_cif.ipynb b/docs/notebooks/01_cif.ipynb
@@ -45,7 +45,7 @@
     "from cifkit import Example\n",
     "from cifkit import Cif\n",
     "\n",
-    "# Initalize with the example file provided\n",
+    "# Initialize with the example file provided\n",
     "cif = Cif(Example.Er10Co9In20_file_path)\n",
     "\n",
     "# Print attributes\n",
@@ -60,7 +60,7 @@
    "source": [
     "## Get instant properties - parsed information\n",
     "\n",
-    "The `Cif` class provides a set of accessible properties that can be accessed. Each object is intialized with the `file_path` to the `.cif` file."
+    "The `Cif` class provides a set of accessible properties that can be accessed. Each object is initialized with the `file_path` to the `.cif` file."
    ]
   },
   {
@@ -83,7 +83,7 @@
     "print(\"Formula:\", cif.formula)\n",
     "print(\"Structure:\", cif.structure)\n",
     "print(\"Unique elements:\", cif.unique_elements)\n",
-    "print(\"Unitcell lenghts:\", cif.unitcell_lengths)\n",
+    "print(\"Unitcell lengths:\", cif.unitcell_lengths)\n",
     "print(\"Unitcell angles:\", cif.unitcell_angles)\n",
     "print(\"Site labels:\", cif.site_labels)\n",
     "print(\"Weight:\", cif.weight)\n",
@@ -102,7 +102,7 @@
    "source": [
     "### How tag is parsed\n",
     "\n",
-    "Tag is parsed from the thrid line of each `.cif` file. Some databases such as Pearson's Crystal Data offers tags for each file.\n",
+    "Tag is parsed from the third line of each `.cif` file. Some databases such as Pearson's Crystal Data offer tags for each file.\n",
     "\n"
    ]
   },

diff --git a/mkdocs.yml b/mkdocs.yml
@@ -61,4 +61,5 @@ plugins:
           options:
             show_source: false
             show_root_heading: true     # Show module names as headings
+            docstring_style: numpy
   - mkdocs-jupyter
diff --git a/src/cifkit/coordination/bond_distance.py b/src/cifkit/coordination/bond_distance.py
@@ -1,9 +1,7 @@
 def get_shortest_distance_per_bond_pair(
     flattened_connections: list[tuple[tuple[str, str], float]],
 ) -> dict[tuple[str, str], float]:
-    """
-    Determine the minimum distance for all possible unique pair of elements.
-    """
+    """Determine the min distance for all possible unique pairs of elements."""
 
     # Initialize the dictionary with a specific type
     min_dist_per_element_pair: dict[tuple[str, str], float] = {}

diff --git a/src/cifkit/coordination/composition.py b/src/cifkit/coordination/composition.py
@@ -9,9 +9,7 @@ def get_bond_counts(
     connections: dict[str, list],
     sorted_by_mendeleev=False,
 ) -> dict:
-    """
-    Return a dictionary containing bond pairs and counts per label site.
-    """
+    """Return a dictionary containing bond pairs and counts per label site."""
     if sorted_by_mendeleev:
         bond_pairs = bond_pair.get_pairs_sorted_by_mendeleev(elements)
     else:
@@ -58,9 +56,7 @@ def get_bond_counts(
 
 
 def get_bond_fractions(bond_pair_data: dict) -> dict[tuple[str, str], float]:
-    """
-    Calculate the fraction of each bond type across all labels.
-    """
+    """Calculate the fraction of each bond type across all labels."""
     total_bond_counts: dict[tuple[str, str], float] = {}
     total_bonds = 0
 
@@ -83,9 +79,7 @@ def get_bond_fractions(bond_pair_data: dict) -> dict[tuple[str, str], float]:
 
 
 def count_connections_per_site(connections: dict) -> dict[str, int]:
-    """
-    Calculate the coordination number for each atom site.
-    """
+    """Calculate the coordination number for each atom site."""
     neighbor_count = {}
     for label, connection_data in connections.items():
         neighbor_count[label] = len(connection_data)
@@ -94,9 +88,7 @@ def count_connections_per_site(connections: dict) -> dict[str, int]:
 
 
 def compute_avg_CN(connections: dict[str, int]) -> float:
-    """
-    Calculate the average coordination number across all sites.
-    """
+    """Calculate the average coordination number across all sites."""
     coordination_numbers = count_connections_per_site(connections)
     total = 0
     for _, value in coordination_numbers.items():
@@ -105,9 +97,7 @@ def compute_avg_CN(connections: dict[str, int]) -> float:
 
 
 def get_unique_CN_values(connections: dict) -> set[int]:
-    """
-    Return unique coordination numbers from all sites.
-    """
+    """Return unique coordination numbers from all sites."""
     coordination_numbers = count_connections_per_site(connections)
     unique_numbers = set(coordination_numbers.values())
     return unique_numbers
diff --git a/src/cifkit/coordination/connection.py b/src/cifkit/coordination/connection.py
@@ -1,17 +1,15 @@
 def get_CN_connections_by_best_methods(
-    best_methods, conncetions: dict
+    best_methods, connections: dict
 ) -> dict:
-    """
-    Retrieve connections limited by the number of vertices (CN_value)
-    for each label.
-    """
+    """Retrieve connections limited by the number of vertices (CN) for each
+    label."""
     CN_connections = {}
 
     for label, data in best_methods.items():
         CN_value = data[
             "number_of_vertices"
         ]  # Extract the limit for the number of vertices
         # Limit the connections for this label using CN_value
-        CN_connections[label] = conncetions[label][:CN_value]
+        CN_connections[label] = connections[label][:CN_value]
 
     return CN_connections
diff --git a/src/cifkit/coordination/filter.py b/src/cifkit/coordination/filter.py
@@ -4,11 +4,8 @@
 
 
 def find_best_polyhedron(max_gaps_per_label, connections):
-    """
-    Find the best polyhedron for each label based on the minimum
-    distance between the reference atom to the average position of
-    connected atoms.
-    """
+    """Find the best polyhedron for each label based on the minimum distance
+    between the reference atom to the average position of connected atoms."""
     best_polyhedrons = {}
 
     for label, CN_data_per_method in max_gaps_per_label.items():
@@ -38,16 +35,17 @@ def find_best_polyhedron(max_gaps_per_label, connections):
 
             except Exception:
                 print(
-                    f"Error in determining polyhedron for {label} using {method} - skipped"
+                    f"Error in polyhedron calculation for"
+                    f"{label} using {method} - Skip"
                 )
                 continue  # Move to the next method
 
-            # Returns non if ther eis any error
+            # Returns None if there is any error
             polyhedron_metrics = compute_polyhedron_metrics(
                 polyhedron_points, hull
             )
 
-            # If there is no metrics, then skip the mthod
+            # If there are no metrics, then skip the method
             if polyhedron_metrics is None:
                 continue
 

diff --git a/src/cifkit/coordination/geometry.py b/src/cifkit/coordination/geometry.py
@@ -6,9 +6,10 @@
 def get_polyhedron_coordinates_labels(
     connections: dict, label: str
 ) -> tuple[list[list[float]], list[str]]:
-    """
-    Return a list of Cartesian coordinates and labels. The central atom is
-    the last index.
+    """Return a list of Cartesian coordinates and labels.
+
+    The central atom is the last index.
+
     """
     conn_data = connections[label]
     polyhedron_points = [conn[3] for conn in conn_data]
@@ -24,9 +25,7 @@ def get_polyhedron_coordinates_labels(
 
 
 def compute_polyhedron_metrics(polyhedron_points, hull):
-    """
-    Compute various metrics related to a given polyhedron.
-    """
+    """Compute various metrics related to a given polyhedron."""
     try:
         central_atom_coord = np.array(polyhedron_points[-1])
 
@@ -98,10 +97,8 @@ def compute_polyhedron_metrics(polyhedron_points, hull):
 def compute_center_of_mass_and_distance(
     polyhedron_points, hull, central_atom_coord
 ):
-    """
-    Calculate the center of mass of a polyhedron and the distance
-    from the center of mass to a given point.
-    """
+    """Calculate the center of mass of a polyhedron and the distance from the
+    center of mass to a given point."""
     center_of_mass = np.mean(polyhedron_points[hull.vertices, :], axis=0)
     vector_to_center_of_mass = center_of_mass - central_atom_coord
     distance_to_center = np.linalg.norm(vector_to_center_of_mass)

diff --git a/src/cifkit/coordination/method.py b/src/cifkit/coordination/method.py
@@ -64,7 +64,7 @@ def compute_CN_max_gap_per_site(
                 norm_dist_by_CIF_radius_sum = compute_normalized_value(
                     pair_dist, CIF_radius_sum_norm_value
                 )
-                norm_dist_by_CIF_radius_refined_sum = compute_normalized_value(
+                norm_dist_by_CIF_rad_ref_sum = compute_normalized_value(
                     pair_dist, CIF_radius_sum_refined_norm_value
                 )
                 norm_dist_by_Pauling_radius_sum = compute_normalized_value(
@@ -80,7 +80,7 @@ def compute_CN_max_gap_per_site(
                 distances = {
                     "dist_by_shortest_dist": norm_dist_by_min_dist,
                     "dist_by_CIF_radius_sum": norm_dist_by_CIF_radius_sum,
-                    "dist_by_CIF_radius_refined_sum": norm_dist_by_CIF_radius_refined_sum,
+                    "dist_by_CIF_radius_refined_sum": norm_dist_by_CIF_rad_ref_sum,
                     "dist_by_Pauling_radius_sum": norm_dist_by_Pauling_radius_sum,
                 }
             else:
@@ -118,10 +118,8 @@ def compute_normalized_value(number: float, ref_number: float) -> float:
 def get_rad_sum_value(
     rad_sum_data, method_name: str, ref_label: str, other_label: str
 ) -> float:
-    """
-    Return the sum of radii value for a given pair of elements,
-    ensuring the pair is alphabetically sorted.
-    """
+    """Return the sum of radii value for a given pair of elements, ensuring the
+    pair is alphabetically sorted."""
 
     # Extract the element types from the labels
     ref_element = get_atom_type_from_label(ref_label)

diff --git a/src/cifkit/coordination/site.py b/src/cifkit/coordination/site.py
@@ -4,10 +4,8 @@
 def get_min_distance_pair(
     connections: dict,
 ) -> tuple[tuple[str, str], float]:
-    """
-    Return an alphabetically sorted element pair with the global minimum
-    distance in the entire supercell.
-    """
+    """Return an alphabetically sorted element pair with the global minimum
+    distance in the entire supercell."""
     sorted_tuples = get_min_distance_pair_per_site_label(connections)
     min_dist_tuple = sorted_tuples[0]
     return min_dist_tuple
@@ -16,10 +14,8 @@ def get_min_distance_pair(
 def get_min_distance_pair_per_site_label(
     connections: dict,
 ) -> list[tuple[tuple[str, str], float]]:
-    """
-    Return a list of tuples containing element pairs
-    and the minimum distance from each site label in the loop.
-    """
+    """Return a list of tuples containing element pairs and the minimum distance
+    from each site label in the loop."""
     element_pairs = []
     # Iterate over each pair and their list of distances
     for ref_label, pair_data in connections.items():

diff --git a/src/cifkit/coordination/site_distance.py b/src/cifkit/coordination/site_distance.py
@@ -1,7 +1,5 @@
 def get_shortest_distance(connections: dict) -> float:
-    """
-    Return the shortest distance in the supercell.
-    """
+    """Return the shortest distance in the supercell."""
     min_dist = float("inf")
 
     # Iterate over each site's connections in the dictionary
@@ -16,9 +14,7 @@ def get_shortest_distance(connections: dict) -> float:
 def get_shortest_distance_per_site(
     connections: dict,
 ) -> dict[str, tuple[str, float]]:
-    """
-    Calculate the shortest distance for each label.
-    """
+    """Calculate the shortest distance for each label."""
     shortest_dist_info: dict[str, tuple[str, float]] = {}
 
     for label, connections in connections.items():

diff --git a/src/cifkit/data/example.py b/src/cifkit/data/example.py
@@ -3,9 +3,13 @@
 
 
 class Example(str, Enum):
+    # Define 'get_path' as a static method
+    @staticmethod
     def get_path(*args):
+        # Use '__file__' to get the directory of the current file and join paths
         return os.path.join(os.path.dirname(__file__), *args)
 
+    # Now use 'get_path' to define paths relative to this file
     ErCoIn_folder_path = get_path("ErCoIn")
     ErCoIn_big_folder_path = get_path("ErCoIn_big")
     Er10Co9In20_file_path = get_path("ErCoIn", "Er10Co9In20.cif")

diff --git a/src/cifkit/data/mendeleeve_handler.py b/src/cifkit/data/mendeleeve_handler.py
@@ -5,9 +5,10 @@
 def get_mendeleev_nums_from_pair_tuple(
     label_pair_tuple: tuple[str, str],
 ) -> tuple[int, int]:
-    """
-    Parse Mendeleev number for each label in the tuple.
+    """Parse Mendeleev number for each label in the tuple.
+
     If no number is found, default to 0 for that element.
+
     """
     # Parse the first and second elements
     first_element = string_parser.get_atom_type_from_label(label_pair_tuple[0])

diff --git a/src/cifkit/data/radius.py b/src/cifkit/data/radius.py
@@ -1,7 +1,5 @@
 def get_radius_data() -> dict:
-    """
-    Return a dictionary of element radii data.
-    """
+    """Return a dictionary of element radii data."""
     rad_data = {
         "Si": [1.176, 1.316],
         "Sc": [1.641, 1.620],