From 6573f4af78b2f6dff79693f2806f69ae46a15999 Mon Sep 17 00:00:00 2001
From: Parker Singleton <sps253@cornell.edu>
Date: Fri, 17 Jan 2025 14:40:15 -0500
Subject: [PATCH 1/2] add/edit docstrings for metadata_merge.py and
 validator.py (#408)

* docstrings

* docstrings

* lints
---
 cubids/metadata_merge.py | 39 +++++++++++++++++++++++++++++++++---
 cubids/validator.py      | 43 ++++++++++++++++++++++++++++++++++++----
 2 files changed, 75 insertions(+), 7 deletions(-)

diff --git a/cubids/metadata_merge.py b/cubids/metadata_merge.py
index 6562f35b7..bb58233f2 100644
--- a/cubids/metadata_merge.py
+++ b/cubids/metadata_merge.py
@@ -1,4 +1,8 @@
-"""Tools for merging metadata."""
+"""Metadata merging utilities for CuBIDS.
+
+This module provides utilities for merging metadata in CuBIDS, including functions
+for checking merging operations, grouping acquisitions, and handling metadata fields.
+"""
 
 import json
 from collections import defaultdict
@@ -179,7 +183,18 @@ def merge_without_overwrite(source_meta, dest_meta_orig, raise_on_error=False):
 
 
 def is_nan(val):
-    """Return True if val is NaN."""
+    """Check if the given value is NaN (Not a Number).
+
+    Parameters
+    ----------
+    val : any
+        The value to check.
+
+    Returns
+    -------
+    bool
+        True if the value is NaN, False otherwise.
+    """
     if not isinstance(val, float):
         return False
 
@@ -187,7 +202,25 @@ def is_nan(val):
 
 
 def print_merges(merge_list):
-    """Print formatted text of merges."""
+    """Print formatted text of merges.
+
+    Parameters
+    ----------
+    merge_list : list of tuple
+        A list of tuples where each tuple contains two elements:
+
+        - src_id : tuple
+            The source identifier, where the last element is the source ID and
+            the first element is the source name.
+        - dest_id : tuple
+            The destination identifier, where the last element is the destination
+            ID and the first element is the destination name.
+
+    Returns
+    -------
+    str
+        A formatted string representing the merges, with each merge on a new line.
+    """
     merge_strings = []
     for src_id, dest_id in merge_list:
         src_id_str = f"{src_id[-1]}:{src_id[0]}"
diff --git a/cubids/validator.py b/cubids/validator.py
index 2ee09c252..5c7d97cc2 100644
--- a/cubids/validator.py
+++ b/cubids/validator.py
@@ -1,4 +1,8 @@
-"""Methods for validating BIDS datasets."""
+"""Methods for validating BIDS datasets.
+
+This module provides functions for validating BIDS datasets, including building
+subprocess commands for the BIDS validator and handling validation results.
+"""
 
 import glob
 import json
@@ -60,7 +64,24 @@ def get_bids_validator_version():
 
 
 def build_subject_paths(bids_dir):
-    """Build a list of BIDS dirs with 1 subject each."""
+    """Build a dictionary of BIDS directories with one subject each.
+
+    Parameters
+    ----------
+    bids_dir : str
+        The root directory of the BIDS dataset.
+
+    Returns
+    -------
+    dict
+        A dictionary where the keys are subject labels and the values are
+        lists of file paths associated with each subject.
+
+    Raises
+    ------
+    ValueError
+        If no subjects are found in the specified directory.
+    """
     bids_dir = str(bids_dir)
     if not bids_dir.endswith("/"):
         bids_dir += "/"
@@ -88,7 +109,21 @@ def build_subject_paths(bids_dir):
 
 
 def build_first_subject_path(bids_dir, subject):
-    """Build a list of BIDS dirs with 1 subject each."""
+    """Build a dictionary containing BIDS directory paths for a single subject.
+
+    Parameters
+    ----------
+    bids_dir : str
+        The root directory of the BIDS dataset.
+    subject : str
+        The path to the subject directory.
+
+    Returns
+    -------
+    dict
+        A dictionary where the key is the subject label and the value is a list of file paths
+        within the subject directory and the root BIDS directory.
+    """
     bids_dir = str(bids_dir)
     if not bids_dir.endswith("/"):
         bids_dir += "/"
@@ -224,7 +259,7 @@ def extract_summary_info(output):
 
 
 def update_dataset_description(path, new_info):
-    """Update or append information to dataset_description.json.
+    """Update or append information in dataset_description.json.
 
     Parameters
     ----------

From 07c0947eea7adf6ba026e97796dbfb936b670dc5 Mon Sep 17 00:00:00 2001
From: Parker Singleton <sps253@cornell.edu>
Date: Fri, 17 Jan 2025 14:49:52 -0500
Subject: [PATCH 2/2] cubids print-metadata-fields exits if no
 dataset_description.json (#409)

---
 cubids/workflows.py | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/cubids/workflows.py b/cubids/workflows.py
index c09366d1e..5f419edff 100644
--- a/cubids/workflows.py
+++ b/cubids/workflows.py
@@ -952,15 +952,36 @@ def remove_metadata_fields(bids_dir, container, fields):
 
 
 def print_metadata_fields(bids_dir, container):
-    """Print unique metadata fields.
+    """Print unique metadata fields from a BIDS dataset.
+
+    This function identifies and prints all unique metadata fields in a
+    BIDS dataset, exiting early if the directory has no
+    `dataset_description.json` file. It can run either directly in Python
+    or within a specified container (Docker or Singularity).
 
     Parameters
     ----------
     bids_dir : :obj:`pathlib.Path`
-        Path to the BIDS directory.
+        Path to the BIDS directory containing the `dataset_description.json` file.
     container : :obj:`str`
-        Container in which to run the workflow.
+        Name of the container (e.g., Docker, Singularity) to use for running the
+        `cubids print-metadata-fields` command. If `None`, the operation is performed
+        directly in Python without a container.
+
+    Raises
+    ------
+    SystemExit
+        Raised in the following cases:
+
+        - The `dataset_description.json` file is not found in the BIDS directory.
+        - The subprocess returns a non-zero exit code when executed in a container.
     """
+    # Check if dataset_description.json exists
+    dataset_description = bids_dir / "dataset_description.json"
+    if not dataset_description.exists():
+        logger.error("dataset_description.json not found in the BIDS directory.")
+        sys.exit(1)
+
     # Run directly from python
     if container is None:
         bod = CuBIDS(data_root=str(bids_dir), use_datalad=False)