diff --git a/.gitignore b/.gitignore index 95f82a10..336385a0 100644 --- a/.gitignore +++ b/.gitignore @@ -136,4 +136,5 @@ data/* .DS_Store notebooks/testdata .vscode/settings.json -notebooks/test1 \ No newline at end of file +notebooks/test1 +docs/generated \ No newline at end of file diff --git a/cubids/cli.py b/cubids/cli.py index a6bcce32..846b401f 100644 --- a/cubids/cli.py +++ b/cubids/cli.py @@ -118,6 +118,7 @@ def _parse_validate(): parser = argparse.ArgumentParser( description="cubids validate: Wrapper around the official BIDS Validator", formatter_class=argparse.ArgumentDefaultsHelpFormatter, + allow_abbrev=False, ) PathExists = partial(_path_exists, parser=parser) @@ -245,6 +246,7 @@ def _parse_bids_version(): parser = argparse.ArgumentParser( description="cubids bids-version: Get BIDS Validator and Schema version", formatter_class=argparse.ArgumentDefaultsHelpFormatter, + allow_abbrev=False, ) PathExists = partial(_path_exists, parser=parser) @@ -328,6 +330,7 @@ def _parse_bids_sidecar_merge(): parser = argparse.ArgumentParser( description=("bids-sidecar-merge: merge critical keys from one sidecar to another"), formatter_class=argparse.ArgumentDefaultsHelpFormatter, + allow_abbrev=False, ) IsFile = partial(_is_file, parser=parser) @@ -412,6 +415,7 @@ def _parse_group(): parser = argparse.ArgumentParser( description="cubids group: find key and parameter groups in BIDS", formatter_class=argparse.ArgumentDefaultsHelpFormatter, + allow_abbrev=False, ) PathExists = partial(_path_exists, parser=parser) @@ -534,6 +538,7 @@ def _parse_apply(): parser = argparse.ArgumentParser( description=("cubids apply: apply the changes specified in a tsv to a BIDS directory"), formatter_class=argparse.ArgumentDefaultsHelpFormatter, + allow_abbrev=False, ) PathExists = partial(_path_exists, parser=parser) IsFile = partial(_is_file, parser=parser) @@ -549,7 +554,7 @@ def _parse_apply(): ) parser.add_argument( "edited_summary_tsv", - type=IsFile, + type=Path, action="store", help=( "path to the _summary.tsv that has been edited " @@ -561,7 +566,7 @@ def _parse_apply(): ) parser.add_argument( "files_tsv", - type=IsFile, + type=Path, action="store", help=( "path to the _files.tsv that has been edited " @@ -669,6 +674,7 @@ def _parse_datalad_save(): parser = argparse.ArgumentParser( description=("cubids datalad-save: perform a DataLad save on a BIDS directory"), formatter_class=argparse.ArgumentDefaultsHelpFormatter, + allow_abbrev=False, ) PathExists = partial(_path_exists, parser=parser) @@ -756,6 +762,7 @@ def _parse_undo(): parser = argparse.ArgumentParser( description="cubids undo: revert most recent commit", formatter_class=argparse.ArgumentDefaultsHelpFormatter, + allow_abbrev=False, ) PathExists = partial(_path_exists, parser=parser) @@ -852,9 +859,10 @@ def _parse_copy_exemplars(): "one subject from each Acquisition Group in the BIDS dataset" ), formatter_class=argparse.ArgumentDefaultsHelpFormatter, + allow_abbrev=False, ) PathExists = partial(_path_exists, parser=parser) - IsFile = partial(_is_file, parser=parser) + # IsFile = partial(_is_file, parser=parser) parser.add_argument( "bids_dir", @@ -868,7 +876,7 @@ def _parse_copy_exemplars(): ) parser.add_argument( "exemplars_dir", - type=PathExists, + type=Path, action="store", help=( "name of the directory to create where to store exemplar dataset. 
" @@ -879,7 +887,7 @@ def _parse_copy_exemplars(): ) parser.add_argument( "exemplars_tsv", - type=IsFile, + type=Path, action="store", help=( "path to the .tsv that lists one " @@ -987,6 +995,7 @@ def _parse_add_nifti_info(): "files to the sidecars of each dataset" ), formatter_class=argparse.ArgumentDefaultsHelpFormatter, + allow_abbrev=False, ) PathExists = partial(_path_exists, parser=parser) @@ -1081,6 +1090,7 @@ def _parse_purge(): parser = argparse.ArgumentParser( description="cubids purge: purge associations from the dataset", formatter_class=argparse.ArgumentDefaultsHelpFormatter, + allow_abbrev=False, ) PathExists = partial(_path_exists, parser=parser) IsFile = partial(_is_file, parser=parser) @@ -1182,6 +1192,7 @@ def _parse_remove_metadata_fields(): parser = argparse.ArgumentParser( description="cubids remove-metadata-fields: delete fields from metadata", formatter_class=argparse.ArgumentDefaultsHelpFormatter, + allow_abbrev=False, ) PathExists = partial(_path_exists, parser=parser) @@ -1274,6 +1285,7 @@ def _parse_print_metadata_fields(): parser = argparse.ArgumentParser( description="cubids print-metadata-fields: print all unique metadata fields", formatter_class=argparse.ArgumentDefaultsHelpFormatter, + allow_abbrev=False, ) PathExists = partial(_path_exists, parser=parser) @@ -1359,7 +1371,7 @@ def _get_parser(): """ from cubids import __version__ - parser = argparse.ArgumentParser(prog="cubids") + parser = argparse.ArgumentParser(prog="cubids", allow_abbrev=False) parser.add_argument("-v", "--version", action="version", version=__version__) subparsers = parser.add_subparsers(help="CuBIDS commands") @@ -1371,6 +1383,7 @@ def _get_parser(): parents=[subparser], help=subparser.description, add_help=False, + allow_abbrev=False, ) return parser diff --git a/cubids/cubids.py b/cubids/cubids.py index fde4012b..4ae210c4 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -87,6 +87,8 @@ class CuBIDS(object): A data dictionary for TSV outputs. use_datalad : :obj:`bool` If True, use datalad to track changes to the BIDS dataset. + is_longitudinal : :obj:`bool` + If True, adds "ses" in filepath. """ def __init__( @@ -115,12 +117,17 @@ def __init__( self.data_dict = {} # data dictionary for TSV outputs self.use_datalad = use_datalad # True if flag set, False if flag unset self.schema = load_schema(schema_json) + self.is_longitudinal = self._infer_longitudinal() # inferred from dataset structure if self.use_datalad: self.init_datalad() - if self.acq_group_level == "session": + if self.is_longitudinal and self.acq_group_level == "session": NON_KEY_ENTITIES.remove("session") + elif not self.is_longitudinal and self.acq_group_level == "session": + raise ValueError( + 'Data is not longitudinal, so "session" is not a valid grouping level.' + ) @property def layout(self): @@ -134,6 +141,10 @@ def layout(self): # print("LAYOUT OBJECT SET") return self._layout + def _infer_longitudinal(self): + """Infer if the dataset is longitudinal based on its structure.""" + return any("ses-" in str(f) for f in Path(self.path).rglob("*")) + def reset_bids_layout(self, validate=False): """Reset the BIDS layout. 
@@ -480,6 +491,7 @@ def change_filename(self, filepath, entities): out_entities=entities, out_dir=str(self.path), schema=self.schema, + is_longitudinal=self.is_longitudinal, ) exts = Path(filepath).suffixes @@ -488,7 +500,8 @@ def change_filename(self, filepath, entities): suffix = entities["suffix"] sub = get_entity_value(filepath, "sub") - ses = get_entity_value(filepath, "ses") + if self.is_longitudinal: + ses = get_entity_value(filepath, "ses") # Add the scan path + new path to the lists of old, new filenames self.old_filenames.append(filepath) @@ -584,7 +597,10 @@ def change_filename(self, filepath, entities): self.new_filenames.append(new_labeling) # RENAME INTENDED FORS! - ses_path = self.path + "/" + sub + "/" + ses + if self.is_longitudinal: + ses_path = self.path + "/" + sub + "/" + ses + elif not self.is_longitudinal: + ses_path = self.path + "/" + sub files_with_if = [] files_with_if += Path(ses_path).rglob("fmap/*.json") files_with_if += Path(ses_path).rglob("perf/*_m0scan.json") @@ -607,6 +623,7 @@ def change_filename(self, filepath, entities): data["IntendedFor"].remove(item) # add new filename data["IntendedFor"].append(_get_participant_relative_path(new_path)) + if item == _get_bidsuri(filepath, self.path): # remove old filename data["IntendedFor"].remove(item) @@ -1370,6 +1387,7 @@ def get_layout(self): return self.layout +# XXX: Remove _validate_json? def _validate_json(): """Validate a JSON file's contents. @@ -1409,8 +1427,29 @@ def _get_participant_relative_path(scan): This is what will appear in the IntendedFor field of any association. + Examples: + >>> _get_participant_relative_path( + ... "/path/to/dset/sub-01/ses-01/func/sub-01_ses-01_bold.nii.gz", + ... ) + 'ses-01/func/sub-01_ses-01_bold.nii.gz' + + >>> _get_participant_relative_path( + ... "/path/to/dset/sub-01/func/sub-01_bold.nii.gz", + ... ) + 'func/sub-01_bold.nii.gz' + + >>> _get_participant_relative_path( + ... "/path/to/dset/ses-01/func/ses-01_bold.nii.gz", + ... ) + Traceback (most recent call last): + ValueError: Could not find subject in ... """ - return "/".join(Path(scan).parts[-3:]) + parts = Path(scan).parts + # Find the first part that starts with "sub-" + for i, part in enumerate(parts): + if part.startswith("sub-"): + return "/".join(parts[i + 1 :]) + raise ValueError(f"Could not find subject in {scan}") def _get_bidsuri(filename, dataset_root): @@ -1741,7 +1780,7 @@ def get_entity_value(path, key): return part -def build_path(filepath, out_entities, out_dir, schema): +def build_path(filepath, out_entities, out_dir, schema, is_longitudinal): """Build a new path for a file based on its BIDS entities. This function could ultimately be replaced with bids.BIDSLayout.build_path(), @@ -1765,6 +1804,8 @@ def build_path(filepath, out_entities, out_dir, schema): (e.g., acquisition) to their corresponding keys (e.g., acq). - schema["objects"]["datatypes"]: a dictionary defining the valid datatypes. This function only uses the keys of this dictionary. + is_longitudinal : bool + If True, add "ses" to file path. Returns ------- @@ -1783,6 +1824,7 @@ def build_path(filepath, out_entities, out_dir, schema): ... {"acquisition": "VAR", "suffix": "T2w"}, ... "/output", ... schema, + ... True, ... ) '/output/sub-01/ses-01/anat/sub-01_ses-01_acq-VAR_T2w.nii.gz' @@ -1793,6 +1835,7 @@ def build_path(filepath, out_entities, out_dir, schema): ... {"task": "rest", "run": "2", "acquisition": "VAR", "suffix": "bold"}, ... "/output", ... schema, + ... True, ... 
) '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-2_bold.nii.gz' @@ -1804,6 +1847,7 @@ def build_path(filepath, out_entities, out_dir, schema): ... {"task": "rest", "run": 2, "acquisition": "VAR", "suffix": "bold"}, ... "/output", ... schema, + ... True, ... ) '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-00002_bold.nii.gz' @@ -1815,6 +1859,7 @@ def build_path(filepath, out_entities, out_dir, schema): ... {"task": "rest", "run": 2, "acquisition": "VAR", "suffix": "bold"}, ... "/output", ... schema, + ... True, ... ) '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-2_bold.nii.gz' @@ -1825,6 +1870,7 @@ def build_path(filepath, out_entities, out_dir, schema): ... {"task": "rest", "run": "2", "acquisition": "VAR", "suffix": "bold"}, ... "/output", ... schema, + ... True, ... ) '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-2_bold.nii.gz' @@ -1836,6 +1882,7 @@ def build_path(filepath, out_entities, out_dir, schema): ... {"task": "rest", "acquisition": "VAR", "suffix": "bold"}, ... "/output", ... schema, + ... True, ... ) '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_bold.nii.gz' @@ -1846,6 +1893,7 @@ def build_path(filepath, out_entities, out_dir, schema): ... {"subject": "02", "task": "rest", "acquisition": "VAR", "suffix": "bold"}, ... "/output", ... schema, + ... True, ... ) '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_bold.nii.gz' @@ -1856,6 +1904,7 @@ def build_path(filepath, out_entities, out_dir, schema): ... {"task": "rest", "acquisition": "VAR", "echo": 1, "suffix": "bold"}, ... "/output", ... schema, + ... True, ... ) '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_echo-1_bold.nii.gz' @@ -1866,6 +1915,7 @@ def build_path(filepath, out_entities, out_dir, schema): ... {"datatype": "perf", "acquisition": "VAR", "suffix": "asl"}, ... "/output", ... schema, + ... True, ... ) WARNING: DATATYPE CHANGE DETECTED '/output/sub-01/ses-01/perf/sub-01_ses-01_acq-VAR_asl.nii.gz' @@ -1884,14 +1934,15 @@ def build_path(filepath, out_entities, out_dir, schema): It expects a longitudinal structure, so providing a cross-sectional filename won't work. XXX: This is a bug. + It also works for cross-sectional filename. >>> build_path( ... "/input/sub-01/func/sub-01_task-rest_run-01_bold.nii.gz", - ... {"task": "rest", "acquisition": "VAR", "echo": 1, "suffix": "bold"}, + ... {"task": "rest", "acquisition": "VAR", "suffix": "bold"}, ... "/output", ... schema, + ... False, ... ) - Traceback (most recent call last): - ValueError: Could not extract subject or session from ... + '/output/sub-01/func/sub-01_task-rest_acq-VAR_bold.nii.gz' """ exts = Path(filepath).suffixes old_ext = "".join(exts) @@ -1912,9 +1963,13 @@ def build_path(filepath, out_entities, out_dir, schema): file_entities = {entity_names_to_keys[k]: v for k, v in file_entities.items()} sub = get_entity_value(filepath, "sub") - ses = get_entity_value(filepath, "ses") - if sub is None or ses is None: - raise ValueError(f"Could not extract subject or session from {filepath}") + if sub is None: + raise ValueError(f"Could not extract subject from {filepath}") + + if is_longitudinal: + ses = get_entity_value(filepath, "ses") + if ses is None: + raise ValueError(f"Could not extract session from {filepath}") # Add leading zeros to run entity if it's an integer. # If it's a string, respect the value provided. 
@@ -1928,7 +1983,10 @@ def build_path(filepath, out_entities, out_dir, schema): filename = "_".join([f"{key}-{value}" for key, value in file_entities.items()]) if len(filename) > 0: - filename = f"{sub}_{ses}_{filename}_{suffix}{old_ext}" + if is_longitudinal: + filename = f"{sub}_{ses}_{filename}_{suffix}{old_ext}" + elif not is_longitudinal: + filename = f"{sub}_{filename}_{suffix}{old_ext}" else: raise ValueError(f"Could not construct new filename for {filepath}") @@ -1946,5 +2004,9 @@ def build_path(filepath, out_entities, out_dir, schema): print("WARNING: DATATYPE CHANGE DETECTED") # Construct the new filename - new_path = str(Path(out_dir) / sub / ses / dtype_new / filename) + if is_longitudinal: + new_path = str(Path(out_dir) / sub / ses / dtype_new / filename) + elif not is_longitudinal: + new_path = str(Path(out_dir) / sub / dtype_new / filename) + return new_path diff --git a/cubids/metadata_merge.py b/cubids/metadata_merge.py index 6562f35b..f3decd19 100644 --- a/cubids/metadata_merge.py +++ b/cubids/metadata_merge.py @@ -1,4 +1,8 @@ -"""Tools for merging metadata.""" +"""Metadata merging utilities for CuBIDS. + +This module provides utilities for merging metadata in CuBIDS, including functions +for checking merging operations, grouping acquisitions, and handling metadata fields. +""" import json from collections import defaultdict @@ -179,7 +183,18 @@ def merge_without_overwrite(source_meta, dest_meta_orig, raise_on_error=False): def is_nan(val): - """Return True if val is NaN.""" + """Check if the given value is NaN (Not a Number). + + Parameters + ---------- + val : any + The value to check. + + Returns + ------- + bool + True if the value is NaN, False otherwise. + """ if not isinstance(val, float): return False @@ -187,7 +202,25 @@ def is_nan(val): def print_merges(merge_list): - """Print formatted text of merges.""" + """Print formatted text of merges. + + Parameters + ---------- + merge_list : list of tuple + A list of tuples where each tuple contains two elements: + + - src_id : tuple + The source identifier, where the last element is the source ID and + the first element is the source name. + - dest_id : tuple + The destination identifier, where the last element is the destination + ID and the first element is the destination name. + + Returns + ------- + str + A formatted string representing the merges, with each merge on a new line. + """ merge_strings = [] for src_id, dest_id in merge_list: src_id_str = f"{src_id[-1]}:{src_id[0]}" @@ -243,13 +276,13 @@ def merge_json_into_json(from_file, to_file, raise_on_error=False): return 0 -def get_acq_dictionary(): +def get_acq_dictionary(is_longitudinal=False): """Create a BIDS data dictionary from dataframe columns. Parameters ---------- - df : :obj:`pandas.DataFrame` - Pre export TSV that will be converted to a json dictionary. + is_longitudinal : :obj:`bool`, optional + If True, add "session" to acq_dict. Default is False. Returns ------- @@ -258,7 +291,8 @@ def get_acq_dictionary(): """ acq_dict = {} acq_dict["subject"] = {"Description": "Participant ID"} - acq_dict["session"] = {"Description": "Session ID"} + if is_longitudinal: + acq_dict["session"] = {"Description": "Session ID"} docs = " https://cubids.readthedocs.io/en/latest/about.html#definitions" desc = "Acquisition Group. 
See Read the Docs for more information" acq_dict["AcqGroup"] = {"Description": desc + docs} @@ -266,7 +300,7 @@ def get_acq_dictionary(): return acq_dict -def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): +def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level, is_longitudinal=False): """Find unique sets of Key/Param groups across subjects. This writes out the following files: @@ -284,6 +318,8 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): Prefix for output files. acq_group_level : {"subject", "session"} Level at which to group acquisitions. + is_longitudinal : :obj:`bool`, optional + If True, add "session" to acq_dict. Default is False. """ from bids import config from bids.layout import parse_file_entities @@ -298,9 +334,12 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): file_entities = parse_file_entities(row.FilePath) if acq_group_level == "subject": - acq_id = (file_entities.get("subject"), file_entities.get("session")) + if is_longitudinal: + acq_id = (file_entities.get("subject"), file_entities.get("session")) + elif not is_longitudinal: + acq_id = file_entities.get("subject") acq_groups[acq_id].append((row.EntitySet, row.ParamGroup)) - else: + elif is_longitudinal and acq_group_level == "session": acq_id = (file_entities.get("subject"), None) acq_groups[acq_id].append( (row.EntitySet, row.ParamGroup, file_entities.get("session")) @@ -326,17 +365,21 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): for groupnum, content_id_row in enumerate(descending_order, start=1): content_id = content_ids[content_id_row] acq_group_info.append((groupnum, content_id_counts[content_id_row]) + content_id) - for subject, session in contents_to_subjects[content_id]: - grouped_sub_sess.append( - {"subject": "sub-" + subject, "session": session, "AcqGroup": groupnum} - ) + if is_longitudinal: + for subject, session in contents_to_subjects[content_id]: + grouped_sub_sess.append( + {"subject": "sub-" + subject, "session": session, "AcqGroup": groupnum} + ) + elif not is_longitudinal: + for subject in contents_to_subjects[content_id]: + grouped_sub_sess.append({"subject": "sub-" + subject, "AcqGroup": groupnum}) # Write the mapping of subject/session to acq_group_df = pd.DataFrame(grouped_sub_sess) acq_group_df.to_csv(output_prefix + "_AcqGrouping.tsv", sep="\t", index=False) # Create data dictionary for acq group tsv - acq_dict = get_acq_dictionary() + acq_dict = get_acq_dictionary(is_longitudinal) with open(output_prefix + "_AcqGrouping.json", "w") as outfile: json.dump(acq_dict, outfile, indent=4) diff --git a/cubids/tests/test_apply.py b/cubids/tests/test_apply.py index ba92b603..51afa64f 100644 --- a/cubids/tests/test_apply.py +++ b/cubids/tests/test_apply.py @@ -237,33 +237,35 @@ def summary_data(): @pytest.mark.parametrize( - ("name", "skeleton", "intended_for", "expected"), + ("name", "skeleton", "intended_for", "is_longitudinal", "expected"), [ ( "relpath_long", relpath_intendedfor_long, "ses-01/dwi/sub-01_ses-01_acq-VAR_dir-AP_run-01_dwi.nii.gz", + True, "pass", ), ( "bidsuri_long", bidsuri_intendedfor_long, "bids::sub-01/ses-01/dwi/sub-01_ses-01_acq-VAR_dir-AP_run-01_dwi.nii.gz", + True, "pass", ), ( "relpath_cs", relpath_intendedfor_cs, - # XXX: CuBIDS enforces longitudinal dataset, so this fails. 
"dwi/sub-01_acq-VAR_dir-AP_run-01_dwi.nii.gz", - ValueError, + False, + "pass", ), ( "bidsuri_cs", bidsuri_intendedfor_cs, - # XXX: CuBIDS enforces longitudinal dataset, so this fails. "bids::sub-01/dwi/sub-01_acq-VAR_dir-AP_run-01_dwi.nii.gz", - ValueError, + False, + "pass", ), ], ) @@ -274,6 +276,7 @@ def test_cubids_apply_intendedfor( name, skeleton, intended_for, + is_longitudinal, expected, ): """Test cubids apply with different IntendedFor types. @@ -292,6 +295,8 @@ def test_cubids_apply_intendedfor( BIDS skeleton structure. intended_for : str IntendedFor field value. + is_longitudinal : bool + Indicate whether the data structure is longitudinal or cross-sectional. expected : str or Exception Expected result or exception. @@ -308,7 +313,7 @@ def test_cubids_apply_intendedfor( bids_dir = tmpdir / name generate_bids_skeleton(str(bids_dir), skeleton) - if "long" in name: + if is_longitudinal: fdata = files_data["longitudinal"] fmap_json = bids_dir / "sub-01/ses-01/fmap/sub-01_ses-01_dir-AP_epi.json" else: diff --git a/cubids/tests/test_bond.py b/cubids/tests/test_bond.py index a4da48a2..9f7f5b39 100644 --- a/cubids/tests/test_bond.py +++ b/cubids/tests/test_bond.py @@ -946,7 +946,11 @@ def test_session_apply(tmp_path): data_root = get_data(tmp_path) - ses_cubids = CuBIDS(data_root / "inconsistent", acq_group_level="session", use_datalad=True) + ses_cubids = CuBIDS( + data_root / "inconsistent", + acq_group_level="session", + use_datalad=True, + ) ses_cubids.get_tsvs(str(tmp_path / "originals")) @@ -1193,32 +1197,6 @@ def test_bids_version(tmp_path): ), f"Schema version {schema_version} is less than minimum {min_schema_version}" -def test_docker(): - """Verify that docker is installed and the user has permission to run docker images. - - Returns - ------- - int - -1 if Docker can't be found. - 0 if Docker is found, but the user can't connect to the daemon. - 1 if the test run is OK. - """ - try: - return_status = 1 - ret = subprocess.run(["docker", "version"], stdout=subprocess.PIPE, stderr=subprocess.PIPE) - except OSError as e: - from errno import ENOENT - - if e.errno == ENOENT: - print("Cannot find Docker engine!") - return_status = 0 - raise e - if ret.stderr.startswith(b"Cannot connect to the Docker daemon."): - print("Cannot connect to Docker daemon!") - return_status = 0 - assert return_status - - # def test_image(image='pennlinc/bond:latest'): # """Check whether image is present on local system.""" # ret = subprocess.run(['docker', 'images', '-q', image], diff --git a/cubids/validator.py b/cubids/validator.py index 2ee09c25..5c7d97cc 100644 --- a/cubids/validator.py +++ b/cubids/validator.py @@ -1,4 +1,8 @@ -"""Methods for validating BIDS datasets.""" +"""Methods for validating BIDS datasets. + +This module provides functions for validating BIDS datasets, including building +subprocess commands for the BIDS validator and handling validation results. +""" import glob import json @@ -60,7 +64,24 @@ def get_bids_validator_version(): def build_subject_paths(bids_dir): - """Build a list of BIDS dirs with 1 subject each.""" + """Build a dictionary of BIDS directories with one subject each. + + Parameters + ---------- + bids_dir : str + The root directory of the BIDS dataset. + + Returns + ------- + dict + A dictionary where the keys are subject labels and the values are + lists of file paths associated with each subject. + + Raises + ------ + ValueError + If no subjects are found in the specified directory. 
+ """ bids_dir = str(bids_dir) if not bids_dir.endswith("/"): bids_dir += "/" @@ -88,7 +109,21 @@ def build_subject_paths(bids_dir): def build_first_subject_path(bids_dir, subject): - """Build a list of BIDS dirs with 1 subject each.""" + """Build a dictionary containing BIDS directory paths for a single subject. + + Parameters + ---------- + bids_dir : str + The root directory of the BIDS dataset. + subject : str + The path to the subject directory. + + Returns + ------- + dict + A dictionary where the key is the subject label and the value is a list of file paths + within the subject directory and the root BIDS directory. + """ bids_dir = str(bids_dir) if not bids_dir.endswith("/"): bids_dir += "/" @@ -224,7 +259,7 @@ def extract_summary_info(output): def update_dataset_description(path, new_info): - """Update or append information to dataset_description.json. + """Update or append information to dataset_description.json with new information. Parameters ---------- diff --git a/cubids/workflows.py b/cubids/workflows.py index c09366d1..5f419edf 100644 --- a/cubids/workflows.py +++ b/cubids/workflows.py @@ -952,15 +952,36 @@ def remove_metadata_fields(bids_dir, container, fields): def print_metadata_fields(bids_dir, container): - """Print unique metadata fields. + """Print unique metadata fields from a BIDS dataset. + + This function identifies and prints all unique metadata fields from + the `dataset_description.json` file in a BIDS directory. It can run + either directly in Python or within a specified container (Docker or + Singularity). Parameters ---------- bids_dir : :obj:`pathlib.Path` - Path to the BIDS directory. + Path to the BIDS directory containing the `dataset_description.json` file. container : :obj:`str` - Container in which to run the workflow. + Name of the container (e.g., Docker, Singularity) to use for running the + `cubids print-metadata-fields` command. If `None`, the operation is performed + directly in Python without a container. + + Raises + ------ + SystemExit + Raised in the following cases: + - The `dataset_description.json` file is not found in the BIDS directory. + - The subprocess returns a non-zero exit code when executed in a container. + """ + # Check if dataset_description.json exists + dataset_description = bids_dir / "dataset_description.json" + if not dataset_description.exists(): + logger.error("dataset_description.json not found in the BIDS directory.") + sys.exit(1) + # Run directly from python if container is None: bod = CuBIDS(data_root=str(bids_dir), use_datalad=False) diff --git a/docs/conf.py b/docs/conf.py index ae90cf8c..9da1b27f 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -56,6 +56,7 @@ "sphinx_gallery.load_style", "sphinxarg.ext", # argparse extension "sphinxcontrib.bibtex", # bibtex-based bibliographies + "sphinx_design", # for adding in-line badges etc ] # Mock modules in autodoc: @@ -259,3 +260,12 @@ "term", "footcite", ] + +# ----------------------------------------------------------------------------- +# sphinx_copybutton +# ----------------------------------------------------------------------------- +# Configuration for sphinx_copybutton to remove shell prompts, i.e. 
$
+copybutton_prompt_text = "$ "
+copybutton_only_copy_prompt_lines = (
+    False  # ensures all lines are copied, even those without a prompt
+)
diff --git a/docs/example.rst b/docs/example.rst
index c10d5110..b6370f4b 100644
--- a/docs/example.rst
+++ b/docs/example.rst
@@ -174,9 +174,6 @@ Since we ran ``cubids add-nifti-info`` with the ``--use-datalad`` flag set,
 BIDS validation
 ---------------
 
-The next step in the ``CuBIDS`` workflow is to run BIDS validation
-to detect potential curation errors using ``cubids validate``.
-
 .. code-block:: console
 
     $ cubids validate BIDS_Dataset_DataLad v0 --sequential
@@ -187,6 +184,16 @@
     This can be helpful for identifying heterogeneous elements,
     but can be slowed down by extremely large datasets.
 
+.. warning::
+    For offline use cases, please see the dedicated section of the `Installation page
+    <https://cubids.readthedocs.io/en/latest/installation.html>`_ on how to download a local version
+    of the validator.
+
+    After that, you will need to add the ``--local-validator`` option to the command string above.
+
+The next step in the ``CuBIDS`` workflow is to run BIDS validation
+to detect potential curation errors using ``cubids validate``.
+
 This command produces the following tsv:
 
 .. csv-table:: v0_validation.tsv
diff --git a/docs/installation.rst b/docs/installation.rst
index b6ebda3b..b8c323e7 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -54,6 +54,26 @@ We can accomplish this using the following command:
 The new schema-based ``bids-validator`` doesn't need to be installed and
 will be implemented automatically when `cubids validate` is called
 
+.. dropdown:: If there is no Internet connection on compute nodes
+
+    If your HPC doesn't allow internet access on its compute nodes, it won't be able to run the online
+    version of the BIDS validator. In that scenario, you need to install the BIDS validator on your HPC
+    and then point to the installed version in your ``cubids validate`` calls.
+    To do that, after installing deno, run one of the commands below to download the latest version
+    of the bids-validator into your virtual environment, either as a lightweight script installed
+    into ``$HOME/.deno/bin``, or as a compiled binary, respectively:
+
+    .. code-block:: console
+
+        $ deno install -ERN -g -n bids-validator jsr:@bids/validator
+
+    or:
+
+    .. code-block:: console
+
+        $ deno compile -ERN -o bids-validator jsr:@bids/validator
+
+    For more information, see: https://bids-validator.readthedocs.io/en/latest/user_guide/command-line.html
 
 We also recommend using ``CuBIDS`` with the optional ``DataLad`` version control capabilities.
 We use ``DataLad`` throughout our walkthrough of the CuBIDS Workflow on
diff --git a/pyproject.toml b/pyproject.toml
index b90d2032..1337663e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,7 +23,7 @@ license = {file = "LICENSE"}
 requires-python = ">=3.9"
 dependencies = [
     "datalad>=0.13.5,!=0.17.3,!=0.17.0,!=0.16.1",
-    "numpy<=2.2.1",
+    "numpy<=2.2.2",
     "pandas<=2.2.3",
     "pybids<=0.18.1",
     "pyyaml",
@@ -51,6 +51,7 @@ doc = [
     "sphinx_markdown_tables",
     "sphinx_rtd_theme",
     "sphinxcontrib-bibtex",
+    "sphinx_design",
 ]
 tests = [
     "codespell",