Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix: Update CuBIDS to allow both longitudinal and cross-sectional structure by adding is_longitudinal attribute to CUBIDS class #406

Merged
merged 14 commits into from
Jan 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cubids/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import importlib.resources
import yaml


def load_config(config_file):
"""Load a YAML file containing a configuration for param groups.

Expand Down
91 changes: 76 additions & 15 deletions cubids/cubids.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ class CuBIDS(object):
A data dictionary for TSV outputs.
use_datalad : :obj:`bool`
If True, use datalad to track changes to the BIDS dataset.
is_longitudinal : :obj:`bool`
If True, the dataset is organized with session ("ses-") directories, and the session label is included in file paths.
"""

def __init__(
Expand All @@ -110,11 +112,17 @@ def __init__(
self.cubids_code_dir = Path(self.path + "/code/CuBIDS").is_dir()
self.data_dict = {} # data dictionary for TSV outputs
self.use_datalad = use_datalad # True if flag set, False if flag unset
self.is_longitudinal = self._infer_longitudinal() # inferred from dataset structure

if self.use_datalad:
self.init_datalad()

if self.acq_group_level == "session":
if self.is_longitudinal and self.acq_group_level == "session":
NON_KEY_ENTITIES.remove("session")
tientong98 marked this conversation as resolved.
Show resolved Hide resolved
elif not self.is_longitudinal and self.acq_group_level == "session":
raise ValueError(
'Data is not longitudinal, so "session" is not a valid grouping level.'
)

@property
def layout(self):
Expand All @@ -128,6 +136,10 @@ def layout(self):
# print("LAYOUT OBJECT SET")
return self._layout

def _infer_longitudinal(self):
    """Infer whether the dataset is longitudinal from its directory layout.

    The dataset is considered longitudinal when at least one subject
    directory (``sub-*``) directly under the dataset root contains a
    session directory (``ses-*``).

    Only directory names at those two levels are inspected. Matching the
    substring "ses-" against whole path strings would also match the
    dataset root itself or unrelated files (e.g. a folder called
    ``responses-study``), and would require walking the entire tree.

    Returns
    -------
    bool
        True if a ``sub-*/ses-*`` directory exists under ``self.path``,
        False otherwise (including when the dataset is empty).
    """
    return any(
        candidate.is_dir() for candidate in Path(self.path).glob("sub-*/ses-*")
    )

def reset_bids_layout(self, validate=False):
"""Reset the BIDS layout.

Expand Down Expand Up @@ -473,6 +485,7 @@ def change_filename(self, filepath, entities):
filepath=filepath,
entities=entities,
out_dir=str(self.path),
is_longitudinal=self.is_longitudinal,
)

exts = Path(filepath).suffixes
Expand All @@ -481,7 +494,8 @@ def change_filename(self, filepath, entities):
suffix = entities["suffix"]

sub = get_entity_value(filepath, "sub")
ses = get_entity_value(filepath, "ses")
if self.is_longitudinal:
ses = get_entity_value(filepath, "ses")

# Add the scan path + new path to the lists of old, new filenames
self.old_filenames.append(filepath)
Expand Down Expand Up @@ -577,7 +591,10 @@ def change_filename(self, filepath, entities):
self.new_filenames.append(new_labeling)

# RENAME INTENDED FORS!
ses_path = self.path + "/" + sub + "/" + ses
if self.is_longitudinal:
ses_path = self.path + "/" + sub + "/" + ses
elif not self.is_longitudinal:
ses_path = self.path + "/" + sub
files_with_if = []
files_with_if += Path(ses_path).rglob("fmap/*.json")
files_with_if += Path(ses_path).rglob("perf/*_m0scan.json")
Expand All @@ -600,6 +617,7 @@ def change_filename(self, filepath, entities):
data["IntendedFor"].remove(item)
# add new filename
data["IntendedFor"].append(_get_participant_relative_path(new_path))

if item == _get_bidsuri(filepath, self.path):
# remove old filename
data["IntendedFor"].remove(item)
Expand Down Expand Up @@ -1363,6 +1381,7 @@ def get_layout(self):
return self.layout


# XXX: Remove _validate_json?
def _validate_json():
"""Validate a JSON file's contents.

Expand Down Expand Up @@ -1402,8 +1421,29 @@ def _get_participant_relative_path(scan):

This is what will appear in the IntendedFor field of any association.

Examples:
>>> _get_participant_relative_path(
... "/path/to/dset/sub-01/ses-01/func/sub-01_ses-01_bold.nii.gz",
... )
'ses-01/func/sub-01_ses-01_bold.nii.gz'

>>> _get_participant_relative_path(
... "/path/to/dset/sub-01/func/sub-01_bold.nii.gz",
... )
'func/sub-01_bold.nii.gz'

>>> _get_participant_relative_path(
... "/path/to/dset/ses-01/func/ses-01_bold.nii.gz",
... )
Traceback (most recent call last):
ValueError: Could not find subject in ...
"""
return "/".join(Path(scan).parts[-3:])
parts = Path(scan).parts
# Find the first part that starts with "sub-"
for i, part in enumerate(parts):
if part.startswith("sub-"):
return "/".join(parts[i + 1 :])
raise ValueError(f"Could not find subject in {scan}")


def _get_bidsuri(filename, dataset_root):
Expand Down Expand Up @@ -1734,7 +1774,7 @@ def get_entity_value(path, key):
return part


def build_path(filepath, entities, out_dir):
def build_path(filepath, entities, out_dir, is_longitudinal):
"""Build a new path for a file based on its BIDS entities.

Parameters
Expand All @@ -1746,6 +1786,8 @@ def build_path(filepath, entities, out_dir):
This should include all of the entities in the filename *except* for subject and session.
out_dir : str
The output directory for the new file.
is_longitudinal : bool
If True, add "ses" to file path.

Returns
-------
Expand All @@ -1758,6 +1800,7 @@ def build_path(filepath, entities, out_dir):
... "/input/sub-01/ses-01/anat/sub-01_ses-01_T1w.nii.gz",
... {"acquisition": "VAR", "suffix": "T2w"},
... "/output",
... True,
... )
'/output/sub-01/ses-01/anat/sub-01_ses-01_acq-VAR_T2w.nii.gz'

Expand All @@ -1766,6 +1809,7 @@ def build_path(filepath, entities, out_dir):
... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz",
... {"task": "rest", "run": "2", "acquisition": "VAR", "suffix": "bold"},
... "/output",
... True,
... )
'/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-2_bold.nii.gz'

Expand All @@ -1775,6 +1819,7 @@ def build_path(filepath, entities, out_dir):
... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-00001_bold.nii.gz",
... {"task": "rest", "run": 2, "acquisition": "VAR", "suffix": "bold"},
... "/output",
... True,
... )
'/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-00002_bold.nii.gz'

Expand All @@ -1784,6 +1829,7 @@ def build_path(filepath, entities, out_dir):
... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-1_bold.nii.gz",
... {"task": "rest", "run": 2, "acquisition": "VAR", "suffix": "bold"},
... "/output",
... True,
... )
'/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-2_bold.nii.gz'

Expand All @@ -1792,6 +1838,7 @@ def build_path(filepath, entities, out_dir):
... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-1_bold.nii.gz",
... {"task": "rest", "run": "2", "acquisition": "VAR", "suffix": "bold"},
... "/output",
... True,
... )
'/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-2_bold.nii.gz'

Expand All @@ -1801,6 +1848,7 @@ def build_path(filepath, entities, out_dir):
... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz",
... {"task": "rest", "acquisition": "VAR", "suffix": "bold"},
... "/output",
... True,
... )
'/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_bold.nii.gz'

Expand All @@ -1809,6 +1857,7 @@ def build_path(filepath, entities, out_dir):
... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz",
... {"subject": "02", "task": "rest", "acquisition": "VAR", "suffix": "bold"},
... "/output",
... True,
... )
'/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_bold.nii.gz'

Expand All @@ -1817,6 +1866,7 @@ def build_path(filepath, entities, out_dir):
... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz",
... {"task": "rest", "acquisition": "VAR", "echo": 1, "suffix": "bold"},
... "/output",
... True,
... )
'/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_bold.nii.gz'

Expand All @@ -1825,19 +1875,19 @@ def build_path(filepath, entities, out_dir):
... "/input/sub-01/ses-01/anat/sub-01_ses-01_asl.nii.gz",
... {"datatype": "perf", "acquisition": "VAR", "suffix": "asl"},
... "/output",
... True,
... )
WARNING: DATATYPE CHANGE DETECTED
'/output/sub-01/ses-01/perf/sub-01_ses-01_acq-VAR_asl.nii.gz'

It expects a longitudinal structure, so providing a cross-sectional filename won't work.
XXX: This is a bug.
It also works for cross-sectional filenames.
>>> build_path(
... "/input/sub-01/func/sub-01_task-rest_run-01_bold.nii.gz",
... {"task": "rest", "acquisition": "VAR", "echo": 1, "suffix": "bold"},
... {"task": "rest", "acquisition": "VAR", "suffix": "bold"},
... "/output",
... False,
... )
Traceback (most recent call last):
ValueError: Could not extract subject or session from ...
'/output/sub-01/func/sub-01_task-rest_acq-VAR_bold.nii.gz'
"""
exts = Path(filepath).suffixes
old_ext = "".join(exts)
Expand All @@ -1853,9 +1903,13 @@ def build_path(filepath, entities, out_dir):
entity_file_keys.append(key)

sub = get_entity_value(filepath, "sub")
ses = get_entity_value(filepath, "ses")
if sub is None or ses is None:
raise ValueError(f"Could not extract subject or session from {filepath}")
if sub is None:
raise ValueError(f"Could not extract subject from {filepath}")

if is_longitudinal:
ses = get_entity_value(filepath, "ses")
if ses is None:
raise ValueError(f"Could not extract session from {filepath}")

# Add leading zeros to run entity if it's an integer.
# If it's a string, respect the value provided.
Expand All @@ -1874,7 +1928,10 @@ def build_path(filepath, entities, out_dir):
.replace("reconstruction", "rec")
)
if len(filename) > 0:
filename = f"{sub}_{ses}_{filename}_{suffix}{old_ext}"
if is_longitudinal:
filename = f"{sub}_{ses}_{filename}_{suffix}{old_ext}"
elif not is_longitudinal:
filename = f"{sub}_{filename}_{suffix}{old_ext}"
else:
raise ValueError(f"Could not construct new filename for {filepath}")

Expand All @@ -1894,5 +1951,9 @@ def build_path(filepath, entities, out_dir):
dtype_new = dtype_orig

# Construct the new filename
new_path = str(Path(out_dir) / sub / ses / dtype_new / filename)
if is_longitudinal:
new_path = str(Path(out_dir) / sub / ses / dtype_new / filename)
elif not is_longitudinal:
new_path = str(Path(out_dir) / sub / dtype_new / filename)

return new_path
34 changes: 22 additions & 12 deletions cubids/metadata_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,13 +276,13 @@ def merge_json_into_json(from_file, to_file, raise_on_error=False):
return 0


def get_acq_dictionary():
def get_acq_dictionary(is_longitudinal=False):
"""Create a BIDS data dictionary from dataframe columns.

Parameters
----------
df : :obj:`pandas.DataFrame`
Pre export TSV that will be converted to a json dictionary.
is_longitudinal : :obj:`bool`, optional
If True, add "session" to acq_dict. Default is False.

Returns
-------
Expand All @@ -291,15 +291,16 @@ def get_acq_dictionary():
"""
acq_dict = {}
acq_dict["subject"] = {"Description": "Participant ID"}
acq_dict["session"] = {"Description": "Session ID"}
if is_longitudinal:
acq_dict["session"] = {"Description": "Session ID"}
docs = " https://cubids.readthedocs.io/en/latest/about.html#definitions"
desc = "Acquisition Group. See Read the Docs for more information"
acq_dict["AcqGroup"] = {"Description": desc + docs}

return acq_dict


def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level):
def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level, is_longitudinal=False):
"""Find unique sets of Key/Param groups across subjects.

This writes out the following files:
Expand All @@ -317,6 +318,8 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level):
Prefix for output files.
acq_group_level : {"subject", "session"}
Level at which to group acquisitions.
is_longitudinal : :obj:`bool`, optional
If True, add "session" to acq_dict. Default is False.
"""
from bids import config
from bids.layout import parse_file_entities
Expand All @@ -331,9 +334,12 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level):
file_entities = parse_file_entities(row.FilePath)

if acq_group_level == "subject":
acq_id = (file_entities.get("subject"), file_entities.get("session"))
if is_longitudinal:
acq_id = (file_entities.get("subject"), file_entities.get("session"))
elif not is_longitudinal:
acq_id = file_entities.get("subject")
acq_groups[acq_id].append((row.EntitySet, row.ParamGroup))
else:
elif is_longitudinal and acq_group_level == "session":
acq_id = (file_entities.get("subject"), None)
acq_groups[acq_id].append(
(row.EntitySet, row.ParamGroup, file_entities.get("session"))
Expand All @@ -359,17 +365,21 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level):
for groupnum, content_id_row in enumerate(descending_order, start=1):
content_id = content_ids[content_id_row]
acq_group_info.append((groupnum, content_id_counts[content_id_row]) + content_id)
for subject, session in contents_to_subjects[content_id]:
grouped_sub_sess.append(
{"subject": "sub-" + subject, "session": session, "AcqGroup": groupnum}
)
if is_longitudinal:
for subject, session in contents_to_subjects[content_id]:
grouped_sub_sess.append(
{"subject": "sub-" + subject, "session": session, "AcqGroup": groupnum}
)
elif not is_longitudinal:
for subject in contents_to_subjects[content_id]:
grouped_sub_sess.append({"subject": "sub-" + subject, "AcqGroup": groupnum})

# Write the mapping of subject/session to acquisition group
acq_group_df = pd.DataFrame(grouped_sub_sess)
acq_group_df.to_csv(output_prefix + "_AcqGrouping.tsv", sep="\t", index=False)

# Create data dictionary for acq group tsv
acq_dict = get_acq_dictionary()
acq_dict = get_acq_dictionary(is_longitudinal)
with open(output_prefix + "_AcqGrouping.json", "w") as outfile:
json.dump(acq_dict, outfile, indent=4)

Expand Down
Loading
Loading