diff --git a/brainstat/__init__.py b/brainstat/__init__.py index ecfbba10..64d6974e 100644 --- a/brainstat/__init__.py +++ b/brainstat/__init__.py @@ -1,2 +1,2 @@ """Neuroimaging statistics toolbox.""" -__version__ = "0.3.2" +__version__ = "0.3.3" diff --git a/brainstat/_utils.py b/brainstat/_utils.py index 57095916..7603a988 100644 --- a/brainstat/_utils.py +++ b/brainstat/_utils.py @@ -22,6 +22,7 @@ "ABIDE_DATA_DIR": BRAINSTAT_DATA_DIR / "abide_data", "BIGBRAIN_DATA_DIR": BRAINSTAT_DATA_DIR / "bigbrain_data", "GRADIENT_DATA_DIR": BRAINSTAT_DATA_DIR / "gradient_data", + "MICS_DATA_DIR": BRAINSTAT_DATA_DIR / "mics_data", "NEUROSYNTH_DATA_DIR": BRAINSTAT_DATA_DIR / "neurosynth_data", "PARCELLATION_DATA_DIR": BRAINSTAT_DATA_DIR / "parcellation_data", "SURFACE_DATA_DIR": BRAINSTAT_DATA_DIR / "surface_data", @@ -81,6 +82,15 @@ def generate_data_fetcher_json() -> None: "civet164k": { "url": "https://box.bic.mni.mcgill.ca/s/rei5HtTDvexlEPA/download" }, + "fsaverage5": { + "url": "https://box.bic.mni.mcgill.ca/s/jsSDyiDcm9KEQpf/download" + }, + "fsaverage": { + "url": "https://box.bic.mni.mcgill.ca/s/XiZx9HfHaufb4kD/download" + }, + "fslr32k": { + "url": "https://box.bic.mni.mcgill.ca/s/cFXCrSkfiJFjUJ0/download" + }, }, "spheres": { "civet41k": { @@ -92,6 +102,14 @@ def generate_data_fetcher_json() -> None: "url": "https://s3.amazonaws.com/fcp-indi/data/Projects/ABIDE_Initiative/Phenotypic_V1_0b_preprocessed1.csv" }, }, + "mics_tutorial": { + "demographics": { + "url": "https://box.bic.mni.mcgill.ca/s/7bW9JIpvQvSJeuf/download" + }, + "thickness": { + "url": "https://box.bic.mni.mcgill.ca/s/kMi6lU91piwdaCf/download" + }, + }, } with open(json_file, "w") as f: json.dump(data, f, indent=4, sort_keys=True) @@ -129,7 +147,9 @@ def deprecated_func(*args, **kwargs): return deprecated_decorator -def _download_file(url: str, output_file: Path, overwrite: bool = False) -> None: +def _download_file( + url: str, output_file: Path, overwrite: bool = False, verbose=True +) -> None: """Downloads a file. Parameters @@ -140,12 +160,15 @@ def _download_file(url: str, output_file: Path, overwrite: bool = False) -> None Path object of the output file. overwrite : bool If true, overwrite existing files, defaults to False. + verbose : bool + If true, print a download message, defaults to True. """ if output_file.exists() and not overwrite: return - logger.info("Downloading " + str(output_file) + " from " + url + ".") + if verbose: + logger.info("Downloading " + str(output_file) + " from " + url + ".") with urllib.request.urlopen(url) as response, open(output_file, "wb") as out_file: shutil.copyfileobj(response, out_file) diff --git a/brainstat/context/histology.py b/brainstat/context/histology.py index 63d34201..ad087b8f 100644 --- a/brainstat/context/histology.py +++ b/brainstat/context/histology.py @@ -137,7 +137,7 @@ def read_histology_profile( data_dir = Path(data_dir) if data_dir else data_directories["BIGBRAIN_DATA_DIR"] if template[:5] == "civet": - logger.warn( + logger.info( "CIVET histology profiles were not included with BigBrainWarp. Interpolating from fsaverage using nearest neighbor interpolation." ) civet_template = template diff --git a/brainstat/data/data_urls.json b/brainstat/data/data_urls.json index 0ec4851c..c23e8fc1 100644 --- a/brainstat/data/data_urls.json +++ b/brainstat/data/data_urls.json @@ -26,6 +26,23 @@ }, "civet41k": { "url": "https://box.bic.mni.mcgill.ca/s/9kzBetBCZkkqN6w/download" + }, + "fsaverage": { + "url": "https://box.bic.mni.mcgill.ca/s/XiZx9HfHaufb4kD/download" + }, + "fsaverage5": { + "url": "https://box.bic.mni.mcgill.ca/s/jsSDyiDcm9KEQpf/download" + }, + "fslr32k": { + "url": "https://box.bic.mni.mcgill.ca/s/cFXCrSkfiJFjUJ0/download" + } + }, + "mics_tutorial": { + "demographics": { + "url": "https://box.bic.mni.mcgill.ca/s/7bW9JIpvQvSJeuf/download" + }, + "thickness": { + "url": "https://box.bic.mni.mcgill.ca/s/kMi6lU91piwdaCf/download" } }, "neurosynth_precomputed": { diff --git a/brainstat/datasets/base.py b/brainstat/datasets/base.py index b7dd5376..518a5480 100644 --- a/brainstat/datasets/base.py +++ b/brainstat/datasets/base.py @@ -65,7 +65,7 @@ def fetch_parcellation( data_dir.mkdir(parents=True, exist_ok=True) if template == "civet41k" or template == "civet164k": - logger.warn( + logger.info( "CIVET parcellations were not included with the toolbox. Interpolating parcellation from the fsaverage surface with a nearest neighbor interpolation." ) civet_template = template @@ -169,7 +169,8 @@ def fetch_mask( Parameters ---------- template : str - Name of the surface template. Valid templates are "civet41k" and "civet164k". + Name of the surface template. Valid templates are: "fsaverage5", + "fsaverage", "fslr32k", "civet41k", and "civet164k". join : bool, optional If true, returns a numpy array containing the mask. If false, returns a tuple containing the left and right hemispheric masks, respectively, by @@ -183,6 +184,7 @@ def fetch_mask( Midline mask, either as a single array or a tuple of a left and right hemispheric array. """ + data_dir = Path(data_dir) if data_dir else data_directories["SURFACE_DATA_DIR"] data_dir.mkdir(parents=True, exist_ok=True) @@ -235,7 +237,7 @@ def fetch_gradients( hf = h5py.File(gradients_file, "r") if template == "civet41k" or template == "civet164k": - logger.warn( + logger.info( "CIVET gradients were not included with the toolbox. Interpolating gradients from the fsaverage surface with a nearest interpolation." ) fsaverage_left, fsaverage_right = fetch_template_surface( diff --git a/brainstat/stats/SLM.py b/brainstat/stats/SLM.py index deeb88cf..6b9b293c 100644 --- a/brainstat/stats/SLM.py +++ b/brainstat/stats/SLM.py @@ -31,7 +31,7 @@ class SLM: def __init__( self, model: Union[FixedEffect, MixedEffect], - contrast: Union[ArrayLike, FixedEffect], + contrast: ArrayLike, surf: Optional[Union[str, dict, BSPolyData]] = None, mask: Optional[ArrayLike] = None, *, @@ -49,7 +49,7 @@ def __init__( ---------- model : brainstat.stats.terms.FixedEffect, brainstat.stats.terms.MixedEffect The linear model to be fitted of dimensions (observations, predictors). - contrast : array-like, brainstat.stats.terms.FixedEffect + contrast : array-like Vector of contrasts in the observations. surf : str, dict, BSPolyData, optional A surface provided as either a dictionary with keys 'tri' for its @@ -85,7 +85,7 @@ def __init__( """ # Input arguments. self.model = model - self.contrast = contrast + self.contrast = np.array(contrast) if isinstance(surf, str): self.surf_name = surf @@ -133,6 +133,10 @@ def fit(self, Y: np.ndarray) -> None: raise NotImplementedError( "One-tailed tests are not implemented for multivariate data." ) + if Y.shape[2] > 3 and "rft" in self.correction: + raise NotImplementedError( + "Random Field Theory corrections are not implemented for more than three variates." + ) student_t_test = Y.shape[2] == 1 else: student_t_test = True diff --git a/brainstat/stats/_multiple_comparisons.py b/brainstat/stats/_multiple_comparisons.py index afdabee2..e4041a90 100644 --- a/brainstat/stats/_multiple_comparisons.py +++ b/brainstat/stats/_multiple_comparisons.py @@ -229,6 +229,8 @@ def _random_field_theory(self) -> valid_rft_output: )[0] tlim = tlim[1] pval["P"] = pval["P"] * (self.t[0, :] > tlim) + (self.t[0, :] <= tlim) + if pval["C"] is not None: + pval["C"] = pval["C"].flatten() return pval, peak, clus, clusid @@ -916,20 +918,37 @@ def peak_clus( edg = voxid[edg[np.all(excurset[edg], 1), :]] nf = np.arange(1, n + 1) - # Find cluster id's in nf (from Numerical Recipes in C, page 346): - for el in range(1, edg.shape[0] + 1): - j = edg[el - 1, 0] - k = edg[el - 1, 1] - while nf[j - 1] != j: - j = nf[j - 1] - while nf[k - 1] != k: - k = nf[k - 1] - if j != k: - nf[j - 1] = k - - for j in range(1, n + 1): - while nf[j - 1] != nf[nf[j - 1] - 1]: - nf[j - 1] = nf[nf[j - 1] - 1] + # Find cluster id's in nf + not_used_indexes = set(np.arange(1, n + 1)) + + cluster_val = 1 + for i in range(1, n + 1): + if i not in not_used_indexes: + continue + in_cluster = set([i]) + not_used_indexes.remove(i) + + neighbours = ( + set(edg[np.isin(edg, list(in_cluster)).any(axis=1)].ravel()) + & not_used_indexes + ) + in_cluster = in_cluster | neighbours + + while True: + neighbours = ( + set(edg[np.isin(edg, list(neighbours)).any(axis=1)].ravel()) + & not_used_indexes + ) + not_used_indexes = not_used_indexes - neighbours + if len(neighbours) == 0: + break + else: + in_cluster = in_cluster | neighbours + + in_cluster_idx = [x - 1 for x in list(in_cluster)] + nf[in_cluster_idx] = cluster_val + + cluster_val = cluster_val + 1 vox = np.argwhere(excurset) + 1 ivox = np.argwhere(np.in1d(vox, lmvox)) + 1 diff --git a/brainstat/stats/_t_test.py b/brainstat/stats/_t_test.py index 9d3c6394..049412be 100644 --- a/brainstat/stats/_t_test.py +++ b/brainstat/stats/_t_test.py @@ -147,98 +147,32 @@ def _t_test(self) -> None: vf = np.divide(np.sum(np.square(np.dot(c.T, pinvX)), axis=1), self.df) self.sd = np.sqrt(vf * self.SSE[jj, :]) - if k == 2: - det = np.multiply(self.SSE[0, :], self.SSE[2, :]) - np.square( - self.SSE[1, :] - ) - - self.t = ( - np.multiply(np.square(self.ef[0, :]), self.SSE[2, :]) - + np.multiply(np.square(self.ef[1, :]), self.SSE[0, :]) - - np.multiply( - np.multiply(2 * self.ef[0, :], self.ef[1, :]), self.SSE[1, :] - ) - ) - - if k == 3: - det = ( - np.multiply( - self.SSE[0, :], - ( - np.multiply(self.SSE[2, :], self.SSE[5, :]) - - np.square(self.SSE[4, :]) - ), - ) - - np.multiply(self.SSE[5, :], np.square(self.SSE[1, :])) - + np.multiply( - self.SSE[3, :], - ( - np.multiply(self.SSE[1, :], self.SSE[4, :]) * 2 - - np.multiply(self.SSE[2, :], self.SSE[3, :]) - ), - ) - ) - - self.t = np.multiply( - np.square(self.ef[0, :]), - ( - np.multiply(self.SSE[2, :], self.SSE[5, :]) - - np.square(self.SSE[4, :]) - ), - ) - - self.t = self.t + np.multiply( - np.square(self.ef[1, :]), - ( - np.multiply(self.SSE[0, :], self.SSE[5, :]) - - np.square(self.SSE[3, :]) - ), - ) - - self.t = self.t + np.multiply( - np.square(self.ef[2, :]), - ( - np.multiply(self.SSE[0, :], self.SSE[2, :]) - - np.square(self.SSE[1, :]) - ), - ) - - self.t = self.t + np.multiply( - 2 * self.ef[0, :], - np.multiply( - self.ef[1, :], - ( - np.multiply(self.SSE[3, :], self.SSE[4, :]) - - np.multiply(self.SSE[1, :], self.SSE[5, :]) - ), - ), - ) - - self.t = self.t + np.multiply( - 2 * self.ef[0, :], - np.multiply( - self.ef[2, :], - ( - np.multiply(self.SSE[1, :], self.SSE[4, :]) - - np.multiply(self.SSE[2, :], self.SSE[3, :]) - ), - ), - ) - - self.t = self.t + np.multiply( - 2 * self.ef[1, :], - np.multiply( - self.ef[2, :], - ( - np.multiply(self.SSE[1, :], self.SSE[3, :]) - - np.multiply(self.SSE[0, :], self.SSE[4, :]) - ), - ), - ) - - if k > 3: - sys.exit("Hotelling" "s T for k>3 not programmed yet") - - self.t = np.multiply(np.divide(self.t, (det + (det <= 0))), (det > 0)) / vf - self.t = np.multiply(np.sqrt(self.t + (self.t <= 0)), (self.t > 0)) + # Initialize some variables + self.t = np.zeros(v) + indices_lower = np.tril_indices(self.k) + sse_indices = np.zeros((self.k, self.k), dtype=int) + sse_indices[indices_lower] = np.arange(0, self.k * (self.k + 1) / 2) + sse_indices += np.tril(sse_indices, -1).T + + M = np.zeros((self.k + 1, self.k + 1)) + M_ef = np.zeros((self.k + 1, self.k + 1), dtype=bool) + M_ef[1:, 0] = True + M_ef[0, 1:] = True + M_sse = np.zeros((self.k + 1, self.k + 1), dtype=bool) + M_sse[1:, 1:] = True + + ef_duplicate = np.concatenate((self.ef, self.ef), axis=0) + for i in range(v): + sse_vertex = self.SSE[:, i] + sse_matrix = sse_vertex[sse_indices] + + det_sse = np.linalg.det(sse_matrix) + if det_sse <= 0: + self.t[i] = 0 + else: + M[M_ef] = ef_duplicate[:, i] + M[M_sse] = sse_matrix.flatten() + + self.t[i] = np.sqrt(-np.linalg.det(M) / det_sse / vf) + self.t = np.atleast_2d(self.t) diff --git a/brainstat/stats/terms.py b/brainstat/stats/terms.py index f15226ee..7ff3f5e7 100644 --- a/brainstat/stats/terms.py +++ b/brainstat/stats/terms.py @@ -1,4 +1,5 @@ """Classes for fixed, mixed, and random effects.""" +import difflib import re from typing import Any, List, Optional, Sequence, Union @@ -484,12 +485,19 @@ def __init__( if v != 1: name_ran += f"{v}**2" else: - name = check_names(ran) - if name is not None and name_ran is None: - name_ran = name + if name_ran is None: + name = check_names(ran) + if name: + name_ran = name[0] + for i in range(1, len(name)): + sm = difflib.SequenceMatcher(None, name_ran, name[i]) + match = sm.find_longest_match( + 0, len(name_ran), 0, len(name[i]) + ) + name_ran = name_ran[match.a : match.a + match.size] + ran = ran @ ran.T ran = ran.values.ravel() - self.variance = FixedEffect(ran, names=name_ran, add_intercept=False) self.mean = FixedEffect(fix, names=name_fix, add_intercept=add_intercept) diff --git a/brainstat/tests/test_random_field_theory.py b/brainstat/tests/test_random_field_theory.py index 6b6bfa43..1d376d8d 100755 --- a/brainstat/tests/test_random_field_theory.py +++ b/brainstat/tests/test_random_field_theory.py @@ -30,6 +30,11 @@ def dummy_test(infile, expfile): efile = open(expfile, "br") expdic = pickle.load(efile) efile.close() + if expdic["pval"]["C"]: + print(expdic["pval"]) + expdic["pval"]["C"] = expdic["pval"][ + "C" + ].flatten() # Dimensions were modified since generation of test data. expected_output = (expdic["pval"], expdic["peak"], expdic["clus"], expdic["clusid"]) testout = [] diff --git a/brainstat/tutorial/utils.py b/brainstat/tutorial/utils.py index 77c07824..4d4aacfe 100644 --- a/brainstat/tutorial/utils.py +++ b/brainstat/tutorial/utils.py @@ -1,16 +1,57 @@ -import shutil -import warnings from pathlib import Path from typing import Optional, Sequence, Tuple, Union from urllib.error import HTTPError -from urllib.request import urlopen +import h5py import numpy as np import pandas as pd -from sklearn.utils import Bunch from tqdm import tqdm -from brainstat._utils import data_directories, read_data_fetcher_json +from brainstat._utils import ( + _download_file, + data_directories, + logger, + read_data_fetcher_json, +) + + +def fetch_mics_data( + data_dir: Optional[Union[str, Path]] = None, + overwrite: bool = False, +) -> Tuple[np.ndarray, pd.DataFrame]: + """Fetches MICS cortical thickness data. + + Parameters + ---------- + data_dir : str, pathlib.Path, optional + Path to store the MICS data, by default $HOME_DIR/brainstat_data/mics_data. + overwrite : bool, optional + If true overwrites existing data, by default False + + Returns + ------- + np.ndarray + Subject-by-vertex cortical thickness data on fsaverage5. + pd.DataFrame + Subject demographics. + """ + + data_dir = Path(data_dir) if data_dir else data_directories["MICS_DATA_DIR"] + data_dir.mkdir(exist_ok=True, parents=True) + demographics_file = data_dir / "mics_demographics.csv" + + demographics_url = read_data_fetcher_json()["mics_tutorial"]["demographics"] + _download_file(demographics_url["url"], demographics_file, overwrite) + df = pd.read_csv(demographics_file) + + thickness_file = data_dir / "mics_thickness.h5" + thickness_url = read_data_fetcher_json()["mics_tutorial"]["thickness"] + _download_file(thickness_url["url"], thickness_file, overwrite) + + with h5py.File(thickness_file, "r") as f: + thickness = f["thickness"][:] + + return thickness, df def fetch_abide_data( @@ -19,18 +60,40 @@ def fetch_abide_data( keep_control: bool = True, keep_patient: bool = True, overwrite: bool = False, -) -> Tuple[Bunch, Bunch]: - + min_rater_ok: int = 3, +) -> Tuple[np.ndarray, pd.DataFrame]: + """Fetches ABIDE cortical thickness data. + + Parameters + ---------- + data_dir : str, pathlib.Path, optional + Path to store the MICS data, by default $HOME_DIR/brainstat_data/mics_data. + sites : list, tuple, optional + List of sites to include. If none, uses all sites, by default None. + keep_control : bool, optional + If true keeps control subjects, by default True. + keep_patient : bool, optional + If true keeps patient subjects, by default True. + overwrite : bool, optional + If true overwrites existing data, by default False. + min_rater_ok : int, optional + Minimum number of raters who approved the data, by default 3. + + Returns + ------- + np.ndarray + Subject-by-vertex cortical thickness data on fsaverage5. + pd.DataFrame + Subject demographics. + """ data_dir = Path(data_dir) if data_dir else data_directories["ABIDE_DATA_DIR"] data_dir.mkdir(exist_ok=True, parents=True) summary_spreadsheet = data_dir / "summary_spreadsheet.csv" - if not summary_spreadsheet.is_file(): - summary_url = read_data_fetcher_json()["abide_tutorial"]["summary_spreadsheet"] - _download_file(summary_spreadsheet, summary_url["url"]) - + summary_url = read_data_fetcher_json()["abide_tutorial"]["summary_spreadsheet"] + _download_file(summary_url["url"], summary_spreadsheet, overwrite) df = pd.read_csv(summary_spreadsheet) - _select_subjects(df, sites, keep_patient, keep_control) + _select_subjects(df, sites, keep_patient, keep_control, min_rater_ok) # Download subject thickeness data def _thickness_url(derivative, identifier): @@ -50,9 +113,9 @@ def _thickness_url(derivative, identifier): f"native_rms_rsl_tlink_30mm_{hemi}", row.FILE_ID ) try: - _download_file(filename, thickness_url) + _download_file(thickness_url, filename, overwrite, verbose=False) except HTTPError: - warnings.warn(f"Could not download file for {row.SUB_ID}.") + logger.warn(f"Could not download file for {row.SUB_ID}.") remove_rows.append(i) continue @@ -66,18 +129,14 @@ def _thickness_url(derivative, identifier): return thickness_data, df -def _download_file(filename: Path, url: str) -> None: - if not filename.is_file(): - with urlopen(url) as u, open(filename, "wb") as f: - shutil.copyfileobj(u, f) - - def _select_subjects( df: pd.DataFrame, sites: Optional[Sequence[str]], keep_patient: bool, keep_control: bool, + min_rater_ok: int, ) -> None: + """Selects subjects based on demographics and site.""" df.drop(df[df.FILE_ID == "no_filename"].index, inplace=True) if not keep_patient: df.drop(df[df.DX_GROUP == 1].index, inplace=True) @@ -88,4 +147,12 @@ def _select_subjects( if sites is not None: df.drop(df[~df.SITE_ID.isin(sites)].index, inplace=True) + if min_rater_ok > 0: + rater_approved = ( + df[["qc_rater_1", "qc_anat_rater_2", "qc_anat_rater_3"]] + .eq("OK") + .sum(axis=1) + ) + df.drop(df[rater_approved < min_rater_ok].index, inplace=True) + df.reset_index(inplace=True) diff --git a/brainstat_matlab/+brainstat_utils/get_brainstat_directories.m b/brainstat_matlab/+brainstat_utils/get_brainstat_directories.m index 50bc61e4..bf7dcc62 100644 --- a/brainstat_matlab/+brainstat_utils/get_brainstat_directories.m +++ b/brainstat_matlab/+brainstat_utils/get_brainstat_directories.m @@ -24,6 +24,9 @@ case 'genetic_data_dir' directory = brainstat_utils.get_brainstat_directories('brainstat_data_dir') ... + filesep + "genetic_data"; + case 'mics_data_dir' + directory = brainstat_utils.get_brainstat_directories('brainstat_data_dir') ... + + filesep + "mics_data"; case 'neurosynth_data_dir' directory = brainstat_utils.get_brainstat_directories('brainstat_data_dir') ... + filesep + "neurosynth_data"; diff --git a/brainstat_matlab/context/meta_analytic_decoder.m b/brainstat_matlab/context/meta_analytic_decoder.m index afbad0c9..d4f671ab 100644 --- a/brainstat_matlab/context/meta_analytic_decoder.m +++ b/brainstat_matlab/context/meta_analytic_decoder.m @@ -31,7 +31,7 @@ % these packages were integral to generating the data used here. arguments - stat_data {mustBeNumeric} + stat_data (:, 1) {mustBeNumeric} options.template (1,:) char = 'fsaverage5' options.interpolation (1,:) char = 'nearest' options.data_dir (1,1) string = brainstat_utils.get_brainstat_directories('neurosynth_data_dir'); @@ -51,17 +51,14 @@ mask = interpolated_volume ~= 0; neurosynth_files = fetch_neurosynth_data(options.data_dir); -feature_names = regexp(neurosynth_files, '__[0-9a-zA-Z]+', 'match', 'once'); +feature_names = regexp(neurosynth_files, '__[0-9a-zA-Z ]+', 'match', 'once'); feature_names = cellfun(@(x) x(3:end), feature_names, 'Uniform', false); pearsons_r = zeros(numel(neurosynth_files), 1); for ii = 1:numel(neurosynth_files) neurosynth_volume = read_volume(neurosynth_files{ii}); mask_inf = mask & ~isinf(neurosynth_volume); - pearsons_r(ii) = corr(interpolated_volume(mask_inf), neurosynth_volume(mask_inf)); - if isnan(pearsons_r(ii)) - keyboard; - end + pearsons_r(ii) = corr(interpolated_volume(mask_inf), neurosynth_volume(mask_inf), 'rows', 'pairwise'); end if options.ascending diff --git a/brainstat_matlab/context/read_histology_profile.m b/brainstat_matlab/context/read_histology_profile.m index 34ef9850..9529652d 100644 --- a/brainstat_matlab/context/read_histology_profile.m +++ b/brainstat_matlab/context/read_histology_profile.m @@ -16,6 +16,13 @@ options.overwrite (1,1) logical = false end +if contains(options.template, 'civet') + civet_template = options.template; + options.template = "fsaverage"; +else + civet_template = ''; +end + if ~isfolder(options.data_dir) mkdir(options.data_dir) end @@ -25,8 +32,13 @@ download_histology_profiles(histology_file, options.template) end -profiles = double(h5read(histology_file, "/" + options.template)); - +if isempty(civet_template) + profiles = double(h5read(histology_file, "/" + options.template)); +else + warning('CIVET histology profiles were not included with BrainStat. Interpolating from fsaverage.') + profiles_fsaverage = double(h5read(histology_file, "/fsaverage")); + profiles = mesh_interpolate(profiles_fsaverage, 'fsaverage', civet_template); +end end function download_histology_profiles(histology_file, template) diff --git a/brainstat_matlab/context/yeo_networks_association.m b/brainstat_matlab/context/yeo_networks_association.m new file mode 100644 index 00000000..7052d21a --- /dev/null +++ b/brainstat_matlab/context/yeo_networks_association.m @@ -0,0 +1,43 @@ +function association = yeo_networks_association(data, options) +% YEO_NETWORKS_ASSOCIATION Computes the association with yeo networks. +% +% association = YEO_NETWORKS_ASSOCIATION(data, varargin) computes the +% mean of the datapoints within each Yeo network. Variable data must be a +% vector with length equal to the number of vertices in the surface +% template (default fsaverage5). +% +% Valid Name-Value pairs: +% seven_networks (logical): +% If true (default) uses the seven Yeo networks, otherwise uses +% the seventeen network parcellation. +% template (char): +% The surface template to use. Uses 'fsaverage5' by default. See +% fetch_parcellation for a list of valid templates. +% data_dir (string): +% Data directory to store the parcellation data. Defaults to +% $HOME_DIR/brainstat_data/parcellation_data +% reduction_operation (function_handle): +% Function to apply to the data within each network. This +% function should take a vector and return a scalar. Defaults to +% @(x) mean(x, 'omitnan'). + +arguments + data (:, 1) + options.seven_networks (1,1) logical = true + options.template (1,:) char = 'fsaverage5' + options.data_dir (1,1) string = brainstat_utils.get_brainstat_directories('parcellation_data_dir') + options.reduction_operation function_handle = @(x) mean(x, 'omitnan'); +end + +if options.seven_networks + n_networks = 7; +else + n_networks = 17; +end + +yeo_networks = fetch_parcellation(options.template, 'yeo', n_networks, ... + 'data_dir', options.data_dir); + +association = accumarray(yeo_networks+1, data, [], options.reduction_operation); +association(1) = []; % Remove undefined (undefined = yeo_networks==0) +end \ No newline at end of file diff --git a/brainstat_matlab/data/data_urls.json b/brainstat_matlab/data/data_urls.json index 0ec4851c..c23e8fc1 100644 --- a/brainstat_matlab/data/data_urls.json +++ b/brainstat_matlab/data/data_urls.json @@ -26,6 +26,23 @@ }, "civet41k": { "url": "https://box.bic.mni.mcgill.ca/s/9kzBetBCZkkqN6w/download" + }, + "fsaverage": { + "url": "https://box.bic.mni.mcgill.ca/s/XiZx9HfHaufb4kD/download" + }, + "fsaverage5": { + "url": "https://box.bic.mni.mcgill.ca/s/jsSDyiDcm9KEQpf/download" + }, + "fslr32k": { + "url": "https://box.bic.mni.mcgill.ca/s/cFXCrSkfiJFjUJ0/download" + } + }, + "mics_tutorial": { + "demographics": { + "url": "https://box.bic.mni.mcgill.ca/s/7bW9JIpvQvSJeuf/download" + }, + "thickness": { + "url": "https://box.bic.mni.mcgill.ca/s/kMi6lU91piwdaCf/download" } }, "neurosynth_precomputed": { diff --git a/brainstat_matlab/datasets/+dataset_utils/download_OSF_files.m b/brainstat_matlab/datasets/+dataset_utils/download_OSF_files.m index d77a5139..6c8f5a49 100644 --- a/brainstat_matlab/datasets/+dataset_utils/download_OSF_files.m +++ b/brainstat_matlab/datasets/+dataset_utils/download_OSF_files.m @@ -3,7 +3,7 @@ arguments template (1,:) char - options.data_dir (1,1) string {mustBeFolder} = brainstat_utils.get_brainstat_directories('surface_data_dir'); + options.data_dir (1,1) string = brainstat_utils.get_brainstat_directories('surface_data_dir'); options.parcellation char = '' end diff --git a/brainstat_matlab/datasets/fetch_abide_data.m b/brainstat_matlab/datasets/fetch_abide_data.m index fcf11542..bfa9c1c6 100644 --- a/brainstat_matlab/datasets/fetch_abide_data.m +++ b/brainstat_matlab/datasets/fetch_abide_data.m @@ -15,6 +15,9 @@ % If true, keeps patients, defaults to false. % 'overwrite' % If true, overwrites older files. Defaults to false. +% 'min_rater_ok' +% Number of raters that must OK a subject's anatomical files. +% Defaults to 3. arguments options.data_dir (1,1) string = brainstat_utils.get_brainstat_directories('abide_data_dir') @@ -22,6 +25,7 @@ options.keep_control (1,1) logical = true options.keep_patient (1,1) logical = true options.overwrite (1,1) logical = false + options.min_rater_ok (1,1) uint8 = 3 end if ~exist(options.data_dir, 'dir') @@ -35,7 +39,8 @@ end demographics = readtable(summary_spreadsheet, 'PreserveVariableNames', true); -demographics = select_subjects(demographics, options.sites, options.keep_patient, options.keep_control); +demographics = select_subjects(demographics, options.sites, ... + options.keep_patient, options.keep_control, options.min_rater_ok); thickness = zeros(size(demographics,1), 81924); keep_rows = ones(size(demographics,1), 1, 'logical'); @@ -59,7 +64,7 @@ end end end - thickness(ii, 1 + (jj-1) * 40962 : jj * 40962) = dlmread(filename); + thickness(ii, 1 + (jj-1) * 40962 : jj * 40962) = readmatrix(filename); end end @@ -68,7 +73,7 @@ end -function demographics = select_subjects(demographics, sites, keep_patient, keep_control) +function demographics = select_subjects(demographics, sites, keep_patient, keep_control, min_rater_ok) % Get a subselection of subjects. if ~keep_patient demographics = demographics(demographics.DX_GROUP ~= 1, :); @@ -78,9 +83,14 @@ demographics = demographics(demographics.DX_GROUP ~= 2, :); end -if ~isempty(sites) +if ~all(sites == "") demographics = demographics(ismember(demographics.SITE_ID, upper(sites)), :); end + +if min_rater_ok > 0 + rater_ok = sum([demographics.qc_rater_1, demographics.qc_anat_rater_2, demographics.qc_anat_rater_3] == "OK", 2); + demographics = demographics(rater_ok >= min_rater_ok, :); +end end function url = thickness_url(derivative, identifier) diff --git a/brainstat_matlab/datasets/fetch_gradients.m b/brainstat_matlab/datasets/fetch_gradients.m index 202509e0..dbf74cb5 100644 --- a/brainstat_matlab/datasets/fetch_gradients.m +++ b/brainstat_matlab/datasets/fetch_gradients.m @@ -29,4 +29,11 @@ websave(gradients_file, json.gradients.(name).url); end -gradients = h5read(gradients_file, "/" + template); \ No newline at end of file +if contains(template, 'civet') + gradients_fs = h5read(gradients_file, "/fsaverage"); + gradients = mesh_interpolate(gradients_fs, 'fsaverage', template); +else + gradients = h5read(gradients_file, "/" + template); +end + + diff --git a/brainstat_matlab/datasets/fetch_mask.m b/brainstat_matlab/datasets/fetch_mask.m index da9d6f81..c8ffcaef 100644 --- a/brainstat_matlab/datasets/fetch_mask.m +++ b/brainstat_matlab/datasets/fetch_mask.m @@ -1,7 +1,18 @@ function varargout = fetch_mask(template, options) -% FETCH_CIVET_MASK fetches masks for the midline. -% mask = FETCH_CIVET_MASK(template, varargin) fetches a mask for the CIVET41k or -% CIVET164k templates. Other templates are yet to be added. +% FETCH_MASK fetches masks for the midline. +% mask = FETCH_MASK(template, varargin) fetches a cortical mask for a +% surface template. Valid templates are civet41k, civet164k, fslr32k, +% fsaverage5, and fsaverage. +% +% Valid name-value pairs: +% 'data_dir' +% Location to store the data. Defaults to +% $HOME_DIR/brainstat_data/surface_data. +% 'join' +% If true, returns a single mask. If false, returns one for each +% hemisphere. Defaults to true. +% 'overwrite' +% If true, overwrites existing data. Defaults to false. arguments template (1,1) string @@ -10,6 +21,7 @@ options.overwrite (1,1) logical = false end +template = lower(template); mask_file = options.data_dir + filesep + template + "_mask.csv"; if ~exist(mask_file, 'file') || options.overwrite @@ -17,7 +29,7 @@ websave(mask_file, json.masks.(template).url); end -mask = logical(dlmread(mask_file)); +mask = logical(readmatrix(mask_file)); if options.join varargout = {mask}; diff --git a/brainstat_matlab/datasets/fetch_mics_data.m b/brainstat_matlab/datasets/fetch_mics_data.m new file mode 100644 index 00000000..a2b830d3 --- /dev/null +++ b/brainstat_matlab/datasets/fetch_mics_data.m @@ -0,0 +1,37 @@ +function [thickness, demographics] = fetch_mics_data(options) +% FETCH_MICS_DATA fetches thickness and demographic MICS data +% [thickness, demographics] = FETCH_MICS_DATA(data_dir) fetches MICS +% cortical thickness and demographics data. The following name-value +% pairs are allowed: +% +% 'data_dir' +% The directory to save the data. Defaults to +% $HOME/brainstat_data/mics_data +% 'overwrite' +% If true, overwrites older files. Defaults to false. + +arguments + options.data_dir (1,1) string = brainstat_utils.get_brainstat_directories('mics_data_dir') + options.overwrite (1,1) logical = false +end + +if ~exist(options.data_dir, 'dir') + mkdir(options.data_dir) +end + +demographics_file = options.data_dir + filesep + "mics_demographics.csv"; +if ~exist(demographics_file, 'file') || options.overwrite + json = brainstat_utils.read_data_fetcher_json(); + websave(demographics_file, json.mics_tutorial.demographics.url, ... + weboptions('TimeOut', 10)); +end +demographics = readtable(demographics_file, 'PreserveVariableNames', true); + +thickness_file = options.data_dir + filesep + "mics_thickness.h5"; +if ~exist(thickness_file, 'file') || options.overwrite + json = brainstat_utils.read_data_fetcher_json(); + websave(thickness_file, json.mics_tutorial.thickness.url); +end +thickness = h5read(thickness_file, '/thickness')'; + +end diff --git a/brainstat_matlab/datasets/fetch_parcellation.m b/brainstat_matlab/datasets/fetch_parcellation.m index 264c308a..3f040b4c 100644 --- a/brainstat_matlab/datasets/fetch_parcellation.m +++ b/brainstat_matlab/datasets/fetch_parcellation.m @@ -3,8 +3,7 @@ % parcellation = FETCH_PARCELLATION(template, atlas, n_regions, varargin) % downloads and loads the 'cammoun', 'glasser', 'schaefer', or 'yeo', % atlas on 'fsaverage5', 'fsaverage6', 'fsaverage', or 'fslr32k' (a.k.a. -% conte69) surface template. Yeo-7 networks are also supported on 'civet41k' -% and 'civet164k' +% conte69), 'civet41k', and 'civet164k' surface template. % % Supported number of regions for each atlas are as follows: % Cammoun: 33, 60, 125, 250, 500. @@ -29,8 +28,12 @@ atlas = lower(atlas); +civet_atlas = ''; if atlas == "conte69" atlas = 'fslr32k'; +elseif contains(atlas, 'civet') + civet_atlas = atlas; + atlas = 'fsaverage'; end switch lower(atlas) @@ -53,6 +56,11 @@ otherwise error("Unknown atlas: '%s'.", atlas) end + +if ~isempty(civet_atlas) + warning('CIVET parcellations were not included with the toolbox, interpolating from fsaverage.'); + parcellation = mesh_interpolate(parcellation, atlas, civet_atlas, 'data_dir', options.data_dir); +end end function labels = read_parcellation_from_targz(filename, template, atlas, n_regions, seven_networks) diff --git a/brainstat_matlab/io/read_volume.m b/brainstat_matlab/io/read_volume.m index cb590715..8ef409fb 100644 --- a/brainstat_matlab/io/read_volume.m +++ b/brainstat_matlab/io/read_volume.m @@ -3,8 +3,6 @@ % [volume, header] = READ_VOLUME(file) reads the nifti/minc volume in the % input file. Returns the image data in "volume" and header data in % "header". -% -% ADD LINK TO BRAINSTAT DOCUMENTATION. arguments file (1,:) char diff --git a/brainstat_matlab/mesh/mesh_interpolate.m b/brainstat_matlab/mesh/mesh_interpolate.m new file mode 100644 index 00000000..9bc41b8e --- /dev/null +++ b/brainstat_matlab/mesh/mesh_interpolate.m @@ -0,0 +1,68 @@ +function interpolated_data = mesh_interpolate(data, source, target, options) +% MESH_INTERPOLATE Interpolates data between two meshes. +% +% interpolated_data = MESH_INTERPOLATE(data, source, target, options) +% interpolates the data on the source template to the target template +% using a nearest neighbor interpolation. Variable data must be a matrix +% with size(data, 1) equal to the number of vertices in the source template. Source +% and target must either be surface structures containing an n-by-3 field +% called vertices, or a char containing the name of a valid template for +% fetch_surface_template(). +% +% Valid Name-Value pairs: +% data_dir: +% Data directory for the surface templates. Used only if source +% or target are provided as char. Defaults to +% $HOME_DIR/brainstat_data/surface_data +% interpolation: +% Type of interpolation to use. Only 'nearest_neighbor' is +% currently implemented. Defaults to 'nearest_neighbor'. + +%% Handle input arguments. +arguments + data + source + target + options.data_dir = brainstat_utils.get_brainstat_directories('surface_data_dir') + options.interpolation (1, :) char {isValidInterpolation} = 'nearest_neighbor' +end + +if isstring(source); source = char(source); end +if isstring(target); target = char(target); end + +if ischar(source) + source = fetch_template_surface(source, 'data_dir', options.data_dir, 'join', true); +end + +if ischar(target) + target = fetch_template_surface(target, 'data_dir', options.data_dir, 'join', true); +end + +if size(source.vertices, 1) ~= size(data,1) + error('BrainStat:invalidTemplate', ... + 'Number of vertices in the source template and the size of data''s first dimension are not equal.') +end + +%% Interpolate. +switch lower(options.interpolation) + case 'nearest_neighbor' + mapping = knnsearch(source.vertices, target.vertices); + subselect = repmat({':'}, ndims(data), 1); + subselect{1} = mapping; + interpolated_data = data(subselect{:}); + otherwise + error('Invalid interpolation type. Note: this error should''ve been caught earlier, please notify the BrainStat developers.'); +end +end + +%% Validation function. +function isValidInterpolation(interpolation) + valid_interpolations = {'nearest_neighbor'}; + if ~ismember(interpolation, valid_interpolations) + add_apostrophe = cellfun(@(x) ['''' x ''''], valid_interpolations, 'Uniform', false); + eid = 'BrainStat:InvalidInterpolation'; + msg = ['''' interpolation ''' is not a valid interpolation. Valid options are: ' ... + strjoin(add_apostrophe, ', ') '.']; + throwAsCaller(MException(eid, msg)); + end +end diff --git a/brainstat_matlab/stats/@SLM/SLM.m b/brainstat_matlab/stats/@SLM/SLM.m index 7c6d942c..4e192d84 100644 --- a/brainstat_matlab/stats/@SLM/SLM.m +++ b/brainstat_matlab/stats/@SLM/SLM.m @@ -128,6 +128,11 @@ function fit(obj, Y) error('One-tailed tests are not implemented for multivariate data.'); end end + + if size(Y, 3) > 3 && contains(obj.correction, 'rft') + error('RFT corrections have not been implemented for more than 3 variates.') + end + student_t_test = size(Y,3) == 1; obj.reset_fit_parameters(); @@ -338,7 +343,7 @@ function debug_set(obj, varargin) elseif ~isempty(fieldnames(surf)) surf_out = io_utils.convert_surface(surf, 'format', 'SurfStat'); else - surf_out = []; + surf_out = struct(); end end diff --git a/brainstat_matlab/stats/@SLM/t_test.m b/brainstat_matlab/stats/@SLM/t_test.m index f76894fa..ddcf4ce4 100755 --- a/brainstat_matlab/stats/@SLM/t_test.m +++ b/brainstat_matlab/stats/@SLM/t_test.m @@ -91,29 +91,41 @@ function t_test(obj) jj=j.*(j+1)/2; vf=sum((c'*pinvX).^2,2)/obj.df; obj.sd=sqrt(vf*obj.SSE(jj,:)); - if k==2 - det=obj.SSE(1,:).*obj.SSE(3,:)-obj.SSE(2,:).^2; - obj.t=obj.ef(1,:).^2.*obj.SSE(3,:) + ... - obj.ef(2,:).^2.*obj.SSE(1,:) - ... - 2*obj.ef(1,:).*obj.ef(2,:).*obj.SSE(2,:); - end - if k==3 - det=obj.SSE(1,:).*(obj.SSE(3,:).*obj.SSE(6,:)-obj.SSE(5,:).^2) - ... - obj.SSE(6,:).*obj.SSE(2,:).^2 + ... - obj.SSE(4,:).*(obj.SSE(2,:).*obj.SSE(5,:)*2-obj.SSE(3,:).*obj.SSE(4,:)); - obj.t= obj.ef(1,:).^2.*(obj.SSE(3,:).*obj.SSE(6,:)-obj.SSE(5,:).^2); - obj.t=obj.t+obj.ef(2,:).^2.*(obj.SSE(1,:).*obj.SSE(6,:)-obj.SSE(4,:).^2); - obj.t=obj.t+obj.ef(3,:).^2.*(obj.SSE(1,:).*obj.SSE(3,:)-obj.SSE(2,:).^2); - obj.t=obj.t+2*obj.ef(1,:).*obj.ef(2,:).*(obj.SSE(4,:).*obj.SSE(5,:)-obj.SSE(2,:).*obj.SSE(6,:)); - obj.t=obj.t+2*obj.ef(1,:).*obj.ef(3,:).*(obj.SSE(2,:).*obj.SSE(5,:)-obj.SSE(3,:).*obj.SSE(4,:)); - obj.t=obj.t+2*obj.ef(2,:).*obj.ef(3,:).*(obj.SSE(2,:).*obj.SSE(4,:)-obj.SSE(1,:).*obj.SSE(5,:)); - end - if k>3 - warning('Hotelling''s T for k>3 not programmed yet'); - return + + % Initialize a bunch of stuff to increase performance. + % If you're reading the following, I'm sorry. + % Optimization isn't always the most legible. + % Suffice to say initializing matrices is slow. - RV + obj.t = zeros(1, size(obj.SSE,2)); + + upper_tri = triu(ones(obj.k, 'logical')); + sse_indices = zeros(obj.k); + sse_indices(upper_tri) = 1:sum(upper_tri(:)); + sse_indices = sse_indices + triu(sse_indices, 1)'; + + M = zeros(obj.k+1); + + M_ef = zeros(obj.k+1, 'logical'); + M_ef(2:end, 1) = true; + M_ef(1, 2:end) = true; + + M_sse = zeros(obj.k+1, 'logical'); + M_sse(2:end, 2:end) = true; + + ef_duplicate = [obj.ef; obj.ef]; % Trust me - this provides a significant speed boost. + for ii = 1:size(obj.SSE, 2) + sse_vertex = obj.SSE(:,ii); + sse_matrix = sse_vertex(sse_indices); + det_sse = det(sse_matrix); + if det_sse <=0 + obj.t(ii) = 0; + else + M(M_ef) = ef_duplicate(:,ii); + M(M_sse) = sse_matrix; + + obj.t(ii) = sqrt(-det(M) / det_sse / vf); + end end - obj.t=obj.t./(det+(det<=0)).*(det>0)/vf; - obj.t=sqrt(obj.t+(obj.t<=0)).*(obj.t>0); end end diff --git a/brainstat_matlab/tutorials/tutorial_1_statistics.mlx b/brainstat_matlab/tutorials/tutorial_1_statistics.mlx index d4acd40f..181a81d0 100644 Binary files a/brainstat_matlab/tutorials/tutorial_1_statistics.mlx and b/brainstat_matlab/tutorials/tutorial_1_statistics.mlx differ diff --git a/brainstat_matlab/tutorials/tutorial_2_context.mlx b/brainstat_matlab/tutorials/tutorial_2_context.mlx index d7ef2378..115bf33c 100644 Binary files a/brainstat_matlab/tutorials/tutorial_2_context.mlx and b/brainstat_matlab/tutorials/tutorial_2_context.mlx differ diff --git a/docs/matlab/API/fetch_abide_data.rst b/docs/matlab/API/fetch_abide_data.rst index c65de853..698d5ecf 100644 --- a/docs/matlab/API/fetch_abide_data.rst +++ b/docs/matlab/API/fetch_abide_data.rst @@ -31,4 +31,6 @@ sites' 'keep_patient' - If true, keeps patients, defaults to false. 'overwrite' - - If true, overwrites older files. Defaults to false. \ No newline at end of file + - If true, overwrites older files. Defaults to false. +'min_rater_ok' + - Number of raters that must OK a subject's anatomical files. Defaults to 3. \ No newline at end of file diff --git a/docs/matlab/tutorials/tutorial_1_statistics.html b/docs/matlab/tutorials/tutorial_1_statistics.html index 9b71d53b..ed1b172d 100644 --- a/docs/matlab/tutorials/tutorial_1_statistics.html +++ b/docs/matlab/tutorials/tutorial_1_statistics.html @@ -1,13 +1,12 @@ -Untitled

Tutorial 1: Statistics Module

In this tutorial you will set up your first linear model with BrainStat. To this end, we will load some sample data from the ABIDE dataset. Note that,contrary to the results shown in our manuscript, we are only using a few sites to reduce computation time in this tutorial. As such the results shown here differ from those reported in our manuscript. To get identical results, simply set `sites` to `nan`.
But before we get started, please note that this tutorial requires that you have BrainStat and all its dependencies installed. For more details see our installation guide.
sites = ["PITT", "OLIN", "OHSU"];
[cortical_thickness, demographics] = fetch_abide_data('sites', sites);
[surf_left, surf_right] = fetch_template_surface('civet41k');
mask = fetch_mask('civet41k');
Lets have a look at the cortical thickness data. To do this, we will use the surface plotter included with BrainSpace. Note that we use a recent version of BrainSpace in this tutorial (released on 16 July, 2021). If you've installed an older version please update before continuing. Lets plot mean thickness. Note that we've defined a function, pretty_plot, at the end of this file to make the plots a little prettier when they're embedded inside live scripts. If you run this tutorial outside of the live scripts you'll want to comment out the pretty_plot lines.
obj = plot_hemispheres(...
mean(cortical_thickness)', ...
{surf_left, surf_right}, ...
'labeltext', {{'Cortical', 'Thickness'}} ...
);
pretty_plot(obj);
Next, lets see whether cortical thickness is related to age in our sample data. To this end we can create a linear model with BrainStat. First we declare the behavioral variables as FixedEffects. Once, that's done we can create the model by simply adding the terms together. Lets set up a model with age and patient status as fixed effects.
% Change the encoding of patient/control from 1/2 to strings.
conversion = ["Patient"; "Control"];
demographics.DX_GROUP = conversion(demographics.DX_GROUP);
term_age = FixedEffect(demographics.AGE_AT_SCAN, 'Age');
term_patient = FixedEffect(demographics.DX_GROUP);
model = term_age + term_patient;
disp(model)
116×4 FixedEffect array with properties: - - names: ["Age" "intercept" "Control" "Patient"] - matrix: [116×4 double]
As you can see, model contains two properties: the names of each variable and a matrix containing the effects in a observation-by-variable matrix.
Beside simple fixed effects, we may also be interested in interaction effects. We can add these to the model by multiplying terms. Lets create a model with an interaction between age and patient status.
model_interaction = 1 + term_age + term_patient + ...
term_age * term_patient;
Now, imagine we have some cortical marker (e.g. cortical thickness) for each subject, and we want to evaluate whether this marker changes with age whilst correcting for effects of patient status. To do this, we can use the model we defined before, and a contrast in observations (here: age). Then we simply initialize an SLM model and fit it to the cortical thickness data. Note that the fitting may take a few minutes.
contrast_age = demographics.AGE_AT_SCAN;
BrainStatModel = SLM( ...
model, ...
contrast_age, ...
'surf', 'civet41k', ...
'correction', 'rft', ...
'mask', mask);
BrainStatModel.fit(cortical_thickness);
We can access the outcome of this model through its properties. Lets plot the t-values and the vertexwise p-values derived from a random field theory correction.
to_plot1 = [BrainStatModel.t', BrainStatModel.P.pval.P'];
to_plot1(~mask,:) = inf;
plot1 = plot_hemispheres(...
to_plot1, ...
{surf_left, surf_right}, ...
'labeltext', {'t-values', 'p-values'} ...
);
plot1.colormaps({[parula; .7 .7 .7], [flipud(hot); .7 .7 .7]});
plot1.colorlimits([-6, 4; 0, 0.05]);
pretty_plot(plot1);
BrainStat also allows for assessing significant clusters and peaks. The data on clusters are stored in tables inside BrainStatModel.P.clus and information on the peaks is stored in BrainStatModel.P.peak. If a two-tailed test is run (BrainStat defaults to two-tailed), a table is returned for each tail. The first table uses the contrast as provided, the second table uses the inverse contrast. If a one-tailed test is performed, then only a single table is returned. Lets print the first 15 rows of the inverted contrast cluster table.
disp(BrainStatModel.P.clus{2}(1:15, :))
clusid nverts resels P - ______ ______ ________ __________ - - 1 8738 20.96 7.4502e-08 - 2 1088 1.597 0.00061997 - 3 492 0.64076 0.03337 - 4 431 0.54829 0.050165 - 5 64 0.21429 0.21632 - 6 24 0.153 0.27915 - 7 98 0.13461 0.3008 - 8 43 0.11565 0.32453 - 9 9 0.11245 0.32867 - 10 21 0.083554 0.368 - 11 2 0.041479 0.43119 - 12 2 0.039791 0.43387 - 13 13 0.027494 0.45367 - 14 1 0.023942 0.45949 - 15 1 0.022462 0.46193
Here, we see that cluster 1 contains 8738 vertices and is significant at a p-value of 7.45e-08. Clusters are sorted by p-value; later clusters will generally be smaller and have higher p-values. Lets now have a look at the peaks within these clusters.
disp(BrainStatModel.P.peak{2}(1:15, :))
t vertid clusid P yeo7 - ______ ______ ______ __________ _________ - - 6.3155 72883 1 7.3046e-06 "Visual" - 6.2384 72887 1 1.0292e-05 "Visual" - 5.8728 79402 1 5.0832e-05 "Visual" - 5.872 79524 1 5.1004e-05 "Visual" - 5.8695 79387 1 5.1545e-05 "Visual" - 5.8315 41573 1 6.0726e-05 "Visual" - 5.7759 79361 1 7.6883e-05 "Visual" - 5.747 79346 1 8.6891e-05 "Visual" - 5.4913 43386 1 0.0002529 "Visual" - 5.4565 79553 1 0.00029191 "Visual" - 5.4314 50635 1 0.00032245 "Visual" - 5.4171 71146 1 0.00034265 "Default" - 5.3522 71160 1 0.00044638 "Default" - 5.3499 71166 1 0.00045032 "Default" - 5.3112 71173 1 0.00052691 "Default"
Within cluster 1, we are able to detect several peaks. The most significant peak (t=6.3155) occurs at vertex 72883, which is inside the visual network as defined by the Yeo-7 networks. Note that the Yeo network membership is only provided if the surface is specified as a template name as we did here. For custom surfaces, or pre-loaded surfaces (as we will use below) this column is omitted.
By default BrainStat uses a two-tailed test. If you want to get a one-tailed test, simply specify it in the SLM model initialization with 'two_tailed', false. Note that the one-tailed test will test for positive t-values. If you want to test for negative t-values, simply invert the contrast. We may hypothesize based on prior research that cortical thickness decreases with age, so we could specify this as follows. Note the minus in front of contrast_age to test for decreasing thickness with age.
BrainStatModel_onetailed = SLM(...
model, ...
-contrast_age, ...
'correction', 'rft', ...
'surf', {surf_left, surf_right}, ...
'mask', mask, ...
'two_tailed', false ...
);
BrainStatModel_onetailed.fit(cortical_thickness);
to_plot2 = [BrainStatModel_onetailed.t', ...
BrainStatModel_onetailed.P.pval.P'];
to_plot2(~mask,:) = inf;
plot2 = plot_hemispheres(...
to_plot2, ...
{surf_left, surf_right}, ...
'labeltext', {'t-values', 'p-values'} ...
);
plot2.colormaps({[parula; .7 .7 .7], [flipud(hot); .7 .7 .7]});
plot2.colorlimits([-4, 6; 0, 0.05]);
pretty_plot(plot2)
Similarly, we could also use the patient/control status as a contrast, instead of age. This would work as follows:
contrast_patient = model.Patient - model.Control;
BrainStatModel_patient = SLM( ...
model, ...
contrast_patient, ...
'surf', 'civet41k', ...
'correction', 'rft', ...
'mask', mask);
BrainStatModel_patient.fit(cortical_thickness);
Now, imagine that instead of using a fixed effects model, you would prefer a mixed effects model wherein the scanning site is a random variable. This is simple to set up. All you need to do is initialize the site term with the MixedEffect class, all other code remains identical.
random_site = MixedEffect(demographics.SITE_ID);
model_random = term_age + term_patient + random_site;
BrainStatModel_random = SLM(model_random, contrast_age, ...
'surf', {surf_left, surf_right}, 'mask', mask, 'correction', 'rft');
BrainStatModel_random.fit(cortical_thickness)
to_plot3 = [BrainStatModel_random.t', BrainStatModel_random.P.pval.P'];
to_plot3(~mask,:) = inf;
plot3 = plot_hemispheres(...
to_plot3, ...
{surf_left, surf_right}, ...
'labeltext', {'t-values', 'p-values'} ...
);
plot3.colormaps({[parula; .7 .7 .7], [flipud(hot); .7 .7 .7]});
plot3.colorlimits([-4, 6; 0, 0.05]);
pretty_plot(plot3);
It appears that, after correction for site as a random variable, the significant results are far more localized. That concludes the basic usage of the BrainStat for statistical models. In the next tutorial we'll show you how to use the context decoding module.
function pretty_plot(obj)
% Makes the BrainSpace plots prettier inside live scripts.
obj.handles.figure.Units = 'pixels';
set(obj.handles.axes, 'Units', 'pixels')
set(obj.handles.colorbar, 'Units', 'pixels');
obj.handles.figure.Position = [1, 1, 700, size(obj.data, 2) * 95];
set(obj.handles.text, 'FontSize', 12);
for jj = 1:size(obj.data, 2) % variates
obj.handles.colorbar(jj).Position = [...
500, ...
30 + (size(obj.data, 2) * 95) - 10 - jj*90, ...
5, ...
50];
for ii = 1:4 % views
obj.handles.axes(jj, ii).Position = [...
75 + (ii-1)*105, ...
(size(obj.data, 2) * 95) - jj*95, ...
100, ...
100];
end
end
end
+.S7 { border-left: 1px solid rgb(233, 233, 233); border-right: 1px solid rgb(233, 233, 233); border-top: 1px solid rgb(233, 233, 233); border-bottom: 0px none rgb(0, 0, 0); border-radius: 0px; padding: 6px 45px 0px 13px; line-height: 17.234px; min-height: 18px; white-space: nowrap; color: rgb(0, 0, 0); font-family: Menlo, Monaco, Consolas, "Courier New", monospace; font-size: 14px; } +.S8 { margin: 10px 10px 9px 4px; padding: 0px; line-height: 21px; min-height: 0px; white-space: pre-wrap; color: rgb(0, 0, 0); font-family: Helvetica, Arial, sans-serif; font-style: normal; font-size: 14px; font-weight: 400; text-align: left; } +.S9 { margin: 15px 10px 5px 4px; padding: 0px; line-height: 18px; min-height: 0px; white-space: pre-wrap; color: rgb(60, 60, 60); font-family: Helvetica, Arial, sans-serif; font-style: normal; font-size: 17px; font-weight: 700; text-align: left; } +.S10 { border-left: 1px solid rgb(233, 233, 233); border-right: 1px solid rgb(233, 233, 233); border-top: 0px none rgb(0, 0, 0); border-bottom: 1px solid rgb(233, 233, 233); border-radius: 0px 0px 4px 4px; padding: 0px 45px 4px 13px; line-height: 17.234px; min-height: 18px; white-space: nowrap; color: rgb(0, 0, 0); font-family: Menlo, Monaco, Consolas, "Courier New", monospace; font-size: 14px; } +.S11 { border-left: 1px solid rgb(233, 233, 233); border-right: 1px solid rgb(233, 233, 233); border-top: 1px solid rgb(233, 233, 233); border-bottom: 1px solid rgb(233, 233, 233); border-radius: 4px 4px 0px 0px; padding: 6px 45px 4px 13px; line-height: 17.234px; min-height: 18px; white-space: nowrap; color: rgb(0, 0, 0); font-family: Menlo, Monaco, Consolas, "Courier New", monospace; font-size: 14px; } +.S12 { margin: 3px 10px 5px 4px; padding: 0px; line-height: 18px; min-height: 0px; white-space: pre-wrap; color: rgb(60, 60, 60); font-family: Helvetica, Arial, sans-serif; font-style: normal; font-size: 17px; font-weight: 700; text-align: left; }

Tutorial 1: Statistics Module

In this tutorial you will set up your first linear model with BrainStat. To this end, we will load some sample data from the MICS dataset. But before we get started, please note that this tutorial requires that you have BrainStat and all its dependencies installed. For more details see our installation guide.
Lets have a look at the cortical thickness data and the demographics data. Note that we've defined a function, pretty_plot, at the end of this file to make the plots a little prettier when they're embedded inside live scripts. If you run this tutorial outside of a live script you'll want to comment out the pretty_plot lines.
[cortical_thickness, demographics] = fetch_mics_data();
[surf_left, surf_right] = fetch_template_surface('fsaverage5');
surface = fetch_template_surface('fsaverage5', 'join', true);
obj = plot_hemispheres(...
mean(cortical_thickness)', ...
{surf_left, surf_right}, ...
'labeltext', {{'Cortical', 'Thickness'}} ...
);
pretty_plot(obj);
disp(demographics(1:10,:))
SUB_ID VISIT AGE_AT_SCAN SEX + __________ _____ ___________ _____ + + {'031404'} 1 -0.57968 {'F'} + {'04a144'} 1 -0.81212 {'M'} + {'0b78f1'} 1 0.11764 {'M'} + {'0d26b9'} 1 0.46629 {'F'} + {'1988b8'} 1 -0.1148 {'M'} + {'219baf'} 1 -1.0446 {'M'} + {'242357'} 1 -0.6959 {'F'} + {'26899f'} 1 -0.92833 {'M'} + {'26899f'} 2 -0.6959 {'M'} + {'29b77c'} 1 -1.0446 {'F'}
for ii = 1:2
fprintf(['Visit %d: N=%d, %d females, mean subject age: %2.2f, ' ...
'standard deviation of age: %2.2f.\n'], ...
ii, ...
sum(demographics.VISIT==ii), ...
sum(demographics.SEX(demographics.VISIT==ii) == "F"), ...
mean(demographics.AGE_AT_SCAN(demographics.VISIT==ii)), ...
std(demographics.AGE_AT_SCAN(demographics.VISIT==ii)));
end
Visit 1: N=70, 30 females, mean subject age: -0.02, standard deviation of age: 1.03. +Visit 2: N=12, 5 females, mean subject age: 0.09, standard deviation of age: 0.87.
Demographics contains four variables: a subject ID, a visit number (some subjects visited multiple times), their age at the time of scanning and their sex. We've also printed the means and standard deviations for visits 1, 2, and both. Next, we will assess whether a subject's age is related to their cortical thickness. To this end we can create a linear model with BrainStat. For our first model, we will only consider the effect of age, i.e. we will disregard the effect of sex and that some subjects visit twice.

A basic model

term_age = FixedEffect(demographics.AGE_AT_SCAN, 'Age');
model = term_age;
disp(model)
82×2 FixedEffect array with properties: + + names: ["intercept" "Age"] + matrix: [82×2 double]
As you can see, model contains two properties: the names of each variable and a matrix containing the effects in a observation-by-variable matrix. As you can see, an intercept is included in the model by default. We can also access the vectors related to each effect by their name i.e. model.intercept and model.Age will return the vectors of the intercept and age, respectively.
Next, we must define a contrast in observations (here: age). With the model, contrast, and cortical thickness data, we can then assess whether there is an effect of age on cortical thickness.
contrast_age = demographics.AGE_AT_SCAN;
mask = fetch_mask('fsaverage5');
slm = SLM( ...
model, ...
contrast_age, ...
'surf', 'fsaverage5', ...
'correction', {'rft', 'fdr'}, ...
'cluster_threshold', 0.01, ...
'mask', mask);
slm.fit(cortical_thickness);
We can access the outcome of this model through its properties. Lets plot the t-values, as well as the peak and cluster p-values derived from a random field theory correction. We've defined a function for slm results at the end of this file as we'll be doing this a lot.
plot_slm(slm, {surf_left, surf_right}, mask, true, true);
to_plot = [slm.t', slm.P.pval.C', slm.P.pval.P', slm.Q'];
to_plot(~mask, :) = inf;
obj = plot_hemispheres(to_plot, {surf_left, surf_right}, ...
'LabelText', {'t-values', 'Cluster p-values', ...
'Peak p-values', 'Vertex p-values'});
obj.colorlimits([-6, 4; 0, 0.05; 0, 0.05; 0, 0.05])
obj.colormaps({[parula; 0.7, 0.7, 0.7], ...
[flipud(autumn); 0.7, 0.7, 0.7], ...
[flipud(autumn); 0.7, 0.7, 0.7], ...
[flipud(autumn); 0.7, 0.7, 0.7]});
Only clusters are significant, and not peaks. This suggests that the age effect covers large regions, rather than local foci. Furthermore, at the vertex level we only find few small groups of significant vertices. Lets have a closer look at the clusters and their peaks. The data on clusters are stored in tables inside slm.P.clus and information on the peaks is stored in slm.P.peak. If a two-tailed test is run (BrainStat defaults to two-tailed), a table is returned for each tail. The first table uses the contrast as provided, the second table uses the inverse contrast. If a one-tailed test is performed, then only a single table is returned. Lets print the first 15 rows of the inverted contrast cluster table.
disp(slm.P.clus{2}(1:15, :))
clusid nverts resels P + ______ ______ ______ __________ + + 1 141 6.2833 3.3424e-05 + 2 82 3.9945 0.001858 + 3 69 3.8717 0.0023617 + 4 61 3.6705 0.0035174 + 5 82 3.6523 0.0036477 + 6 70 3.1277 0.010638 + 7 49 2.7576 0.023227 + 8 62 2.4526 0.044851 + 9 42 2.4264 0.047482 + 10 47 2.3414 0.057146 + 11 41 2.2456 0.070458 + 12 43 1.9359 0.1386 + 13 34 1.7405 0.21092 + 14 28 1.6818 0.23867 + 15 33 1.6255 0.26823
Here, we see that cluster 1 contains 141 vertices. Furthermore, we see that clusters 1-9 are significant P<0.05. Lets now have a look at the peaks within these clusters.
disp(slm.P.peak{2}(1:15, :))
t vertid clusid P yeo7 + ______ ______ ______ _________ ___________________ + + 5.6954 18720 11 0.0012475 "Ventral Attention" + 5.1648 5430 12 0.009035 "Default" + 4.8555 16911 6 0.027242 "Ventral Attention" + 4.834 19629 2 0.029335 "Frontoparietal" + 4.6283 12603 14 0.059519 "Default" + 4.4789 8454 10 0.098333 "Ventral Attention" + 4.4751 2169 3 0.099588 "Default" + 4.431 1286 16 0.1152 "Ventral Attention" + 4.3839 9060 3 0.13441 "Default" + 4.2564 16489 2 0.20282 "Ventral Attention" + 4.1685 15263 22 0.26787 "Default" + 4.1456 4866 5 0.28799 "Limbic" + 3.984 19000 21 0.47405 "Somatomotor" + 3.9577 4332 8 0.51269 "Ventral Attention" + 3.9243 1811 15 0.56692 "Frontoparietal"
We are able to detect several peaks. The peak with the highest t-statistic (t=5.6954) occurs at vertex 18720, which is inside the ventral attention network as defined by the Yeo-7 networks. However, this peak occurs inside a non-significant cluster (11). Note that the Yeo network membership is only provided if the surface is specified as a template name as we did here. For custom surfaces, or pre-loaded surfaces (as we will use below) this column is omitted.

Interaction models

Similarly to age, we can also test for the effect of sex on cortical thickness.
term_sex = FixedEffect(demographics.SEX);
model_sex = term_sex;
contrast_sex = (demographics.SEX == "M") - (demographics.SEX == "F");
slm_sex = SLM( ...
model_sex, ...
contrast_sex, ...
'surf', surface, ...
'correction', {'rft', 'fdr'}, ...
'cluster_threshold', 0.01, ...
'mask', mask);
slm_sex.fit(cortical_thickness);
plot_slm(slm_sex, {surf_left, surf_right}, mask);
Here, we find no significant effects of sex on cortical thickness. However, as we've already established, age has an effect on cortical thickness. So we may want to correct for this effect before evaluating whether sex has an effect on cortical thickenss. Lets make a new model that includes the effect of age.
model_sexage = term_age + term_sex;
slm_sexage = SLM( ...
model_sexage, ...
contrast_sex, ...
'surf', surface, ...
'correction', {'rft', 'fdr'}, ...
'cluster_threshold', 0.01, ...
'mask', mask);
slm_sexage.fit(cortical_thickness);
plot_slm(slm_sexage, {surf_left, surf_right}, mask);
After accounting for the effect of age, we still don't find significant clusters of effect of sex on cortical thickness. However, it could be that age affects men and women differently. To account for this, we could include an interaction effect into the model. Lets run the model again with an interaction effect.
model_sexage_int = term_age + term_sex + term_age * term_sex;
slm_sexage = SLM( ...
model_sexage_int, ...
contrast_sex, ...
'surf', surface, ...
'correction', {'rft', 'fdr'}, ...
'cluster_threshold', 0.01, ...
'mask', mask);
slm_sexage.fit(cortical_thickness);
plot_slm(slm_sexage, {surf_left, surf_right}, mask);
After including the interaction effect, we now do find significant effects of sex on cortical thickness in several clusters.
Instead of looking at the effects of age or sex, we could also look at whether the cortex of men and women changes differently with age by comparing their interaction effects. To do this, we have to modify the contrast.
contrast_sex_int = (demographics.AGE_AT_SCAN .* ...
(demographics.SEX == "M")) - ...
(demographics.AGE_AT_SCAN .* ...
(demographics.SEX == "F"));
slm_sexage_int = SLM( ...
model_sexage_int, ...
contrast_sex_int, ...
'surf', surface, ...
'correction', 'rft', ...
'cluster_threshold', 0.01, ...
'mask', mask);
slm_sexage_int.fit(cortical_thickness);
obj = plot_slm(slm_sexage_int, {surf_left, surf_right}, mask);
obj.colorlimits([-3, 3; 0, 0.05]);
Indeed, it appears that the interaction effect between sex and age is quite different across men and women, with stronger effects occuring in women.

One-tailed models

Imagine that, based on prior research, we hypothesize that men have higher cortical thickness than women. In that case, we could run this same model with a one-tailed test, rather than a two-tailed test. By default BrainStat uses a two-tailed test. If you want to get a one-tailed test, simply specify it in the SLM model initialization with 'two_tailed', false. Note that the one-tailed test will test for the significance of positive t-values. If you want to test for the significance of negative t-values, simply change the sign of the contrast. We may hypothesize based on prior research that cortical thickness decreases with age, so we could specify this as follows. Note the minus in front of contrast_age to test for decreasing thickness with age.
slm_onetail = SLM( ...
model_sexage_int, ...
contrast_sex, ...
'surf', surface, ...
'correction', {'rft', 'fdr'}, ...
'cluster_threshold', 0.01, ...
'two_tailed', false, ...
'mask', mask);
slm_onetail.fit(cortical_thickness);
plot_slm(slm_onetail, {surf_left, surf_right}, mask);
Notice the additional clusters that we find when using a one-tailed test.

Mixed Effects Models

So far, we've considered multiple visits of the same subject as two separate, independent measurements. Clearly, however, such measurements are not independent of each other. To account for this, we could add subject ID as a random effect. Lets do this and test the effect of age on cortical thickness again.
term_subject = MixedEffect(demographics.SUB_ID);
model_mixed = term_age + term_sex + term_age * term_sex + term_subject;
slm_mixed = SLM( ...
model_mixed, ...
-contrast_age, ...
'surf', surface, ...
'correction', 'rft', ...
'cluster_threshold', 0.01, ...
'two_tailed', false, ...
'mask', mask);
slm_mixed.fit(cortical_thickness);
obj = plot_slm(slm_mixed, {surf_left, surf_right}, mask);
obj.colorlimits([-6, 4; 0, 0.05]);
After inclusion of subject as a random variable into the one-tailed model shown earlier, we find fewer and smaller clusters, indicating that by not accounting for the repeated measures structure of the data we were overestimating the significance of effects.
That concludes the basic usage of the BrainStat for statistical models. In the next tutorial we'll show you how to use the context decoding module.
Below are the support functions used in this live script.
function pretty_plot(obj)
% Makes the BrainSpace plots prettier inside live scripts.
obj.handles.figure.Units = 'pixels';
set(obj.handles.axes, 'Units', 'pixels')
set(obj.handles.colorbar, 'Units', 'pixels');
obj.handles.figure.Position = [1, 1, 700, size(obj.data, 2) * 95];
set(obj.handles.text, 'FontSize', 12);
for jj = 1:size(obj.data, 2) % variates
obj.handles.colorbar(jj).Position = [...
500, ...
30 + (size(obj.data, 2) * 95) - 10 - jj*90, ...
5, ...
50];
for ii = 1:4 % views
obj.handles.axes(jj, ii).Position = [...
75 + (ii-1)*105, ...
(size(obj.data, 2) * 95) - jj*95, ...
100, ...
100];
end
end
end
function obj = plot_slm(slm, surfaces, mask, plot_peak, plot_fdr)
% Plot the t-values and p-values of an SLM.
arguments
slm
surfaces
mask
plot_peak (1,1) logical = false
plot_fdr (1,1) logical = false
end
to_plot = [slm.t(:), slm.P.pval.C(:)];
labels = {'t-values', {'Cluster', 'p-values (RFT)'}};
colormaps = {[parula; .7 .7 .7];
[flipud(autumn); .7 .7 .7]};
colorlimits = [-6, 4; 0, 0.05];
if plot_peak
to_plot = [to_plot, slm.P.pval.P(:)];
labels = [labels, {{'Peak', 'p-values (RFT)'}}];
colormaps = [colormaps; {[flipud(autumn); .7 .7 .7]}];
colorlimits = [colorlimits; 0, 0.05];
end
if plot_fdr
to_plot = [to_plot, slm.Q(:)];
labels = [labels, {{'Vertexwise', 'p-values (FDR)'}}];
colormaps = [colormaps; {[flipud(autumn); .7 .7 .7]}];
colorlimits = [colorlimits; 0, 0.05];
end
to_plot(~mask, :) = inf;
obj = plot_hemispheres(...
to_plot, ...
{surfaces{1}, surfaces{2}}, ...
'labeltext', labels ...
);
obj.colormaps(colormaps);
obj.colorlimits(colorlimits);
pretty_plot(obj);
end

\ No newline at end of file diff --git a/docs/matlab/tutorials/tutorial_2_context.html b/docs/matlab/tutorials/tutorial_2_context.html index 1a15eaa0..7005bc17 100644 --- a/docs/matlab/tutorials/tutorial_2_context.html +++ b/docs/matlab/tutorials/tutorial_2_context.html @@ -1,10 +1,13 @@ Untitled

Tutorial 2: Context Module

In this tutorial you will learn about the context decoding tools included with BrainStat. The context decoding module consists of three parts: genetic decoding, meta-analytic decoding and histological comparisons. First, we'll consider how to run the genetic decoding analysis. But before we get started, please note that this tutorial, alike the previous one, requires that you have BrainStat and all its dependencies installed. For more details see our installation guide.

Genetics

For genetic decoding we use the Allen Human Brain Atlas through the abagen toolbox. Note that, as abagen is only available in Python, we simply supply the genetic expression of pre-computed regions of interest with default parameters in the MATLAB toolbox. This should suffice for most use-cases, but for more specific use-cases we advise you to use the Python implementation of BrainStat. For a full list of supported parcellations see help fetch_genetic_expression.
[expression, gene_names] = fetch_genetic_expression('schaefer', 100);
Reading atlas from file. This may take a few minutes.
img = imagesc(expression);
xlabel('Genes');
ylabel('Regions');
colorbar;
The variable expression is a matrix containing the genetic expression of genes within each region of the atlas. These values will fall in the range [0, 1] where higher values represent higher expression. Some regions may return NaN values for all genes (e.g. see region 60 in the plot). This occurs when there are no samples within this region across all donors. The variable gene_names contains the names of each gene in the same order as the columns of expression.

Meta-Analytic Decoding

To perform meta-analytic decoding, BrainStat uses precomputed Neurosynth maps. Here we test which terms are most associated with a map of cortical thickness. A simple example analysis can be run as follows. First, we will load some cortical thickness data and two cortical surfaces. The ABIDE dataset provides this data on the CIVET surface, so we will also load those surfaces. The surface decoder interpolates the data from the surface to the voxels in the volume that are in between the two input surfaces. Please be aware that this analysis may take several minutes to download and run.
mask = fetch_mask('civet41k');
thickness = fetch_abide_data('sites', 'PITT');
[correlation, feature] = surface_decoder(mean(thickness), ...
'template', 'civet41k');
disp(correlation(1:3));
0.3895 - 0.3808 - 0.3630
disp(feature(1:3));
{'temporal'} {'frontotemporal'} {'pole'}
The variable correlation now contains the correlation values between the input map mean(thickness) and each of the neurosynth features. The corresponding feature names are stored in the variable feature.

Histology

For histological decoding we use microstructural profile covariance gradients, as first shown by (Paquola et al, 2019, Plos Biology), computed from the BigBrain dataset. Firstly, lets download the MPC data and compute its gradients.
schaefer_400 = fetch_parcellation('fsaverage5' ,'schaefer', 400);
Reading from version 2 +.S8 { border-left: 1px solid rgb(233, 233, 233); border-right: 1px solid rgb(233, 233, 233); border-top: 1px solid rgb(233, 233, 233); border-bottom: 0px none rgb(0, 0, 0); border-radius: 0px; padding: 6px 45px 0px 13px; line-height: 17.234px; min-height: 18px; white-space: nowrap; color: rgb(0, 0, 0); font-family: Menlo, Monaco, Consolas, "Courier New", monospace; font-size: 14px; } +.S9 { border-left: 1px solid rgb(233, 233, 233); border-right: 1px solid rgb(233, 233, 233); border-top: 0px none rgb(0, 0, 0); border-bottom: 1px solid rgb(233, 233, 233); border-radius: 0px; padding: 0px 45px 4px 13px; line-height: 17.234px; min-height: 18px; white-space: nowrap; color: rgb(0, 0, 0); font-family: Menlo, Monaco, Consolas, "Courier New", monospace; font-size: 14px; } +.S10 { margin: 10px 10px 9px 4px; padding: 0px; line-height: 21px; min-height: 0px; white-space: pre-wrap; color: rgb(0, 0, 0); font-family: Helvetica, Arial, sans-serif; font-style: normal; font-size: 14px; font-weight: 400; text-align: left; } +.S11 { border-left: 1px solid rgb(233, 233, 233); border-right: 1px solid rgb(233, 233, 233); border-top: 1px solid rgb(233, 233, 233); border-bottom: 1px solid rgb(233, 233, 233); border-radius: 0px 0px 4px 4px; padding: 6px 45px 4px 13px; line-height: 17.234px; min-height: 18px; white-space: nowrap; color: rgb(0, 0, 0); font-family: Menlo, Monaco, Consolas, "Courier New", monospace; font-size: 14px; } +.S12 { border-left: 1px solid rgb(233, 233, 233); border-right: 1px solid rgb(233, 233, 233); border-top: 1px solid rgb(233, 233, 233); border-bottom: 1px solid rgb(233, 233, 233); border-radius: 0px; padding: 6px 45px 4px 13px; line-height: 17.234px; min-height: 18px; white-space: nowrap; color: rgb(0, 0, 0); font-family: Menlo, Monaco, Consolas, "Courier New", monospace; font-size: 14px; }

Tutorial 2: Context Module

In this tutorial you will learn about the context decoding tools included with BrainStat. The context decoding module consists of three parts: genetic decoding, meta-analytic decoding and histological comparisons. First, we'll consider how to run the genetic decoding analysis. But before we get started, please note that this tutorial, alike the previous one, requires that you have BrainStat and all its dependencies installed. For more details see our installation guide. Also, we'll run a simple linear model again like we did in the previous tutorial, as we'll be using the results of this model.
[cortical_thickness, demographics] = fetch_mics_data();
[surf_left, surf_right] = fetch_template_surface('fsaverage5');
mask = fetch_mask('fsaverage5');
term_age = FixedEffect(demographics.AGE_AT_SCAN, 'Age');
term_sex = FixedEffect(demographics.SEX);
term_subject = MixedEffect(demographics.SUB_ID);
model = term_age + term_sex + term_age * term_sex + term_subject;
contrast_age = -demographics.AGE_AT_SCAN;
slm = SLM( ...
model, ...
contrast_age, ...
'surf', 'fsaverage5', ...
'correction', {'rft', 'fdr'}, ...
'mask', mask, ...
'two_tailed', false, ...
'cluster_threshold', 0.01);
slm.fit(cortical_thickness);

Genetics

For genetic decoding we use the Allen Human Brain Atlas through the abagen toolbox. Note that, as abagen is only available in Python, we simply supply the genetic expression of pre-computed regions of interest with default parameters in the MATLAB toolbox. This should suffice for most use-cases, but for more specific use-cases we advise you to use the Python implementation of BrainStat. For a full list of supported parcellations see help fetch_genetic_expression.
[expression, gene_names] = fetch_genetic_expression('schaefer', 100);
Reading atlas from file. This may take a few minutes.
img = imagesc(expression);
xlabel('Genes');
ylabel('Regions');
colorbar;
The variable expression is a matrix containing the genetic expression of genes within each region of the atlas. These values will fall in the range [0, 1] where higher values represent higher expression. Some regions may return NaN values for all genes (e.g. see region 60 in the plot). This occurs when there are no samples within this region across all donors. The variable gene_names contains the names of each gene in the same order as the columns of expression. We can also correlate these genes to the t-statistic map derived earlier. Lets do this for the highest correlating gene, WFDC1, and create a scatter plot as well as a surface map.
schaefer_100 = fetch_parcellation('fsaverage5', 'schaefer', 100);
Reading from version 2 +colortable with 51 entries read (originally Schaefer2018_100Parcels_7Networks) +Reading from version 2 +colortable with 51 entries read (originally Schaefer2018_100Parcels_7Networks)
t_schaefer_100 = full2parcel(slm.t, schaefer_100);
WFDC1_expression = expression(:, gene_names == "WFDC1");
no_nan = ~isnan(WFDC1_expression);
t_WFDC1_corr = corr(t_schaefer_100(no_nan), WFDC1_expression(no_nan));
bestfit_coefficients = polyfit(t_schaefer_100(no_nan), WFDC1_expression(no_nan), 1);
figure();
scatter(t_schaefer_100, WFDC1_expression, 200, 'k', '.');
hold on
plot(t_schaefer_100, polyval(bestfit_coefficients, t_schaefer_100), ...
'k', 'LineWidth', 3);
xlabel('t-statistic')
ylabel('WFDC1 Expression')
set(gca, 'FontSize', 16, 'YLim', [0, 1], 'YTick', [0, 1], 'Xtick', [-1.5, 2])
text(-1.4, 0.9, sprintf('r = %0.2f', t_WFDC1_corr), 'FontSize', 16);
WFDC1_expression(isnan(WFDC1_expression)) = -inf;
obj = plot_hemispheres(WFDC1_expression, {surf_left, surf_right}, ...
'parcellation', schaefer_100);
obj.colormaps([.7 .7 .7; parula])
obj.colorlimits([0, 0.85])
pretty_plot(obj)

Meta-Analytic Decoding

To perform meta-analytic decoding, BrainStat uses precomputed Neurosynth maps. Here we test which terms are most associated with our t-statistic map. The surface decoder interpolates the data from the surface to the voxels in the volume that are in between pial and white matter surface. Next, using all voxels that are non-zero both in this volume and in the Neurosynth database, a Pearson correlation is computed for every feature. Please be aware that this analysis may take several minutes to download and run.
[correlation, feature] = meta_analytic_decoder(slm.t, ...
'template', 'fsaverage5');
disp(correlation(1:3));
0.2072 + 0.2070 + 0.2003
disp(feature(1:3));
{'nucleus accumbens'} {'accumbens'} {'dorsal anterior'}
The variable correlation now contains the correlation values between the t-statistic map and each of the neurosynth features. The corresponding feature names are stored in the variable feature. An easy way of visualizing this data is by using a word cloud. These are simple to build with MATLAB:
figure
wc = wordcloud(feature(correlation>0), correlation(correlation>0));
Here, larger words represent higher correlations. It's common to see the largest words be related to anatomy. Looking past anatomical terms, we see several words related to motivation and risk taking behavior e.g. reward, motivation, additction, gambling, punishment.

Histology

For histological decoding we use microstructural profile covariance gradients, as first shown by (Paquola et al, 2019, Plos Biology), computed from the BigBrain dataset. Firstly, lets download the MPC data and compute its gradients.
schaefer_400 = fetch_parcellation('fsaverage5' ,'schaefer', 400);
Reading from version 2 colortable with 201 entries read (originally Schaefer2018_400Parcels_7Networks) Reading from version 2 -colortable with 201 entries read (originally Schaefer2018_400Parcels_7Networks)
profiles = read_histology_profile('template', 'fsaverage5');
mpc = compute_mpc(profiles, schaefer_400);
Sorting data by smallest to largest number in the label! (labelmean.m)
Warning: Found 0's in the label vector. Replacing these with NaN.
gm = compute_histology_gradients(mpc);
The variable profiles now contains histological profiles sampled at 50 different depths across the cortex, mpc contains the covariance of these profiles, and gm contains their gradients. Depending on your use-case, each of these variables could be of interest, but for purposes of this tutorial we'll plot the gradients to the surface with BrainSpace. For details on what the GradientMaps class, gm, contains please consult the BrainSpace documentation.
[surface_left, surface_right] = fetch_template_surface('fsaverage5');
vertexwise_gradients = parcel2full(gm.gradients{1}(:,1:2), schaefer_400);
vertexwise_gradients(isnan(vertexwise_gradients)) = inf;
obj = plot_hemispheres(vertexwise_gradients, ...
{surface_left, surface_right}, ...
'labeltext', {'MPC Gradient 1', 'MPC Gradient 2'});
obj.colormaps([parula; .7 .7 .7])
pretty_plot(obj);
Note that we no longer use the y-axis regression used in (Paquola et al., 2019, Plos Biology), as such the first gradient becomes an anterior-posterior- gradient.

Resting-State Contextualization

Lastly, BrainStat provides contextualization using resting-state fMRI markers: specifically, with the Yeo functional networks (Yeo et al., 2011, Journal of Neurophysiology), a clustering of resting-state connectivity, and the functional gradients (Margulies et al., 2016, PNAS), a lower dimensional manifold of resting-state connectivity.
Lets first have a look at contextualization of cortical thickness using the Yeo networks. We'll use some of the sample cortical thickness data included with BrainSpace, and see what its mean is within each Yeo network.
% Load cortical thickness
[thickness_left, thickness_right] = load_marker('thickness');
thickness = [thickness_left; thickness_right];
% Load Yeo networks
yeo_7_networks = fetch_parcellation('fslr32k', 'yeo', 7);
[network_names, colormap] = fetch_yeo_networks_metadata(7);
% Get mean intensity within yeo networks
mean_thickness = full2parcel(thickness', yeo_7_networks)';
% Plot
figure;
s = spider_plot_class(mean_thickness, ...
'AxesLabels', cellstr(network_names)', ...;
'AxesLimits', [2; 3.2] .* ones(2,7), ...
'AxesFontSize', 14, ...
'LabelFontSize', 14, ...
'LegendLabels', {'Thickness'});
Here we can see that, on average, the somatomotor/visual cortices have low cortical thickness whereas the default/limbic cortices have high thickness.
Next, lets have a look at how cortical thickness relates to the first functional gradient which describes a sensory-transmodal axis in the brain. First lets plot the first few gradients.
functional_gradients = fetch_gradients('fslr32k', 'margulies2016');
gradients_plot = functional_gradients;
gradients_plot(isnan(gradients_plot)) = -inf;
[surface_left, surface_right] = fetch_template_surface('fslr32k');
obj = plot_hemispheres(...
gradients_plot(:,1:3), ...
{surface_left, surface_right}, ...
'LabelText', "Gradient " + (1:3));
obj.colormaps([.7 .7 .7; parula])
obj.colorlimits([-10, 11; -8, 5; -5, 3])
pretty_plot(obj)
There are many ways to compare these gradients to cortical markers such as cortical thickness. In general, we recommend using corrections for spatial autocorrelation which are implemented in BrainSpace. We'll show a correction with spin test in this tutorial; for other methods and further details please consult the BrainSpace tutorials.
In a spin test we compare the empirical correlation between the gradient and the cortical marker to a distribution of correlations derived from data rotated across the cortical surface. The p-value then depends on the percentile of the empirical correlation within the permuted distribution.
[sphere_left, sphere_right] = load_conte69('spheres');
% Note: we generally recommend running >=1000 permutations. For purposes
% of this tutorial we will only run 100.
thickness_permuted = spin_permutations( ...
{thickness_left, thickness_right}, ...
{sphere_left, sphere_right}, 100, ...
'random_state', 2021);
Running Spin Permutation
thickness_permuted_full = squeeze( ...
[thickness_permuted{1}; thickness_permuted{2}]);
r_empirical = corr(thickness, functional_gradients(:, 1), ...
'rows', 'pairwise');
r_permuted = corr(thickness_permuted_full, functional_gradients(:, 1), ...
'rows', 'pairwise');
% Significance depends on whether we do a one-tailed or two-tailed test.
% If one-tailed it depends on in which direction the test is.
p_value_right_tailed = mean(r_empirical > r_permuted);
p_value_left_tailed = mean(r_empirical < r_permuted);
p_value_two_tailed = min(p_value_right_tailed, p_value_left_tailed) * 2;
fprintf(['The two-tailed p-value for the correlation between ' ...
'cortical thickness and \n gradient 1 is %0.2g.\n'], ...
p_value_two_tailed);
The two-tailed p-value for the correlation between cortical thickness and - gradient 1 is 0.04.
That concludes the tutorials of BrainStat. If anything is unclear, or if you think you've found a bug, please post it to the Issues page of our Github.
Happy BrainStating!
function pretty_plot(obj)
% Makes the BrainSpace plots prettier inside live scripts.
obj.handles.figure.Units = 'pixels';
set(obj.handles.axes, 'Units', 'pixels')
set(obj.handles.colorbar, 'Units', 'pixels');
obj.handles.figure.Position = [1, 1, 700, size(obj.data, 2) * 95];
set(obj.handles.text, 'FontSize', 12);
for jj = 1:size(obj.data, 2) % variates
obj.handles.colorbar(jj).Position = [...
500, ...
30 + (size(obj.data, 2) * 95) - 10 - jj*90, ...
5, ...
50];
for ii = 1:4 % views
obj.handles.axes(jj, ii).Position = [...
75 + (ii-1)*105, ...
(size(obj.data, 2) * 95) - jj*95, ...
100, ...
100];
end
end
end
+colortable with 201 entries read (originally Schaefer2018_400Parcels_7Networks)
profiles = read_histology_profile('template', 'fsaverage5');
mpc = compute_mpc(profiles, schaefer_400);
Sorting data by smallest to largest number in the label! (labelmean.m)
Warning: Found 0's in the label vector. Replacing these with NaN.
gm = compute_histology_gradients(mpc);
The variable profiles now contains histological profiles sampled at 50 different depths across the cortex, mpc contains the covariance of these profiles, and gm contains their gradients. Depending on your use-case, each of these variables could be of interest, but for purposes of this tutorial we'll plot the gradients to the surface with BrainSpace. For details on what the GradientMaps class, gm, contains please consult the BrainSpace documentation.
vertexwise_gradients = parcel2full(gm.gradients{1}(:,1:2), schaefer_400);
vertexwise_gradients(isnan(vertexwise_gradients)) = inf;
obj = plot_hemispheres(vertexwise_gradients, ...
{surf_left, surf_right}, ...
'labeltext', {'MPC Gradient 1', 'MPC Gradient 2'});
obj.colormaps([parula; .7 .7 .7])
pretty_plot(obj);
Note that we no longer use the y-axis regression used in (Paquola et al., 2019, Plos Biology), as such the first gradient becomes an anterior-posterior- gradient. Next, lets plot a scatter plot between our t-statistic map and the first gradient of MPC.
t_schaefer_400 = full2parcel(slm.t, schaefer_400);
t_mpc_corr = corr(t_schaefer_400, gm.gradients{1}(:,1));
bestfit_coefficients = polyfit(t_schaefer_400, gm.gradients{1}(:,1), 1);
figure();
scatter(t_schaefer_400, gm.gradients{1}(:,1), 200, 'k', '.');
hold on
plot(t_schaefer_400, polyval(bestfit_coefficients, t_schaefer_400), ...
'k', 'LineWidth', 3);
xlabel('t-statistic')
ylabel('MPC Gradient 1')
set(gca, 'FontSize', 16, 'YLim', [-1, 1], 'YTick', [-1, 1], ...
'Xtick', [-2, 3], 'XLim', [-2, 3])
text(-1.9, 0.8, sprintf('r = %0.2f', t_mpc_corr), 'FontSize', 16)

Resting-State Contextualization

Lastly, BrainStat provides contextualization using resting-state fMRI markers: specifically, with the Yeo functional networks (Yeo et al., 2011, Journal of Neurophysiology), a clustering of resting-state connectivity, and the functional gradients (Margulies et al., 2016, PNAS), a lower dimensional manifold of resting-state connectivity.
Lets first have a look at contextualization of our t-statistic map using their mean within the Yeo networks.
% Get mean/sem t-stat within yeo networks
compute_sem = @(x) std(x, 'omitnan') / sqrt(sum(~isnan(x)));
mean_t_stat = yeo_networks_association(slm.t, ...
'template', 'fsaverage5');
sem_t_stat = yeo_networks_association(slm.t, ...
'template', 'fsaverage5', 'reduction_operation', compute_sem);
[network_names, colormap] = fetch_yeo_networks_metadata(7);
% Create a t-statistic bar plot for the Yeo networks.
figure;
axes('XTick', 1:7, 'XTickLabel', network_names, 'FontSize', 16);
hold on
bar(mean_t_stat, 'FaceColor', 'flat', 'CData', colormap);
errorbar(1:numel(mean_t_stat), mean_t_stat, sem_t_stat, sem_t_stat, ...
'Color', 'k', 'LineStyle', 'None');
Here we can see that the effects are largest in the ventral attention and limbic networks, and that the visual network is the only region with a negative t-statistic.
Next, lets have a look at how cortical thickness relates to the first functional gradient which describes a sensory-transmodal axis in the brain. First lets plot the first few gradients and a scatter plot of the t-statistic map versus the first gradient.
functional_gradients = fetch_gradients('fsaverage5', 'margulies2016');
gradients_plot = functional_gradients;
gradients_plot(isnan(gradients_plot)) = -inf;
% Plot the gradients on the surface.
obj = plot_hemispheres(...
gradients_plot(:,1:3), ...
{surf_left, surf_right}, ...
'LabelText', "Gradient " + (1:3));
obj.colormaps([.7 .7 .7; parula])
obj.colorlimits([-10, 11; -8, 5; -5, 3])
pretty_plot(obj)
% Plot a scatter plot
figure();
coefficients = polyfit(slm.t(mask), functional_gradients(mask, 1), 1);
no_nan = ~isnan(slm.t)' & ~isnan(functional_gradients(:,1));
r = corr(slm.t(no_nan)', functional_gradients(no_nan, 1));
scatter(slm.t(no_nan), functional_gradients(no_nan, 1), 10, 'k', '.');
hold on
plot(slm.t(no_nan), polyval(coefficients, slm.t(no_nan)), ...
'k', 'LineWidth', 3);
xlabel('t-statistic')
ylabel('Functional Gradient 1')
set(gca, 'FontSize', 16, 'YLim', [-10, 15], 'YTick', [-10, 15], 'Xtick', [-6, 4])
text(-5.5, 13, sprintf('r = %0.2f', r), 'FontSize', 16)
There are many ways to compare these gradients to cortical markers such as cortical thickness. In general, we recommend using corrections for spatial autocorrelation which are implemented in BrainSpace. We'll show a correction with spin test in this tutorial; for other methods and further details please consult the BrainSpace tutorials.
In a spin test we compare the empirical correlation between the gradient and the cortical marker to a distribution of correlations derived from data rotated across the cortical surface. The p-value then depends on the percentile of the empirical correlation within the permuted distribution.
[sphere_left, sphere_right] = fetch_template_surface('fsaverage5','layer','sphere');
t_left = slm.t(1:end/2)';
t_right = slm.t(end/2+1:end)';
% Note: for the tutorial we run only 100 permutations.
% We generally recommend at least 1000.
t_permuted = spin_permutations( ...
{t_left, t_right}, ...
{sphere_left, sphere_right}, 100, ...
'random_state', 2021);
Running Spin Permutation
t_permuted_full = squeeze([t_permuted{1}; t_permuted{2}]);
r_empirical = corr(slm.t', functional_gradients(:, 1), ...
'rows', 'pairwise');
r_permuted = corr(t_permuted_full, functional_gradients(:, 1), ...
'rows', 'pairwise');
% Significance depends on whether we do a one-tailed or two-tailed test.
% If one-tailed it depends on in which direction the test is.
p_value_right_tailed = mean(r_empirical > r_permuted);
p_value_left_tailed = mean(r_empirical < r_permuted);
p_value_two_tailed = min(p_value_right_tailed, p_value_left_tailed) * 2;
fprintf(['The two-tailed p-value between our t-statistic map ' ...
'and \ngradient 1 is %0.2g.\n'], ...
p_value_two_tailed);
The two-tailed p-value between our t-statistic map and +gradient 1 is 0.1.
That concludes the tutorials of BrainStat. If anything is unclear, or if you think you've found a bug, please post it to the Issues page of our Github.
Happy BrainStating!
function pretty_plot(obj)
% Makes the BrainSpace plots prettier inside live scripts.
obj.handles.figure.Units = 'pixels';
set(obj.handles.axes, 'Units', 'pixels')
set(obj.handles.colorbar, 'Units', 'pixels');
obj.handles.figure.Position = [1, 1, 700, size(obj.data, 2) * 95];
set(obj.handles.text, 'FontSize', 12);
for jj = 1:size(obj.data, 2) % variates
obj.handles.colorbar(jj).Position = [...
500, ...
30 + (size(obj.data, 2) * 95) - 10 - jj*90, ...
5, ...
50];
for ii = 1:4 % views
obj.handles.axes(jj, ii).Position = [...
75 + (ii-1)*105, ...
(size(obj.data, 2) * 95) - jj*95, ...
100, ...
100];
end
end
end