diff --git a/.vscode/settings.json b/.vscode/settings.json index 5657220a..c209ce1c 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,5 +1,7 @@ { - "editor.formatOnSave": true, + "[python]": { + "editor.formatOnSave": true + }, "python.testing.pytestEnabled": true, "python.testing.unittestEnabled": false, "python.testing.nosetestsEnabled": false, diff --git a/nltools/data/adjacency.py b/nltools/data/adjacency.py index 3456719e..dd0bad2c 100644 --- a/nltools/data/adjacency.py +++ b/nltools/data/adjacency.py @@ -692,13 +692,17 @@ def similarity( ignore_diagonal=False, **kwargs, ): - """Calculate similarity between two Adjacency matrices. - Default is to use spearman correlation and permutation test. + """ + Calculate similarity between two Adjacency matrices. Default is to use spearman + correlation and permutation test. + Args: - data: Adjacency data, or 1-d array same size as self.data + data (Adjacency or array): Adjacency data, or 1-d array same size as self.data perm_type: (str) '1d','2d', or None metric: (str) 'spearman','pearson','kendall' - ignore_diagonal: (bool) only applies to 'directed' Adjacency types using perm_type=None or perm_type='1d' + ignore_diagonal: (bool) only applies to 'directed' Adjacency types using + perm_type=None or perm_type='1d' + """ data1 = self.copy() if not isinstance(data, Adjacency): @@ -1015,11 +1019,6 @@ def bootstrap( ): """Bootstrap an Adjacency method. - Example Useage: - b = dat.bootstrap('mean', n_samples=5000) - b = dat.bootstrap('predict', n_samples=5000, algorithm='ridge') - b = dat.bootstrap('predict', n_samples=5000, save_weights=True) - Args: function: (str) method to apply to data for each bootstrap n_samples: (int) number of samples to bootstrap with replacement @@ -1027,7 +1026,14 @@ def bootstrap( (useful for aggregating many bootstraps on a cluster) n_jobs: (int) The number of CPUs to use to do the computation. 
-1 means all CPUs.Returns: - output: summarized studentized bootstrap output + + Returns: + summarized studentized bootstrap output + + Examples: + >>> b = dat.bootstrap('mean', n_samples=5000) + >>> b = dat.bootstrap('predict', n_samples=5000, algorithm='ridge') + >>> b = dat.bootstrap('predict', n_samples=5000, save_weights=True) """ diff --git a/nltools/data/brain_data.py b/nltools/data/brain_data.py index cacb411c..446975d0 100644 --- a/nltools/data/brain_data.py +++ b/nltools/data/brain_data.py @@ -13,7 +13,8 @@ __license__ = "MIT" from nilearn.signal import clean -from scipy.stats import ttest_1samp, pearsonr +from scipy.stats import ttest_1samp, pearsonr, spearmanr +from scipy.spatial.distance import cdist from scipy.stats import t as t_dist from scipy.signal import detrend from scipy.interpolate import pchip @@ -33,7 +34,7 @@ from joblib import Parallel, delayed from nltools.mask import expand_mask from nltools.analysis import Roc -from nilearn.input_data import NiftiMasker +from nilearn.maskers import NiftiMasker from nilearn.plotting import plot_stat_map from nilearn.image import smooth_img, resample_to_img from nilearn.masking import intersect_masks @@ -71,6 +72,7 @@ from nilearn.decoding import SearchLight import deepdish as dd from pathlib import Path +import warnings # Optional dependencies @@ -90,15 +92,12 @@ class Brain_Data(object): Y: Pandas DataFrame of training labels X: Pandas DataFrame Design Matrix for running univariate models mask: binary nifiti file to mask brain data - output_file: Name to write out to nifti file **kwargs: Additional keyword arguments to pass to the prediction algorithm """ - def __init__( - self, data=None, Y=None, X=None, mask=None, output_file=None, **kwargs - ): + def __init__(self, data=None, Y=None, X=None, mask=None, **kwargs): if mask is not None: if not isinstance(mask, nib.Nifti1Image): if isinstance(mask, str) or isinstance(mask, Path): @@ -112,8 +111,10 @@ def __init__( ) self.mask = mask else: + # Load default mask self.mask = nib.load(resolve_mni_path(MNI_Template)["mask"]) - self.nifti_masker = NiftiMasker(mask_img=self.mask) + # Learn transformation on mask + self.nifti_masker = NiftiMasker(mask_img=self.mask, **kwargs) if data is not None: if isinstance(data, str) or isinstance(data, Path): @@ -150,25 +151,36 @@ def __init__( for e in f["Y_index"] ], ) - self.mask = nib.Nifti1Image( - f["mask_data"], - affine=f["mask_affine"], - file_map={ - "image": nib.FileHolder(filename=f["mask_file_name"]) - }, - ) - nifti_masker = NiftiMasker(self.mask) - self.nifti_masker = nifti_masker.fit(self.mask) - self.file_name = f["file_name"] + if mask is None: + # User didn't request a mask so try to load it from the h5 file, + # i.e. overwrite the default mask loaded above + self.mask = nib.Nifti1Image( + f["mask_data"], + affine=f["mask_affine"], + file_map={ + "image": nib.FileHolder(filename=f["mask_file_name"]) + }, + ) + nifti_masker = NiftiMasker(self.mask) + self.nifti_masker = nifti_masker.fit(self.mask) + else: + # Mask is already set above so use the default or user requested + # mask rather than the one in h5 (if it exists) + if "mask_data" in f: + warnings.warn( + "Existing mask found in HDF5 file but is being ignored because you passed a value for mask. Set mask=None to use existing mask in the HDF5 file" + ) + # We're done initializing from the h5 file so just return no need to + # run any of the additional checks and code below. We should really + # refactor this entire init... 
return - else: data = nib.load(data) self.data = self.nifti_masker.fit_transform(data) elif isinstance(data, list): if isinstance(data[0], Brain_Data): tmp = concatenate(data) - for item in ["data", "Y", "X", "mask", "nifti_masker", "file_name"]: + for item in ["data", "Y", "X", "mask", "nifti_masker"]: setattr(self, item, getattr(tmp, item)) else: if all(isinstance(x, data[0].__class__) for x in data): @@ -222,17 +234,14 @@ def __init__( else: self.X = pd.DataFrame() - self.file_name = output_file if output_file is not None else [] - def __repr__(self): - return "%s.%s(data=%s, Y=%s, X=%s, mask=%s, output_file=%s)" % ( + return "%s.%s(data=%s, Y=%s, X=%s, mask=%s)" % ( self.__class__.__module__, self.__class__.__name__, self.shape(), len(self.Y), self.X.shape, os.path.basename(self.mask.get_filename()), - self.file_name, ) def __getitem__(self, index): @@ -479,7 +488,7 @@ def to_nifti(self): return self.nifti_masker.inverse_transform(self.data) - def write(self, file_name=None, **kwargs): + def write(self, file_name, **kwargs): """Write out Brain_Data object to Nifti or HDF5 File. Args: @@ -504,7 +513,6 @@ def write(self, file_name=None, **kwargs): "mask_affine": self.mask.affine, "mask_data": self.mask.get_fdata(), "mask_file_name": self.mask.get_filename(), - "file_name": self.file_name, }, compression=kwargs.get("compression", "blosc"), ) @@ -544,7 +552,7 @@ def plot( threshold_lower=None, figsize=(15, 2), axes=None, - **kwargs + **kwargs, ): """Create a quick plot of self.data. Will plot each image separately @@ -589,7 +597,7 @@ def plot( colorbar=colorbar, draw_cross=draw_cross, axes=axes, - **kwargs + **kwargs, ) else: if axes is not None: @@ -608,7 +616,7 @@ def plot( colorbar=colorbar, draw_cross=draw_cross, axes=a[i], - **kwargs + **kwargs, ) return elif view in ["glass", "mni", "full"]: @@ -618,7 +626,7 @@ def plot( how=view, thr_upper=threshold_upper, thr_lower=threshold_lower, - **kwargs + **kwargs, ) else: raise ValueError( @@ -905,6 +913,17 @@ def similarity(self, image, method="correlation"): """ + supported_metrics = [ + "correlation", + "pearson", + "rank_correlation", + "spearman", + "dot_product", + "cosine", + ] + if method not in supported_metrics: + raise ValueError(f"method must be one of {supported_metrics}") + image = check_brain_data(image) # Check to make sure masks are the same for each dataset and if not @@ -925,59 +944,20 @@ def similarity(self, image, method="correlation"): data2 = self.data image2 = image.data - def vector2array(data): - if len(data.shape) == 1: - return data.reshape(-1, 1).T - else: - return data - - def flatten_array(data): - if np.any(np.array(data.shape) == 1): - data = data.flatten() - if len(data) == 1 & data.shape[0] == 1: - data = data[0] - return data - - # Calculate pattern expression if method == "dot_product": - if len(image2.shape) > 1: - if image2.shape[0] > 1: - pexp = [] - for i in range(image2.shape[0]): - pexp.append(np.dot(data2, image2[i, :])) - pexp = np.array(pexp) - else: - pexp = np.dot(data2, image2) - else: - pexp = np.dot(data2, image2) - elif method == "correlation": - if len(image2.shape) > 1: - if image2.shape[0] > 1: - pexp = [] - for i in range(image2.shape[0]): - pexp.append(pearson(image2[i, :], data2)) - pexp = np.array(pexp) - else: - pexp = pearson(image2, data2) - else: - pexp = pearson(image2, data2) + func = lambda x, y: np.dot(x, y) + elif method in ["pearson", "correlation"]: + func = lambda x, y: pearsonr(x, y)[0] + elif method in ["spearman", "rank_correlation"]: + func = lambda x, y: spearmanr(x, 
y)[0]
         elif method == "cosine":
-            image2 = vector2array(image2)
-            data2 = vector2array(data2)
-            if image2.shape[1] > 1:
-                pexp = []
-                for i in range(image2.shape[0]):
-                    pexp.append(
-                        cosine_similarity(
-                            image2[i, :].reshape(-1, 1).T, data2
-                        ).flatten()
-                    )
-                pexp = np.array(pexp)
-            else:
-                pexp = cosine_similarity(image2, data2).flatten()
-        else:
-            raise ValueError("Method must be one of: correlation, dot_product, cosine")
-        return flatten_array(pexp)
+            func = method
+
+        out = cdist(np.atleast_2d(data2), np.atleast_2d(image2), func).squeeze()
+        # cdist metric argument returns distances by default (unless we specify a
+        # custom function like above) so flip it to similarity
+        out = 1 - out if method == "cosine" else out
+        return out

     def distance(self, metric="euclidean", **kwargs):
         """Calculate distance between images within a Brain_Data() instance.
@@ -1335,7 +1315,7 @@ def predict_multi(
         scoring=None,
         n_jobs=1,
         verbose=0,
-        **kwargs
+        **kwargs,
     ):
         """Perform multi-region prediction. This can be a searchlight analysis or multi-roi analysis if provided a Brain_Data instance with labeled non-overlapping rois.
@@ -1661,7 +1641,7 @@ def upload_neurovault(
         collection_id=None,
         img_type=None,
         img_modality=None,
-        **kwargs
+        **kwargs,
     ):
         """Upload Data to Neurovault. Will add any columns in self.X to image metadata. Index will be used as image name.
@@ -1726,7 +1706,7 @@ def add_image_to_collection(
                 name=img_name,
                 modality=img_modality,
                 map_type=img_type,
-                **kwargs
+                **kwargs,
             )

         if len(self.shape()) == 1:
@@ -1786,7 +1766,7 @@ def filter(self, sampling_freq=None, high_pass=None, low_pass=None, **kwargs):
             standardize=standardize,
             high_pass=high_pass,
             low_pass=low_pass,
-            **kwargs
+            **kwargs,
         )

         return out
@@ -1955,23 +1935,25 @@ def bootstrap(
         n_jobs=-1,
         random_state=None,
         *args,
-        **kwargs
+        **kwargs,
     ):
-        """Bootstrap a Brain_Data method.
-
-        Example Useage:
-        b = dat.bootstrap('mean', n_samples=5000)
-        b = dat.bootstrap('predict', n_samples=5000, algorithm='ridge')
-        b = dat.bootstrap('predict', n_samples=5000, save_weights=True)
+        """Bootstrap a `Brain_Data` method.

         Args:
             function: (str) method to apply to data for each bootstrap
             n_samples: (int) number of samples to bootstrap with replacement
-            save_weights: (bool) Save each bootstrap iteration
-                        (useful for aggregating many bootstraps on a cluster)
-            n_jobs: (int) The number of CPUs to use to do the computation.
-                        -1 means all CPUs.Returns:
-            output: summarized studentized bootstrap output
+            save_weights: (bool) Save each bootstrap iteration (useful for aggregating
+                many bootstraps on a cluster)
+            n_jobs: (int) The number of CPUs to use to do the computation. -1 means all
+                CPUs.
+
+        Returns:
+            output: summarized studentized bootstrap output
+
+        Examples:
+            >>> b = dat.bootstrap('mean', n_samples=5000)
+            >>> b = dat.bootstrap('predict', n_samples=5000, algorithm='ridge')
+            >>> b = dat.bootstrap('predict', n_samples=5000, save_weights=True)

         """
@@ -2036,16 +2018,6 @@ def align(self, target, method="procrustes", axis=0, *args, **kwargs):

         See nltools.stats.align for aligning multiple Brain_Data instances

-        Examples:
-            Hyperalign using procrustes transform:
-            out = data.align(target, method='procrustes')
-
-            Align using shared response model:
-            out = data.align(target, method='probabilistic_srm', n_features=None)
-
-            Project aligned data into original data:
-            original_data = np.dot(out['transformed'].data,out['transformation_matrix'].T)
-
         Args:
             target: (Brain_Data) object to align to.
             method: (str) alignment method to use
@@ -2056,6 +2028,13 @@
             out: (dict) a dictionary containing transformed object, transformation matrix, and the shared response matrix

+        Examples:
+            - Hyperalign using procrustes transform:
+            >>> out = data.align(target, method='procrustes')
+            - Align using shared response model:
+            >>> out = data.align(target, method='probabilistic_srm', n_features=None)
+            - Project aligned data into original data:
+            >>> original_data = np.dot(out['transformed'].data,out['transformation_matrix'].T)
         """

         if method not in ["probabilistic_srm", "deterministic_srm", "procrustes"]:
@@ -2156,19 +2135,20 @@ def find_spikes(self, global_spike_cutoff=3, diff_spike_cutoff=3):
         )

     def temporal_resample(self, sampling_freq=None, target=None, target_type="hz"):
-        """Resample Brain_Data timeseries to a new target frequency or number of samples
+        """
+        Resample Brain_Data timeseries to a new target frequency or number of samples
         using Piecewise Cubic Hermite Interpolating Polynomial (PCHIP) interpolation.
         This function can up- or down-sample data.

         Note: this function can use quite a bit of RAM.

-        Args:
-            sampling_freq: (float) sampling frequency of data in hertz
-            target: (float) upsampling target
-            target_type: (str) type of target can be [samples,seconds,hz]
+        Args:
+            sampling_freq: (float) sampling frequency of data in hertz
+            target: (float) upsampling target
+            target_type: (str) type of target can be [samples,seconds,hz]

-        Returns:
-            upsampled Brain_Data instance
+        Returns:
+            upsampled Brain_Data instance

         """

         out = self.copy()
diff --git a/nltools/data/design_matrix.py b/nltools/data/design_matrix.py
index 6a186ec2..036b2d46 100644
--- a/nltools/data/design_matrix.py
+++ b/nltools/data/design_matrix.py
@@ -464,11 +464,16 @@ def _vertcat(self, df, keep_separate, unique_cols, fill_na, verbose):

     def vif(self, exclude_polys=True):
         """
-        Compute variance inflation factor amongst columns of design matrix,ignoring polynomial terms. Much faster that statsmodels and more reliable too. Uses the same method as Matlab and R (diagonal elements of the inverted correlation matrix).
+        Compute variance inflation factor amongst columns of design matrix, ignoring
+        polynomial terms. Much faster than statsmodels and more reliable too. Uses the
+        same method as Matlab and R (diagonal elements of the inverted correlation
+        matrix).
+
+        Args:
+            exclude_polys (bool): whether to skip checking of polynomial terms (i.e. intercept, trends, basis functions); default True

         Returns:
             vifs (list): list with length == number of columns - intercept
-            exclude_polys (bool): whether to skip checking of polynomial terms (i.e intercept, trends, basis functions); default True

         """

         if self.shape[1] <= 1:
@@ -560,9 +565,8 @@ def convolve(self, conv_func="hrf", columns=None):
         return out

     def downsample(self, target, **kwargs):
-        """Downsample columns of design matrix. Relies on
-        nltools.stats.downsample, but ensures that returned object is a
-        design matrix.
+        """
+        Downsample columns of design matrix. Relies on nltools.stats.downsample, but ensures that returned object is a design matrix.

         Args:
             target(float): desired frequency in hz
@@ -588,9 +592,8 @@ def upsample(self, target, **kwargs):
         return newMat

     def upsample(self, target, **kwargs):
-        """Upsample columns of design matrix. Relies on
-        nltools.stats.upsample, but ensures that returned object is a
-        design matrix.
+        """
+        Upsample columns of design matrix. Relies on nltools.stats.upsample, but ensures that returned object is a design matrix.

         Args:
             target(float): desired frequence in hz
@@ -616,9 +619,8 @@ def zscore(self, columns=[]):
         return newMat

     def zscore(self, columns=[]):
-        """Z-score specific columns of design matrix. Relies on
-        nltools.stats.downsample, but ensures that returned object is a
-        design matrix.
+        """
+        Z-score specific columns of design matrix. Relies on nltools.stats.downsample, but ensures that returned object is a design matrix.

         Args:
             columns (list): columns to z-score; defaults to all columns
@@ -774,6 +776,11 @@ def clean(self, fill_na=0, exclude_polys=False, thresh=0.95, verbose=True):
         """

         # Temporarily turn off warnings for correlations
+        if any(self.columns.duplicated()):
+            raise ValueError(
+                "Duplicate column names detected. Using .clean() with duplicate columns is not supported as it can produce unexpected results"
+            )
+
         old_settings = np.seterr(all="ignore")
         if fill_na is not None:
             out = self.fillna(fill_na)
@@ -801,6 +808,10 @@ def clean(self, fill_na=0, exclude_polys=False, thresh=0.95, verbose=True):
                 out = out.drop(remove, axis=1)
                 out.polys = [elem for elem in out.polys if elem not in remove]
                 out = out._sort_cols()
+                if out.shape[1] >= self.shape[1]:
+                    raise ValueError(
+                        f"Removing {len(remove)} cols has somehow made the design matrix LARGER or failed to change its shape. Previously had {self.shape[1]} columns and now has {out.shape[1]} columns. This usually happens if you had duplicate column names prior to calling clean!"
+                    )
             else:
                 print("Dropping columns not needed...skipping")
         np.seterr(**old_settings)
diff --git a/nltools/file_reader.py b/nltools/file_reader.py
index 8913b7de..79dde817 100644
--- a/nltools/file_reader.py
+++ b/nltools/file_reader.py
@@ -28,7 +28,7 @@ def onsets_to_dm(
     **kwargs,
 ):
     """
-    This function can assist in reading in one or several in a 2-3 column onsets files, specified in seconds and converting it to a Design Matrix organized as samples X Stimulus Classes. sampling_freq should be specified in hertz; for TRs use hertz = 1/TR. Onsets files **must** be organized with columns in one of the following 4 formats:
+    This function can assist in reading in one or several 2-3 column onsets files, specified in seconds, and converting them to a Design Matrix organized as samples X Stimulus Classes. sampling_freq should be specified in hertz; for TRs use hertz = 1/TR. Onsets files **must** be organized with columns in one of the following 4 formats:

     1) 'Stim, Onset'
     2) 'Onset, Stim'
@@ -38,22 +38,23 @@
     No other file organizations are currently supported. *Note:* Stimulus offsets (onset + duration) that fall into an adjacent TR include that full TR. E.g. offset of 10.16s with TR = 2 has an offset of TR 5, which spans 10-12s, rather than an offset of TR 4, which spans 8-10s.

     Args:
-        F (filepath/DataFrame/list): path to file, pandas dataframe, or list of files or pandas dataframes
-        sampling_freq (float): sampling frequency in hertz; for TRs use (1 / TR)
-        run_length (int): number of TRs in the run these onsets came from
-        sort (bool, optional): whether to sort the columns of the resulting
-            design matrix alphabetically; defaults to
-            False
-        addpoly (int, optional: what order polynomial terms to add as new columns (e.g. 0 for intercept, 1 for linear trend and intercept, etc); defaults to None
-        header (str,optional): None if missing header, otherwise pandas
-            header keyword; defaults to 'infer'
-        keep_separate (bool): whether to seperate polynomial columns if reading a list of files and using the addpoly option; defaults to True
-        unique_cols (list, optional): additional columns to keep seperate across files (e.g. spikes); defaults to []
-        fill_na (str/int/float, optional): what value fill NaNs in with if reading in a list of files; defaults to None
-        kwargs: additional inputs to pandas.read_csv
-
-    Returns:
-        Design_Matrix class
-
+        F (str/Path/pd.DataFrame): filepath or pandas dataframe
+        sampling_freq (float): sampling frequency in hertz, i.e. 1 / TR
+        run_length (int): run length in number of TRs
+        header (str/None, optional): whether there's an additional header row in the
+            supplied file/dataframe. See `pd.read_csv` for more details. Defaults to `"infer"`.
+        sort (bool, optional): whether to sort dataframe columns alphabetically. Defaults to False.
+        keep_separate (bool, optional): if a list of files or dataframes is supplied,
+            whether to create separate polynomial columns per file. Defaults to `True`.
+        add_poly (bool/int, optional): whether to add Nth order polynomials to design
+            matrix. Defaults to None.
+        unique_cols (list/None, optional): if a list of files or dataframes is supplied,
+            what additional columns to keep separate per file (e.g. spikes). Defaults to None.
+        fill_na (Any, optional): what to replace NaNs with. Defaults to None (no filling).
+
+
+    Returns:
+        nltools.data.Design_Matrix: design matrix organized as TRs x Stims
     """

     if not isinstance(F, list):
@@ -128,18 +129,19 @@ def conditional_round(x, TR):
                 X.loc[row["Onset"] : row["Offset"], row["Stim"]] = 1
             else:
                 X.loc[row["Onset"], row["Stim"]] = 1
+    # DISABLED because this isn't quite accurate for stimuli of different durations
     # Run a check
-    if "Offset" in df.columns:
-        onsets = X.sum().values
-        stim_counts = data.Stim.value_counts(sort=False)[X.columns]
-        durations = data.groupby("Stim").Duration.mean().values
-        for i, (o, c, d) in enumerate(zip(onsets, stim_counts, durations)):
-            if c * (d / TR) <= o <= c * ((d / TR) + 1):
-                pass
-            else:
-                warnings.warn(
-                    f"Computed onsets for {data.Stim.unique()[i]} are inconsistent with expected values. Please manually verify the outputted Design_Matrix!"
-                )
+    # if "Offset" in df.columns:
+    #     onsets = X.sum().values
+    #     stim_counts = data.Stim.value_counts(sort=False)[X.columns]
+    #     durations = data.groupby("Stim").Duration.mean().values
+    #     for i, (o, c, d) in enumerate(zip(onsets, stim_counts, durations)):
+    #         if c * (d / TR) <= o <= c * ((d / TR) + 1):
+    #             pass
+    #         else:
+    #             warnings.warn(
+    #                 f"Computed onsets for {data.Stim.unique()[i]} are inconsistent ({o}) with expected values ({c * (d / TR)} to {c * ((d / TR) + 1)}). Please manually verify the outputted Design_Matrix!"
+    #             )

     if sort:
         X = X.reindex(sorted(X.columns), axis=1)
diff --git a/nltools/mask.py b/nltools/mask.py
index c8c12d51..976e4e3f 100644
--- a/nltools/mask.py
+++ b/nltools/mask.py
@@ -155,9 +155,7 @@ def collapse_mask(mask, auto_label=True, custom_mask=None):
             m_list.append(mask[x].to_nifti())
         intersect = intersect_masks(m_list, threshold=1, connected=False)
         intersect = Brain_Data(
-            nib.Nifti1Image(
-                np.abs(intersect.get_data() - 1), intersect.get_affine()
-            ),
+            nib.Nifti1Image(np.abs(intersect.get_data() - 1), intersect.affine),
             mask=custom_mask,
         )
diff --git a/nltools/plotting.py b/nltools/plotting.py
index 7cfc34f1..0543d1a4 100644
--- a/nltools/plotting.py
+++ b/nltools/plotting.py
@@ -146,12 +146,12 @@ def plot_t_brain(
     """
     Takes a brain data object and computes a 1 sample t-test across it's first axis. If a list is provided will compute difference between brain data objects in list (i.e. paired samples t-test).

     Args:
-        objIn:(list/Brain_Data) if list will compute difference map first
-        how: (list) whether to plot a glass brain 'glass', 3 view-multi-slice mni 'mni', or both 'full'
-        thr: (str) what method to use for multiple comparisons correction unc, fdr, or tfce
-        alpha: (float) p-value threshold
-        nperm: (int) number of permutations for tcfe; default 1000
-        cut_coords: (list) x,y,z coords to plot brain slice
+        objIn (list/Brain_Data): if list will compute difference map first
+        how (list): whether to plot a glass brain 'glass', 3 view-multi-slice mni 'mni', or both 'full'
+        thr (str): what method to use for multiple comparisons correction unc, fdr, or tfce
+        alpha (float): p-value threshold
+        nperm (int): number of permutations for tfce; default 1000
+        cut_coords (list): x,y,z coords to plot brain slice
         kwargs: optionals args to nilearn plot functions (e.g. vmax)

     """
@@ -250,11 +250,12 @@ def plot_t_brain(

 def plot_brain(objIn, how="full", thr_upper=None, thr_lower=None, save=False, **kwargs):
     """
     More complete brain plotting of a Brain_Data instance
+
     Args:
-        obj: (Brain_Data) object to plot
-        how: (str) whether to plot a glass brain 'glass', 3 view-multi-slice mni 'mni', or both 'full'
-        thr_upper: (str/float) thresholding of image. Can be string for percentage, or float for data units (see Brain_Data.threshold()
-        thr_lower: (str/float) thresholding of image. Can be string for percentage, or float for data units (see Brain_Data.threshold()
+        obj (Brain_Data): object to plot
+        how (str): whether to plot a glass brain 'glass', 3 view-multi-slice mni 'mni', or both 'full'
+        thr_upper (str/float): thresholding of image. Can be string for percentage, or float for data units (see Brain_Data.threshold())
+        thr_lower (str/float): thresholding of image. Can be string for percentage, or float for data units (see Brain_Data.threshold())
         save (str): if a string file name or path is provided plots will be saved into this directory appended with the orientation they belong to
         kwargs: optionals args to nilearn plot functions (e.g.
vmax) diff --git a/nltools/simulator.py b/nltools/simulator.py index 8aa48efa..fc39e535 100755 --- a/nltools/simulator.py +++ b/nltools/simulator.py @@ -16,7 +16,7 @@ import nibabel as nib import pandas as pd import matplotlib.pyplot as plt -from nilearn.input_data import NiftiMasker +from nilearn.maskers import NiftiMasker from scipy.stats import multivariate_normal, binom, ttest_1samp from nltools.data import Brain_Data from nltools.stats import fdr, one_sample_permutation diff --git a/nltools/stats.py b/nltools/stats.py index 3bbebd5c..1d20f9bc 100644 --- a/nltools/stats.py +++ b/nltools/stats.py @@ -225,7 +225,7 @@ def multi_threshold(t_map, p_map, thresh): if not isinstance(thresh, list): raise ValueError("Make sure thresh is a list of p-values") - affine = t_map.to_nifti().get_affine() + affine = t_map.to_nifti().affine pos_out = np.zeros(t_map.to_nifti().shape) neg_out = deepcopy(pos_out) for thr in thresh: @@ -812,9 +812,9 @@ def transform_pairwise(X, y): there are the same number of -1 as +1 Reference: "Large Margin Rank Boundaries for Ordinal Regression", - R. Herbrich, T. Graepel, K. Obermayer. - Authors: Fabian Pedregosa - Alexandre Gramfort + R. Herbrich, T. Graepel, K. Obermayer. Authors: Fabian Pedregosa + Alexandre Gramfort + Args: X: (np.array), shape (n_samples, n_features) The data @@ -996,12 +996,34 @@ def regress(X, Y, mode="ols", stats="full", **kwargs): Does NOT add an intercept automatically to the X matrix before fitting like some other software packages. This is left up to the user. This function can compute regression in 3 ways: - 1) Standard OLS - 2) OLS with robust sandwich estimators for standard errors. 3 robust types of estimators exist: - 1) 'hc0' - classic huber-white estimator robust to heteroscedasticity (default) - 2) 'hc3' - a variant on huber-white estimator slightly more conservative when sample sizes are small - 3) 'hac' - an estimator robust to both heteroscedasticity and auto-correlation; auto-correlation lag can be controlled with the 'nlags' keyword argument; default is 1 - 3) ARMA (auto-regressive moving-average) model (experimental). This model is fit through statsmodels.tsa.arima_model.ARMA, so more information about options can be found there. Any settings can be passed in as kwargs. By default fits a (1,1) model with starting lags of 2. This mode is **computationally intensive** and can take quite a while if Y has many columns. If Y is a 2d array joblib.Parallel is used for faster fitting by parallelizing fits across columns of Y. Parallelization can be controlled by passing in kwargs. Defaults to multi-threading using 10 separate threads, as threads don't require large arrays to be duplicated in memory. Defaults are also set to enable memory-mapping for very large arrays if backend='multiprocessing' to prevent crashes and hangs. Various levels of progress can be monitored using the 'disp' (statsmodels) and 'verbose' (joblib) keyword arguments with integer values > 0. + + 1. Standard OLS + 2. OLS with robust sandwich estimators for standard errors. 3 robust types of + estimators exist: + + - 'hc0' - classic huber-white estimator robust to heteroscedasticity (default) + - 'hc3' - a variant on huber-white estimator slightly more conservative when sample sizes are small + - 'hac' - an estimator robust to both heteroscedasticity and auto-correlation; + auto-correlation lag can be controlled with the `nlags` keyword argument; default + is 1 + + 3. ARMA (auto-regressive moving-average) model (experimental). 
This model is fit through statsmodels.tsa.arima_model.ARMA, so more information about options can be found there. Any settings can be passed in as kwargs. By default fits a (1,1) model with starting lags of 2. This mode is **computationally intensive** and can take quite a while if Y has many columns. If Y is a 2d array joblib.Parallel is used for faster fitting by parallelizing fits across columns of Y. Parallelization can be controlled by passing in kwargs. Defaults to multi-threading using 10 separate threads, as threads don't require large arrays to be duplicated in memory. Defaults are also set to enable memory-mapping for very large arrays if backend='multiprocessing' to prevent crashes and hangs. Various levels of progress can be monitored using the 'disp' (statsmodels) and 'verbose' (joblib) keyword arguments with integer values > 0. + + Args: + X (ndarray): design matrix; assumes intercept is included + Y (ndarray): dependent variable array; if 2d, a model is fit to each column of Y separately + mode (str): kind of model to fit; must be one of 'ols' (default), 'robust', or 'arma' + robust_estimator (str,optional): kind of robust estimator to use if mode = 'robust'; default 'hc0' + nlags (int,optional): auto-correlation lag correction if mode = 'robust' and robust_estimator = 'hac'; default 1 + order (tuple,optional): auto-regressive and moving-average orders for mode = 'arma'; default (1,1) + kwargs (dict): additional keyword arguments to statsmodels.tsa.arima_model.ARMA and joblib.Parallel + + Returns: + b: coefficients + t: t-statistics (coef/sterr) + p : p-values + df: degrees of freedom + res: residuals Examples: Standard OLS @@ -1024,22 +1046,6 @@ def regress(X, Y, mode="ols", stats="full", **kwargs): >>> results = regress(X,Y,mode='arma',order=(2,3),backend='multiprocessing',n_jobs=8) - Args: - X (ndarray): design matrix; assumes intercept is included - Y (ndarray): dependent variable array; if 2d, a model is fit to each column of Y separately - mode (str): kind of model to fit; must be one of 'ols' (default), 'robust', or 'arma' - robust_estimator (str,optional): kind of robust estimator to use if mode = 'robust'; default 'hc0' - nlags (int,optional): auto-correlation lag correction if mode = 'robust' and robust_estimator = 'hac'; default 1 - order (tuple,optional): auto-regressive and moving-average orders for mode = 'arma'; default (1,1) - kwargs (dict): additional keyword arguments to statsmodels.tsa.arima_model.ARMA and joblib.Parallel - - Returns: - b: coefficients - t: t-statistics (coef/sterr) - p : p-values - df: degrees of freedom - res: residuals - """ if not isinstance(mode, str): @@ -1192,15 +1198,6 @@ def align(data, method="deterministic_srm", n_features=None, axis=0, *args, **kw original data using Tranformation matrix. Inputs must be a list of Brain_Data instances or numpy arrays (observations by features). 
- Examples: - Hyperalign using procrustes transform: - out = align(data, method='procrustes') - - Align using shared response model: - out = align(data, method='probabilistic_srm', n_features=None) - - Project aligned data into original data: - original_data = [np.dot(t.data,tm.T) for t,tm in zip(out['transformed'], out['transformation_matrix'])] Args: data: (list) A list of Brain_Data objects @@ -1215,6 +1212,13 @@ def align(data, method="deterministic_srm", n_features=None, axis=0, *args, **kw matrices, a list of transformation matrices, the shared response matrix, and the intersubject correlation of the shared resposnes + Examples: + - Hyperalign using procrustes transform: + >>> out = align(data, method='procrustes') + - Align using shared response model: + >>> out = align(data, method='probabilistic_srm', n_features=None) + - Project aligned data into original data: + >>> original_data = [np.dot(t.data,tm.T) for t,tm in zip(out['transformed'], out['transformation_matrix'])] """ from nltools.data import Brain_Data, Adjacency diff --git a/nltools/tests/test_brain_data.py b/nltools/tests/test_brain_data.py index eccf67a3..89dd40b4 100644 --- a/nltools/tests/test_brain_data.py +++ b/nltools/tests/test_brain_data.py @@ -9,7 +9,7 @@ from nltools.mask import create_sphere, roi_to_brain from pathlib import Path -# from nltools.prefs import MNI_Template +from nltools.prefs import MNI_Template shape_3d = (91, 109, 91) @@ -52,7 +52,7 @@ def test_load(tmpdir): # Test i/o for hdf5 dat.write(os.path.join(str(tmpdir.join("test_write.h5")))) b = Brain_Data(os.path.join(tmpdir.join("test_write.h5"))) - for k in ["X", "Y", "mask", "nifti_masker", "file_name", "data"]: + for k in ["X", "Y", "mask", "nifti_masker", "data"]: if k == "data": assert np.allclose(b.__dict__[k], dat.__dict__[k]) elif k in ["X", "Y"]: @@ -68,6 +68,14 @@ def test_load(tmpdir): ) else: assert b.__dict__[k] == dat.__dict__[k] + # Test situation where we present a user warning when they're trying to load an .h5 + # file that includes a mask AND they pass in value for the mask argument. 
In this + # case the mask argument takes precedence so we warn the user + with pytest.warns(UserWarning): + bb = Brain_Data( + os.path.join(tmpdir.join("test_write.h5")), mask=MNI_Template["mask"] + ) + assert bb.mask.get_filename() == MNI_Template["mask"] def test_shape(sim_brain_data): @@ -506,6 +514,17 @@ def test_similarity(sim_brain_data): assert len(r) == shape_2d[0] r = sim_brain_data.similarity(stats["weight_map"], method="cosine") assert len(r) == shape_2d[0] + r = sim_brain_data.similarity(stats["weight_map"], method="spearman") + assert len(r) == shape_2d[0] + r = sim_brain_data.similarity(stats["weight_map"], method="pearson") + assert len(r) == shape_2d[0] + r = sim_brain_data.similarity(stats["weight_map"], method="correlation") + assert len(r) == shape_2d[0] + r = sim_brain_data.similarity(stats["weight_map"], method="rank_correlation") + assert len(r) == shape_2d[0] + r = stats["weight_map"].similarity(sim_brain_data) + assert len(r) == shape_2d[0] + r = sim_brain_data.similarity(sim_brain_data, method="correlation") assert r.shape == (sim_brain_data.shape()[0], sim_brain_data.shape()[0]) r = sim_brain_data.similarity(sim_brain_data, method="dot_product") diff --git a/nltools/tests/test_design_matrix.py b/nltools/tests/test_design_matrix.py index ce6cd7f2..5c65dcb9 100644 --- a/nltools/tests/test_design_matrix.py +++ b/nltools/tests/test_design_matrix.py @@ -1,6 +1,8 @@ import numpy as np +import pandas as pd from nltools.data import Design_Matrix from nltools.external.hrf import glover_hrf +import pytest def test_add_poly(sim_design_matrix): @@ -127,3 +129,16 @@ def test_append(sim_design_matrix): run = run.add_poly(2) all_runs = all_runs.append(run, unique_cols=["stim*", "cond*"]) assert all_runs.shape == (44, 28) + + +def test_clean(sim_design_matrix): + + # Drop correlated column + corr_cols = sim_design_matrix.assign(new_col=lambda df: df.iloc[:, 0]) + out = corr_cols.clean(verbose=True) + assert out.shape[1] < corr_cols.shape[1] + + # Raise an error if try to clean with an input matrix that has duplicate column names + dup_cols = pd.concat([sim_design_matrix, sim_design_matrix], axis=1) + with pytest.raises(ValueError): + _ = dup_cols.clean() diff --git a/nltools/version.py b/nltools/version.py index 3f24b49b..a510e061 100644 --- a/nltools/version.py +++ b/nltools/version.py @@ -1,4 +1,4 @@ """Specifies current version of nltools to be used by setup.py and __init__.py """ -__version__ = "0.4.5" +__version__ = "0.4.6"
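Usage sketch (not part of the patch itself): a minimal example of the new Brain_Data.similarity metric names exercised by the tests in this changeset. The Simulator-based setup mirrors the fixture pattern used elsewhere in the nltools test suite; the variable names and simulation parameters below are illustrative assumptions, not values taken from the diff.

    from nltools.simulator import Simulator

    # Simulate a small Brain_Data object (labels, noise sigma, and reps are
    # arbitrary example values, chosen only for illustration)
    sim = Simulator()
    dat = sim.create_data([0, 1], 1, reps=3, output_dir=None)
    weight_map = dat.mean()

    # The refactored similarity() accepts these metric aliases and routes them
    # through scipy's pearsonr/spearmanr/cdist
    r_pearson = dat.similarity(weight_map, method="pearson")  # alias of "correlation"
    r_spearman = dat.similarity(weight_map, method="rank_correlation")  # alias of "spearman"
    r_cosine = dat.similarity(weight_map, method="cosine")

    # Unsupported metrics now fail fast with a ValueError
    try:
        dat.similarity(weight_map, method="euclidean")
    except ValueError as e:
        print(e)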