load_confounds/parser.py
File `parser.py` has 296 lines of code (exceeds 250 allowed). Consider refactoring."""Flexible method to load confounds generated by fMRIprep. Authors: load_confounds team"""import numpy as npimport pandas as pdfrom . import confounds as cffrom .compcor import _find_compcor # Global variables listing the admissible types of noise componentsall_confounds = [ "motion", "high_pass", "wm_csf", "global", "compcor", "ica_aroma", "scrub", "non_steady_state",] Function `_sanitize_strategy` has a Cognitive Complexity of 8 (exceeds 5 allowed). Consider refactoring.def _sanitize_strategy(strategy): """Defines the supported denoising strategies.""" if isinstance(strategy, list): for conf in strategy: if not conf in all_confounds: raise ValueError(f"{conf} is not a supported type of confounds.") else: raise ValueError("strategy needs to be a list of strings") # add non steady state if not present if "non_steady_state" not in strategy: strategy.append("non_steady_state") return strategy def _check_error(missing_confounds, missing_keys): """Consolidate a single error message across multiple missing confounds.""" if missing_confounds or missing_keys: error_msg = ( "The following keys or parameters are missing: " + f" {missing_confounds}" + f" {missing_keys}" + ". You may want to try a different denoising strategy." ) raise ValueError(error_msg) class Confounds: """ Confounds from fmriprep Parameters ---------- strategy : list of strings The type of noise confounds to include. "motion" head motion estimates. "high_pass" discrete cosines covering low frequencies. "wm_csf" confounds derived from white matter and cerebrospinal fluid. "global" confounds derived from the global signal. "ica_aroma" confounds derived from ICA-AROMA. "scrub" regressors for Power 2014 scrubbing approach. motion : string, optional Type of confounds extracted from head motion estimates. "basic" translation/rotation (6 parameters) "power2" translation/rotation + quadratic terms (12 parameters) "derivatives" translation/rotation + derivatives (12 parameters) "full" translation/rotation + derivatives + quadratic terms + power2d derivatives (24 parameters) n_motion : float Number of pca components to keep from head motion estimates. If the parameters is strictly comprised between 0 and 1, a principal component analysis is applied to the motion parameters, and the number of extracted components is set to exceed `n_motion` percent of the parameters variance. If the n_components = 0, then no PCA is performed. fd_thresh : float, optional Framewise displacement threshold for scrub (default = 0.2 mm) std_dvars_thresh : float, optional Standardized DVARS threshold for scrub (default = 3) wm_csf : string, optional Type of confounds extracted from masks of white matter and cerebrospinal fluids. "basic" the averages in each mask (2 parameters) "power2" averages and quadratic terms (4 parameters) "derivatives" averages and derivatives (4 parameters) "full" averages + derivatives + quadratic terms + power2d derivatives (8 parameters) global_signal : string, optional Type of confounds extracted from the global signal. "basic" just the global signal (1 parameter) "power2" global signal and quadratic term (2 parameters) "derivatives" global signal and derivative (2 parameters) "full" global signal + derivatives + quadratic terms + power2d derivatives (4 parameters) scrub : string, optional Type of scrub of frames with excessive motion (Power et al. 2014) "basic" remove time frames based on excessive FD and DVARS "full" also remove time windows which are too short after scrubbing. one-hot encoding vectors are added as regressors for each scrubbed frame. compcor : string, optional Type of confounds extracted from a component based noise correction method "anat" noise components calculated using anatomical compcor "temp" noise components calculated using temporal compcor "full" noise components calculated using both temporal and anatomical n_compcor : int or "auto", optional The number of noise components to be extracted. For acompcor_combined=False, and/or compcor="full", this is the number of components per mask. Default is "auto": select all components (50% variance explained by fMRIPrep defaults) acompcor_combined: boolean, optional If true, use components generated from the combined white matter and csf masks. Otherwise, components are generated from each mask separately and then concatenated. ica_aroma : None or string, optional None: default, not using ICA-AROMA related strategy "basic": use noise IC only. "full": use fMRIprep output `~desc-smoothAROMAnonaggr_bold.nii.gz` . demean : boolean, optional If True, the confounds are standardized to a zero mean (over time). This step is critical if the confounds are regressed out of time series using nilearn with no or zscore standardization, but should be turned off with "spc" normalization. Attributes ---------- `confounds_` : pandas.DataFrame The confounds loaded using the specified model. The columns of the dataframe contains the labels. `sample_mask_` : list of int The index of the niimgs along time/fourth dimension. This list includes indices for valid volumes for subsequent analysis. This attribute should be passed to parameter `sample_mask` of nilearn.NiftiMasker. Volumnes are removed if flagges as following: - Non-steady-state volumes (if present) - Motion outliers detected by scrubbing Notes ----- The predefined strategies implemented in this class are adapted from (Ciric et al. 2017). Band-pass filter is replaced by high-pass filter. Low-pass filters can be implemented, e.g., through nilearn maskers. Scrubbing is implemented by introducing regressors in the confounds, rather than eliminating time points. Other aspects of the preprocessing listed in Ciric et al. (2017) are controlled through fMRIprep, e.g. distortion correction. References ---------- Ciric et al., 2017 "Benchmarking of participant-level confound regression strategies for the control of motion artifact in studies of functional connectivity" Neuroimage 154: 174-87 https://doi.org/10.1016/j.neuroimage.2017.03.020 """ Function `__init__` has 13 arguments (exceeds 4 allowed). Consider refactoring. def __init__( self, strategy=["motion", "high_pass", "wm_csf"], motion="full", n_motion=0, scrub="full", fd_thresh=0.2, std_dvars_thresh=3, wm_csf="basic", global_signal="basic", compcor="anat", acompcor_combined=True, n_compcor="auto", ica_aroma=None, demean=True, ): """Default parameters.""" self.strategy = _sanitize_strategy(strategy) self.motion = motion self.n_motion = n_motion self.scrub = scrub self.fd_thresh = fd_thresh self.std_dvars_thresh = std_dvars_thresh self.wm_csf = wm_csf self.global_signal = global_signal self.compcor = compcor self.acompcor_combined = acompcor_combined self.n_compcor = n_compcor self.ica_aroma = ica_aroma self.demean = demean def load(self, img_files): """ Load fMRIprep confounds and sample mask Parameters ---------- img_files : path to processed image files, optionally as a list. Processed nii.gz/dtseries.nii/func.gii file from fmriprep. `nii.gz` or `dtseries.nii`: path to files, optionally as a list. `func.gii`: list of a pair of paths to files, optionally as a list of lists. The companion tsv will be automatically detected. Returns ------- confounds : pandas.DataFrame or list of pandas.DataFrame A reduced version of fMRIprep confounds based on selected strategy and flags. An intercept is automatically added to the list of confounds. The columns contains the labels of the regressors. sample_mask : list or list of list Index of time point to be preserved in the analysis """ return self._parse(img_files) def _parse(self, img_files): """Parse input image, find confound files and scrubbing etc.""" img_files, flag_single = cf._sanitize_confounds(img_files) confounds_out = [] sample_mask_out = [] self.missing_confounds_ = [] self.missing_keys_ = [] for file in img_files: sample_mask, conf = self._load_single(file) confounds_out.append(conf) sample_mask_out.append(sample_mask) # If a single input was provided, # send back a single output instead of a list if flag_single: confounds_out = confounds_out[0] sample_mask_out = sample_mask_out[0] self.confounds_ = confounds_out self.sample_mask_ = sample_mask_out return confounds_out, sample_mask_out def _load_single(self, confounds_raw): """Load a single confounds file from fmriprep.""" # Convert tsv file to pandas dataframe # check if relevant imaging files are present according to the strategy flag_acompcor = ("compcor" in self.strategy) and (self.compcor == "anat") flag_full_aroma = ("ica_aroma" in self.strategy) and (self.ica_aroma == "full") confounds_raw, self.json_ = cf._confounds_to_df( confounds_raw, flag_acompcor, flag_full_aroma ) confounds = pd.DataFrame() for confound in self.strategy: loaded_confounds = self._load_confound(confounds_raw, confound) confounds = pd.concat([confounds, loaded_confounds], axis=1) _check_error(self.missing_confounds_, self.missing_keys_) sample_mask, confounds= cf._prepare_output( confounds, self.demean ) return sample_mask, confounds def _load_confound(self, confounds_raw, confound): """Load a single type of confound.""" try: loaded_confounds = getattr(self, f"_load_{confound}")(confounds_raw) except cf.MissingConfound as exception: self.missing_confounds_ += exception.params self.missing_keys_ += exception.keywords loaded_confounds = pd.DataFrame() return loaded_confounds def _load_motion(self, confounds_raw): """Load the motion regressors.""" motion_params = cf._add_suffix( ["trans_x", "trans_y", "trans_z", "rot_x", "rot_y", "rot_z"], self.motion ) cf._check_params(confounds_raw, motion_params) confounds_motion = confounds_raw[motion_params] # Optionally apply PCA reduction if self.n_motion > 0: confounds_motion = cf._pca_motion( confounds_motion, n_components=self.n_motion ) return confounds_motion def _load_high_pass(self, confounds_raw): """Load the high pass filter regressors.""" high_pass_params = cf._find_confounds(confounds_raw, ["cosine"]) return confounds_raw[high_pass_params] def _load_wm_csf(self, confounds_raw): """Load the regressors derived from the white matter and CSF masks.""" wm_csf_params = cf._add_suffix(["csf", "white_matter"], self.wm_csf) cf._check_params(confounds_raw, wm_csf_params) return confounds_raw[wm_csf_params] def _load_global(self, confounds_raw): """Load the regressors derived from the global signal.""" global_params = cf._add_suffix(["global_signal"], self.global_signal) cf._check_params(confounds_raw, global_params) return confounds_raw[global_params] def _load_compcor(self, confounds_raw): """Load compcor regressors.""" compcor_cols = _find_compcor( self.json_, self.compcor, self.n_compcor, self.acompcor_combined ) cf._check_params(confounds_raw, compcor_cols) return confounds_raw[compcor_cols] def _load_ica_aroma(self, confounds_raw): """Load the ICA-AROMA regressors.""" if self.ica_aroma is None: raise ValueError("Please select an option when using ICA-AROMA strategy") if self.ica_aroma == "full": return pd.DataFrame() if self.ica_aroma == "basic": ica_aroma_params = cf._find_confounds(confounds_raw, ["aroma"]) return confounds_raw[ica_aroma_params] def _load_scrub(self, confounds_raw): """Perform basic scrub - Remove volumes if framewise displacement exceeds threshold.""" n_scans = len(confounds_raw) # Get indices of fd outliers fd_outliers = np.where( confounds_raw["framewise_displacement"] > self.fd_thresh )[0] dvars_outliers = np.where(confounds_raw["std_dvars"] > self.std_dvars_thresh)[0] combined_outliers = np.sort( np.unique(np.concatenate((fd_outliers, dvars_outliers))) ) # Do full scrubbing if desired, and motion outliers were detected if self.scrub == "full" and len(combined_outliers) > 0: combined_outliers = cf._optimize_scrub(combined_outliers, n_scans) # Make one-hot encoded motion outlier regressors motion_outlier_regressors = pd.DataFrame( np.transpose(np.eye(n_scans)[combined_outliers]).astype(int) ) column_names = [ "motion_outlier_" + str(num) for num in range(np.shape(motion_outlier_regressors)[1]) ] motion_outlier_regressors.columns = column_names return motion_outlier_regressors def _load_non_steady_state(self, confounds_raw): """Find non steady state regressors.""" nss_outliers = cf._find_confounds(confounds_raw, ["non_steady_state"]) if nss_outliers: return confounds_raw[nss_outliers] else: return pd.DataFrame()