SIMEXP/load_confounds

View on GitHub
load_confounds/parser.py

Summary

Maintainability
A
3 hrs
Test Coverage
File `parser.py` has 296 lines of code (exceeds 250 allowed). Consider refactoring.
"""Flexible method to load confounds generated by fMRIprep.
 
Authors: load_confounds team
"""
import numpy as np
import pandas as pd
from . import confounds as cf
from .compcor import _find_compcor
 
# Global variables listing the admissible types of noise components
all_confounds = [
"motion",
"high_pass",
"wm_csf",
"global",
"compcor",
"ica_aroma",
"scrub",
"non_steady_state",
]
 
 
Function `_sanitize_strategy` has a Cognitive Complexity of 8 (exceeds 5 allowed). Consider refactoring.
def _sanitize_strategy(strategy):
"""Defines the supported denoising strategies."""
if isinstance(strategy, list):
for conf in strategy:
if not conf in all_confounds:
raise ValueError(f"{conf} is not a supported type of confounds.")
else:
raise ValueError("strategy needs to be a list of strings")
# add non steady state if not present
if "non_steady_state" not in strategy:
strategy.append("non_steady_state")
return strategy
 
 
def _check_error(missing_confounds, missing_keys):
"""Consolidate a single error message across multiple missing confounds."""
if missing_confounds or missing_keys:
error_msg = (
"The following keys or parameters are missing: "
+ f" {missing_confounds}"
+ f" {missing_keys}"
+ ". You may want to try a different denoising strategy."
)
raise ValueError(error_msg)
 
 
class Confounds:
"""
Confounds from fmriprep
 
Parameters
----------
strategy : list of strings
The type of noise confounds to include.
"motion" head motion estimates.
"high_pass" discrete cosines covering low frequencies.
"wm_csf" confounds derived from white matter and cerebrospinal fluid.
"global" confounds derived from the global signal.
"ica_aroma" confounds derived from ICA-AROMA.
"scrub" regressors for Power 2014 scrubbing approach.
 
motion : string, optional
Type of confounds extracted from head motion estimates.
"basic" translation/rotation (6 parameters)
"power2" translation/rotation + quadratic terms (12 parameters)
"derivatives" translation/rotation + derivatives (12 parameters)
"full" translation/rotation + derivatives + quadratic terms + power2d derivatives (24 parameters)
 
n_motion : float
Number of pca components to keep from head motion estimates.
If the parameters is strictly comprised between 0 and 1, a principal component
analysis is applied to the motion parameters, and the number of extracted
components is set to exceed `n_motion` percent of the parameters variance.
If the n_components = 0, then no PCA is performed.
 
fd_thresh : float, optional
Framewise displacement threshold for scrub (default = 0.2 mm)
 
std_dvars_thresh : float, optional
Standardized DVARS threshold for scrub (default = 3)
 
wm_csf : string, optional
Type of confounds extracted from masks of white matter and cerebrospinal fluids.
"basic" the averages in each mask (2 parameters)
"power2" averages and quadratic terms (4 parameters)
"derivatives" averages and derivatives (4 parameters)
"full" averages + derivatives + quadratic terms + power2d derivatives (8 parameters)
 
global_signal : string, optional
Type of confounds extracted from the global signal.
"basic" just the global signal (1 parameter)
"power2" global signal and quadratic term (2 parameters)
"derivatives" global signal and derivative (2 parameters)
"full" global signal + derivatives + quadratic terms + power2d derivatives (4 parameters)
 
scrub : string, optional
Type of scrub of frames with excessive motion (Power et al. 2014)
"basic" remove time frames based on excessive FD and DVARS
"full" also remove time windows which are too short after scrubbing.
one-hot encoding vectors are added as regressors for each scrubbed frame.
 
compcor : string, optional
Type of confounds extracted from a component based noise correction method
"anat" noise components calculated using anatomical compcor
"temp" noise components calculated using temporal compcor
"full" noise components calculated using both temporal and anatomical
 
n_compcor : int or "auto", optional
The number of noise components to be extracted. For acompcor_combined=False,
and/or compcor="full", this is the number of components per mask.
Default is "auto": select all components (50% variance explained by fMRIPrep defaults)
 
acompcor_combined: boolean, optional
If true, use components generated from the combined white matter and csf
masks. Otherwise, components are generated from each mask separately and then
concatenated.
 
ica_aroma : None or string, optional
None: default, not using ICA-AROMA related strategy
"basic": use noise IC only.
"full": use fMRIprep output `~desc-smoothAROMAnonaggr_bold.nii.gz` .
 
demean : boolean, optional
If True, the confounds are standardized to a zero mean (over time).
This step is critical if the confounds are regressed out of time series
using nilearn with no or zscore standardization, but should be turned off
with "spc" normalization.
 
 
Attributes
----------
`confounds_` : pandas.DataFrame
The confounds loaded using the specified model. The columns of the dataframe
contains the labels.
 
`sample_mask_` : list of int
The index of the niimgs along time/fourth dimension.
This list includes indices for valid volumes for subsequent analysis.
This attribute should be passed to parameter `sample_mask` of nilearn.NiftiMasker.
Volumnes are removed if flagges as following:
- Non-steady-state volumes (if present)
- Motion outliers detected by scrubbing
 
Notes
-----
The predefined strategies implemented in this class are
adapted from (Ciric et al. 2017). Band-pass filter is replaced
by high-pass filter. Low-pass filters can be implemented, e.g., through
nilearn maskers. Scrubbing is implemented by introducing regressors in the
confounds, rather than eliminating time points. Other aspects of the
preprocessing listed in Ciric et al. (2017) are controlled through fMRIprep,
e.g. distortion correction.
 
References
----------
Ciric et al., 2017 "Benchmarking of participant-level confound regression
strategies for the control of motion artifact in studies of functional
connectivity" Neuroimage 154: 174-87
https://doi.org/10.1016/j.neuroimage.2017.03.020
"""
 
Function `__init__` has 13 arguments (exceeds 4 allowed). Consider refactoring.
def __init__(
self,
strategy=["motion", "high_pass", "wm_csf"],
motion="full",
n_motion=0,
scrub="full",
fd_thresh=0.2,
std_dvars_thresh=3,
wm_csf="basic",
global_signal="basic",
compcor="anat",
acompcor_combined=True,
n_compcor="auto",
ica_aroma=None,
demean=True,
):
"""Default parameters."""
self.strategy = _sanitize_strategy(strategy)
self.motion = motion
self.n_motion = n_motion
self.scrub = scrub
self.fd_thresh = fd_thresh
self.std_dvars_thresh = std_dvars_thresh
self.wm_csf = wm_csf
self.global_signal = global_signal
self.compcor = compcor
self.acompcor_combined = acompcor_combined
self.n_compcor = n_compcor
self.ica_aroma = ica_aroma
self.demean = demean
 
def load(self, img_files):
"""
Load fMRIprep confounds and sample mask
 
Parameters
----------
img_files : path to processed image files, optionally as a list.
Processed nii.gz/dtseries.nii/func.gii file from fmriprep.
`nii.gz` or `dtseries.nii`: path to files, optionally as a list.
`func.gii`: list of a pair of paths to files, optionally as a list of lists.
The companion tsv will be automatically detected.
 
Returns
-------
confounds : pandas.DataFrame or list of pandas.DataFrame
A reduced version of fMRIprep confounds based on selected strategy and flags.
An intercept is automatically added to the list of confounds.
The columns contains the labels of the regressors.
 
sample_mask : list or list of list
Index of time point to be preserved in the analysis
"""
return self._parse(img_files)
 
def _parse(self, img_files):
"""Parse input image, find confound files and scrubbing etc."""
img_files, flag_single = cf._sanitize_confounds(img_files)
 
confounds_out = []
sample_mask_out = []
self.missing_confounds_ = []
self.missing_keys_ = []
 
for file in img_files:
sample_mask, conf = self._load_single(file)
confounds_out.append(conf)
sample_mask_out.append(sample_mask)
 
# If a single input was provided,
# send back a single output instead of a list
if flag_single:
confounds_out = confounds_out[0]
sample_mask_out = sample_mask_out[0]
 
self.confounds_ = confounds_out
self.sample_mask_ = sample_mask_out
return confounds_out, sample_mask_out
 
 
def _load_single(self, confounds_raw):
"""Load a single confounds file from fmriprep."""
# Convert tsv file to pandas dataframe
# check if relevant imaging files are present according to the strategy
flag_acompcor = ("compcor" in self.strategy) and (self.compcor == "anat")
flag_full_aroma = ("ica_aroma" in self.strategy) and (self.ica_aroma == "full")
confounds_raw, self.json_ = cf._confounds_to_df(
confounds_raw, flag_acompcor, flag_full_aroma
)
 
confounds = pd.DataFrame()
 
for confound in self.strategy:
loaded_confounds = self._load_confound(confounds_raw, confound)
confounds = pd.concat([confounds, loaded_confounds], axis=1)
 
_check_error(self.missing_confounds_, self.missing_keys_)
sample_mask, confounds= cf._prepare_output(
confounds, self.demean
)
return sample_mask, confounds
 
def _load_confound(self, confounds_raw, confound):
"""Load a single type of confound."""
try:
loaded_confounds = getattr(self, f"_load_{confound}")(confounds_raw)
except cf.MissingConfound as exception:
self.missing_confounds_ += exception.params
self.missing_keys_ += exception.keywords
loaded_confounds = pd.DataFrame()
return loaded_confounds
 
def _load_motion(self, confounds_raw):
"""Load the motion regressors."""
motion_params = cf._add_suffix(
["trans_x", "trans_y", "trans_z", "rot_x", "rot_y", "rot_z"], self.motion
)
cf._check_params(confounds_raw, motion_params)
confounds_motion = confounds_raw[motion_params]
 
# Optionally apply PCA reduction
if self.n_motion > 0:
confounds_motion = cf._pca_motion(
confounds_motion, n_components=self.n_motion
)
return confounds_motion
 
def _load_high_pass(self, confounds_raw):
"""Load the high pass filter regressors."""
high_pass_params = cf._find_confounds(confounds_raw, ["cosine"])
return confounds_raw[high_pass_params]
 
def _load_wm_csf(self, confounds_raw):
"""Load the regressors derived from the white matter and CSF masks."""
wm_csf_params = cf._add_suffix(["csf", "white_matter"], self.wm_csf)
cf._check_params(confounds_raw, wm_csf_params)
return confounds_raw[wm_csf_params]
 
def _load_global(self, confounds_raw):
"""Load the regressors derived from the global signal."""
global_params = cf._add_suffix(["global_signal"], self.global_signal)
cf._check_params(confounds_raw, global_params)
return confounds_raw[global_params]
 
def _load_compcor(self, confounds_raw):
"""Load compcor regressors."""
compcor_cols = _find_compcor(
self.json_, self.compcor, self.n_compcor, self.acompcor_combined
)
cf._check_params(confounds_raw, compcor_cols)
return confounds_raw[compcor_cols]
 
def _load_ica_aroma(self, confounds_raw):
"""Load the ICA-AROMA regressors."""
if self.ica_aroma is None:
raise ValueError("Please select an option when using ICA-AROMA strategy")
if self.ica_aroma == "full":
return pd.DataFrame()
if self.ica_aroma == "basic":
ica_aroma_params = cf._find_confounds(confounds_raw, ["aroma"])
return confounds_raw[ica_aroma_params]
 
def _load_scrub(self, confounds_raw):
"""Perform basic scrub - Remove volumes if framewise displacement exceeds threshold."""
n_scans = len(confounds_raw)
# Get indices of fd outliers
fd_outliers = np.where(
confounds_raw["framewise_displacement"] > self.fd_thresh
)[0]
dvars_outliers = np.where(confounds_raw["std_dvars"] > self.std_dvars_thresh)[0]
combined_outliers = np.sort(
np.unique(np.concatenate((fd_outliers, dvars_outliers)))
)
# Do full scrubbing if desired, and motion outliers were detected
if self.scrub == "full" and len(combined_outliers) > 0:
combined_outliers = cf._optimize_scrub(combined_outliers, n_scans)
# Make one-hot encoded motion outlier regressors
motion_outlier_regressors = pd.DataFrame(
np.transpose(np.eye(n_scans)[combined_outliers]).astype(int)
)
column_names = [
"motion_outlier_" + str(num)
for num in range(np.shape(motion_outlier_regressors)[1])
]
motion_outlier_regressors.columns = column_names
return motion_outlier_regressors
 
def _load_non_steady_state(self, confounds_raw):
"""Find non steady state regressors."""
nss_outliers = cf._find_confounds(confounds_raw, ["non_steady_state"])
if nss_outliers:
return confounds_raw[nss_outliers]
else:
return pd.DataFrame()