neuropsychology/NeuroKit

View on GitHub
neurokit2/rsp/rsp_intervalrelated.py

Summary

Maintainability
A
0 mins
Test Coverage
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd

from .rsp_rrv import rsp_rrv


def rsp_intervalrelated(data, sampling_rate=1000):
    """**Performs RSP analysis on longer periods of data (typically > 10 seconds), such as resting-state data**

    Parameters
    ----------
    data : DataFrame or dict
        A DataFrame containing the different processed signal(s) as different columns, typically
        generated by :func:`.rsp_process` or :func:`.bio_process`. Can also take a dict containing
        sets of separately processed DataFrames.
    sampling_rate : int
        The sampling frequency of the signal (in Hz, i.e., samples/second).

    Returns
    -------
    DataFrame
        A dataframe containing the analyzed RSP features. A single DataFrame input yields one
        row; a dict input yields one row per key, with an additional ``"Label"`` column.
        The analyzed features consist of the following:

        * ``"RSP_Rate_Mean"``: the mean respiratory rate.
        * ``"RSP_Amplitude_Mean"``: the mean respiratory amplitude.
        * ``"RSP_RRV"``: the different respiratory rate variability metrics.
          See :func:`.rsp_rrv` docstrings for details.
        * ``"RSP_Phase_Duration_Inspiration"``: the average inspiratory duration.
        * ``"RSP_Phase_Duration_Expiration"``: the average expiratory duration.
        * ``"RSP_Phase_Duration_Ratio"``: the inspiratory-to-expiratory time ratio (I/E).

    Raises
    ------
    ValueError
        If a DataFrame input does not contain exactly one column matching ``RSP_Rate`` and
        exactly one column matching ``RSP_Amplitude``.
    TypeError
        If ``data`` is neither a DataFrame nor a dict.

    See Also
    --------
    bio_process, rsp_eventrelated

    Examples
    --------
    .. ipython:: python

      import neurokit2 as nk

      # Download data
      data = nk.data("bio_resting_5min_100hz")

      # Process the data
      df, info = nk.rsp_process(data["RSP"], sampling_rate=100)

      # Single dataframe is passed
      nk.rsp_intervalrelated(df, sampling_rate=100)

      epochs = nk.epochs_create(df, events=[0, 15000], sampling_rate=100, epochs_end=150)
      nk.rsp_intervalrelated(epochs, sampling_rate=100)

    """
    if isinstance(data, pd.DataFrame):
        # Validate both required columns up front, before any computation
        rate_cols = [col for col in data.columns if "RSP_Rate" in col]
        if len(rate_cols) != 1:
            raise ValueError(
                "NeuroKit error: rsp_intervalrelated(): Wrong "
                "input, we couldn't extract breathing rate. "
                "Please make sure your DataFrame "
                "contains an `RSP_Rate` column."
            )
        amp_cols = [col for col in data.columns if "RSP_Amplitude" in col]
        if len(amp_cols) != 1:
            raise ValueError(
                "NeuroKit error: rsp_intervalrelated(): Wrong "
                "input, we couldn't extract respiratory amplitude. "
                "Please make sure your DataFrame "
                "contains an `RSP_Amplitude` column."
            )

        # Pass an explicit, fresh dict so nothing depends on the helpers' default container
        intervals = _rsp_intervalrelated_formatinput(data, sampling_rate, {})
        intervals = _rsp_intervalrelated_rrv(data, sampling_rate, intervals)
        intervals["RSP_Amplitude_Mean"] = data[amp_cols[0]].values.mean()

        # One row holding all features
        return pd.DataFrame.from_dict(intervals, orient="index").T

    if isinstance(data, dict):
        intervals = {}
        for index, epoch in data.items():
            # Label first, then rate/amplitude/phase features, then RRV
            features = {"Label": epoch["Label"].iloc[0]}
            features = _rsp_intervalrelated_formatinput(epoch, sampling_rate, features)
            features = _rsp_intervalrelated_rrv(epoch, sampling_rate, features)
            intervals[index] = features

        # One row per dict key
        return pd.DataFrame.from_dict(intervals, orient="index")

    # Fail explicitly instead of hitting an unbound local variable at the return
    raise TypeError(
        "NeuroKit error: rsp_intervalrelated(): Wrong input, "
        "`data` must be a DataFrame or a dict of DataFrames, "
        "got " + type(data).__name__ + "."
    )


# =============================================================================
# Internals
# =============================================================================


def _rsp_intervalrelated_formatinput(data, sampling_rate, output={}):
    # Sanitize input
    colnames = data.columns.values
    if len([i for i in colnames if "RSP_Rate" in i]) == 0:
        raise ValueError(
            "NeuroKit error: rsp_intervalrelated(): Wrong"
            "input, we couldn't extract breathing rate."
            "Please make sure your DataFrame"
            "contains an `RSP_Rate` column."
        )
    if len([i for i in colnames if "RSP_Amplitude" in i]) == 0:
        raise ValueError(
            "NeuroKit error: rsp_intervalrelated(): Wrong"
            "input we couldn't extract respiratory amplitude."
            "Please make sure your DataFrame"
            "contains an `RSP_Amplitude` column."
        )

    rate = data["RSP_Rate"].values
    amplitude = data["RSP_Amplitude"].values

    output["RSP_Rate_Mean"] = np.mean(rate)
    output["RSP_Amplitude_Mean"] = np.mean(amplitude)

    if len([i for i in colnames if "RSP_Phase" in i]) == 0:
        raise ValueError(
            "NeuroKit error: rsp_intervalrelated(): Wrong"
            "input we couldn't extract respiratory phases."
            "Please make sure your DataFrame"
            "contains `RSP_Phase` and `RSP_Phase_Completion` columns."
        )

    # Extract inspiration durations
    insp_phases = data[data["RSP_Phase"] == 1]
    insp_start = insp_phases.index[insp_phases["RSP_Phase_Completion"] == 0]
    insp_end = insp_phases.index[insp_phases["RSP_Phase_Completion"] == 1]

    # Check that start of phase is before end of phase
    if insp_start[0] > insp_end[0]:
        insp_end = insp_end[1:]

    # Check for unequal lengths
    diff = abs(len(insp_start) - len(insp_end))
    if len(insp_start) > len(insp_end):
        insp_start = insp_start[: len(insp_start) - diff]  # remove extra start points
    elif len(insp_end) > len(insp_start):
        insp_end = insp_end[: len(insp_end) - diff]  # remove extra end points

    insp_times = np.array(insp_end - insp_start) / sampling_rate

    # Extract expiration durations
    exp_phases = data[data["RSP_Phase"] == 0]
    exp_start = exp_phases.index[exp_phases["RSP_Phase_Completion"] == 0]
    exp_end = exp_phases.index[exp_phases["RSP_Phase_Completion"] == 1]

    # Check that start of phase is before end of phase
    if exp_start[0] > exp_end[0]:
        exp_end = exp_end[1:]

    # Check for unequal lengths
    diff = abs(len(exp_start) - len(exp_end))
    if len(exp_start) > len(exp_end):
        exp_start = exp_start[: len(exp_start) - diff]  # remove extra start points
    elif len(exp_end) > len(exp_start):
        exp_end = exp_end[: len(exp_end) - diff]  # remove extra end points

    exp_times = np.array(exp_end - exp_start) / sampling_rate

    output["RSP_Phase_Duration_Inspiration"] = np.mean(insp_times)
    output["RSP_Phase_Duration_Expiration"] = np.mean(exp_times)
    output["RSP_Phase_Duration_Ratio"] = (
        output["RSP_Phase_Duration_Inspiration"] / output["RSP_Phase_Duration_Expiration"]
    )

    return output


def _rsp_intervalrelated_rrv(data, sampling_rate, output=None):
    """Add the respiratory rate variability indices computed by ``rsp_rrv`` to ``output``.

    Parameters
    ----------
    data : DataFrame
        Processed RSP data, passed as-is to ``rsp_rrv``.
    sampling_rate : int
        Forwarded to ``rsp_rrv`` as ``sampling_rate``.
    output : dict, optional
        Container to which the indices are added. A new dict is created when omitted.

    Returns
    -------
    dict
        ``output`` with one float entry per column of the ``rsp_rrv`` result.

    """
    # A mutable default would be shared (and polluted) across calls
    if output is None:
        output = {}

    rrv = rsp_rrv(data, sampling_rate=sampling_rate)
    for column in rrv.columns:
        # rsp_rrv presumably returns a single-row DataFrame (verify against rsp_rrv);
        # read that row explicitly, since float() on a Series is deprecated in recent pandas
        output[column] = float(rrv[column].iloc[0])

    return output