neuropsychology/NeuroKit.py

View on GitHub
neurokit/bio/bio_data.py

Summary

Maintainability
D
2 days
Test Coverage
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
import os
import datetime
import bioread

from ..miscellaneous import find_creation_date




# ==============================================================================
# ==============================================================================
# ==============================================================================
# ==============================================================================
# ==============================================================================
# ==============================================================================
# ==============================================================================
# ==============================================================================
def read_acqknowledge(filename, path="", index="datetime", sampling_rate="max", resampling_method="pad", fill_interruptions=True, return_sampling_rate=True):
    """
    Read and Format a BIOPAC's AcqKnowledge file into a pandas' dataframe.

    Parameters
    ----------
    filename :  str
        Filename (with or without the extension) of a BIOPAC's AcqKnowledge file.
    path : str
        Data directory.
    index : str
        How to index the dataframe. "datetime" for aproximate datetime (based on the file creation/change) and "range" for a simple range index.
    sampling_rate : int
        Final sampling rate (samples/second).
    resampling_method : str
        The resampling method: "mean", "pad" or "bfill",
    fill_interruptions : bool
        Automatically fill the eventual signal interruptions using a backfill method.
    return_sampling_rate : bool
        Should it return the sampling rate in a tuple with the dataframe? Default will be changed to True in the future.

    Returns
    ----------
    df, sampling_rate : pandas.DataFrame(), int
        The AcqKnowledge file converted to a dataframe and its sampling_rate.


    Example
    ----------
    >>> import neurokit as nk
    >>>
    >>> df, sampling_rate = nk.read_acqknowledge('file.acq', return_sampling_rate=True)

    Notes
    ----------
    *Authors*

    - `Dominique Makowski <https://dominiquemakowski.github.io/>`_

    *Dependencies*

    - pandas
    - bioread
    - datetime

    *See Also*

    - bioread package: https://github.com/njvack/bioread

    """



    # Check path
    file = path + filename
    if ".acq" not in file:
        file += ".acq"
    if os.path.exists(file) is False:
        print("NeuroKit Error: read_acqknowledge(): couldn't find the following file: " + filename)
        return()

    # Convert creation date
    creation_date = find_creation_date(file)
    creation_date = datetime.datetime.fromtimestamp(creation_date)


    # Read file
    file = bioread.read(file)


    # Get the channel frequencies
    freq_list = []
    for channel in file.named_channels:
        freq_list.append(file.named_channels[channel].samples_per_second)

    # Get data with max frequency and the others
    data = {}
    data_else = {}
    for channel in file.named_channels:
        if file.named_channels[channel].samples_per_second == max(freq_list):
            data[channel] = file.named_channels[channel].data
        else:
            data_else[channel] = file.named_channels[channel].data

    # Create index
    time = []
    beginning_date = creation_date - datetime.timedelta(0, max(file.time_index))
    for timestamps in file.time_index:
        time.append(beginning_date + datetime.timedelta(0, timestamps))
    df = pd.DataFrame(data, index=time)





    # max frequency must be 1000
    if len(data_else.keys()) > 0:  # if not empty
        for channel in data_else:
            channel_frequency = file.named_channels[channel].samples_per_second
            serie = data_else[channel]
            index = list(np.arange(0, max(file.time_index), 1/channel_frequency))
            index = index[:len(serie)]

            # Create index
            time = []
            for timestamps in index:
                time.append(beginning_date + datetime.timedelta(0, timestamps))
            data_else[channel] = pd.Series(serie, index=time)
        df2 = pd.DataFrame(data_else)

    # Create resampling factor
    if sampling_rate == "max":
        sampling_rate = max(freq_list)

    try:
        resampling_factor = str(int(1000/sampling_rate)) + "L"
    except TypeError:
        print("NeuroKit Warning: read_acqknowledge(): sampling_rate must be either num or 'max'. Setting to 'max'.")
        sampling_rate = max(freq_list)
        resampling_factor = str(int(1000/sampling_rate)) + "L"


    # Resample
    if resampling_method not in ["mean", "bfill", "pad"]:
        print("NeuroKit Warning: read_acqknowledge(): resampling_factor must be 'mean', 'bfill' or 'pad'. Setting to 'pad'.")
        resampling_method = 'pad'

    if resampling_method == "mean":
        if len(data_else.keys()) > 0:
            df2 = df2.resample(resampling_factor).mean()
        if int(sampling_rate) != int(max(freq_list)):
            df = df.resample(resampling_factor).mean()
    if resampling_method == "bfill":
        if len(data_else.keys()) > 0:
            df2 = df2.resample(resampling_factor).bfill()
        if int(sampling_rate) != int(max(freq_list)):
            df = df.resample(resampling_factor).bfill()
    if resampling_method == "pad":
        if len(data_else.keys()) > 0:
            df2 = df2.resample(resampling_factor).pad()
        if int(sampling_rate) != int(max(freq_list)):
            df = df.resample(resampling_factor).pad()



    # Join dataframes
    if len(data_else.keys()) > 0:
        df = pd.concat([df, df2], 1)

    if index == "range":
        df = df.reset_index()

    # Fill signal interruptions
    if fill_interruptions is True:
        df = df.fillna(method="backfill")

    if return_sampling_rate is False:
        return(df)
    else:
        return(df, sampling_rate)