pytim/observables/correlator.py from Marcello-Sega/pytim

pytim/observables/correlator.py
Summary

Maintainability

5 hrs
Test Coverage

Issues
# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding: utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
""" Module: Correlator
    ==================
"""
from __future__ import print_function

import numpy as np
from pytim import utilities
from MDAnalysis.core.groups import Atom, AtomGroup, Residue, ResidueGroup


class Correlator(object):
    """ Computes the (self) correlation of an observable (scalar or vector)

    :param Observable observable: compute the autocorrelation of this observable.
                                  If the observable is None and the reference
                                  is not, the survival probability is computed.
    :param bool normalize: normalize the correlation to 1 at :math:`t=0`
    :param AtomGroup reference: if the group passed to the sample() function
                                changes its composition along the trajectory
                                (such as a layer group), a reference group that
                                includes all atoms that could appear in the
                                variable group must be passed, in order to
                                provide a proper normalization. See the example
                                below.

    Example:

    >>> import pytim
    >>> import MDAnalysis as mda
    >>> import numpy as np
    >>> from pytim.datafiles import WATERSMALL_GRO
    >>> from pytim.utilities import lap
    >>> #  tmpdir here is specified only for travis
    >>> WATERSMALL_TRR = pytim.datafiles.pytim_data.fetch('WATERSMALL_LONG_TRR',tmpdir='./') # doctest:+ELLIPSIS
    checking presence of a cached copy...

    >>> u = mda.Universe(WATERSMALL_GRO,WATERSMALL_TRR)
    >>> g = u.select_atoms('name OW')

    >>> velocity = pytim.observables.Velocity()
    >>> corr = pytim.observables.Correlator(observable=velocity)
    >>> for t in u.trajectory[1:]:
    ...     corr.sample(g)
    >>> vacf = corr.correlation()

    This produces the following (steps of 1 fs):

    .. plot::

        import pytim
        import MDAnalysis as mda
        import numpy as np
        from pytim.datafiles import WATERSMALL_GRO
        from pytim.utilities import lap
        WATERSMALL_TRR=pytim.datafiles.pytim_data.fetch('WATERSMALL_LONG_TRR')

        u = mda.Universe(WATERSMALL_GRO,WATERSMALL_TRR)
        g = u.select_atoms('name OW')

        velocity = pytim.observables.Velocity()
        corr = pytim.observables.Correlator(observable=velocity)
        for t in u.trajectory[1:]:
            corr.sample(g)

        vacf = corr.correlation()

        from matplotlib import pyplot as plt
        plt.plot(vacf[:1000])
        plt.plot([0]*1000)
        plt.show()


    In order to compute the correlation for variable groups, one should proceed
    as follows:

    >>> corr = pytim.observables.Correlator(observable=velocity,reference=g)
    >>> # notice the molecular=False switch, in order for the
    >>> # layer group to be made of oxygen atoms only and match
    >>> # the reference group
    >>> inter = pytim.ITIM(u,group=g,alpha=2.0,molecular=False)
    >>> # example only: sample longer for smooth results
    >>> for t in u.trajectory[1:10]:
    ...     corr.sample(inter.atoms)
    >>> layer_vacf = corr.correlation()


    In order to compute the survival probability of some atoms in a layer, it
    is possible to pass observable=None together with the reference group:

    >>> corr = pytim.observables.Correlator(observable=None, reference = g)
    >>> inter = pytim.ITIM(u,group=g,alpha=2.0, molecular=False)
    >>>  # example only: sample longer for smooth results
    >>> for t in u.trajectory[1:10]:
    ...     corr.sample(inter.atoms)
    >>> survival = corr.correlation()

    """

    def __init__(self, universe=None, observable=None, reference=None):
        self.name = self.__class__.__name__
        self.observable, self.reference = observable, reference
        self.timeseries, self.maskseries = [], []
        self.shape = None

        self.masked = False
        if self.reference is not None and self.observable is not None:
            self.masked = True

        if reference is not None:
            self._init_intermittent()

        elif observable is None:
            raise RuntimeError(
                self.name + ': specity at least an observable or the reference'
            )

    def _init_intermittent(self):
        if self.observable is not None:
            self.reference_obs = self.observable.compute(self.reference) * 0.0
        else:
            self.reference_obs = np.zeros(len(self.reference), dtype=np.double)
        if len(self.reference_obs.shape) > 2:
            raise RuntimeError(
                self.name + ' works only with scalar and vectors')

    def sample(self, group):
        """ Sample the timeseries for the autocorrelation function

            :parameter AtomGroup group: compute the observable using this group
        """
        # can be intermittent or continuous:
        if self.reference is not None:
            sampled = self._sample_intermittent(group)
        else:
            if self.observable is None:
                RuntimeError(
                    'Cannot compute survival probability without a reference')
            sampled = self.observable.compute(group)
            self.timeseries.append(list(sampled.flatten()))

        if self.shape is None:
            self.shape = sampled.shape

    def _sample_intermittent(self, group):
        # we need to collect also the residence
        # function
        # the residence function (1 if in the reference group, 0 otherwise)
        mask = np.isin(self.reference, group)
        # append the residence function to its timeseries
        self.maskseries.append(list(mask))
        if self.observable is not None:
            # this copies a vector of zeros with the correct shape
            sampled = self.reference_obs.copy()
            obs = self.observable.compute(group)
            sampled[np.where(mask)] = obs
            self.timeseries.append(list(sampled.flatten()))
        else:
            self.timeseries = self.maskseries
            if self.shape is None:
                self.shape = (1, )
            sampled = mask
        return sampled

    def correlation(self, normalized=True, continuous=True):
        """ Calculate the autocorrelation from the sampled data

            :parameter bool normalized: normalize the correlation function to:
                                        its zero-time value for regular
                                        correlations; to the average of the
                                        characteristic function for the
                                        survival probability.
            :parameter bool continuous: applies only when a reference group has
                                        been specified: if True (default) the
                                        contribution of a particle at time lag
                                        :math:`\\tau=t_1-t_0` is considered
                                        only if the particle did not leave the
                                        reference group between :math:`t_0` and
                                        :math:`t_1`. If False, the intermittent
                                        correlation is calculated, and the
                                        above restriction is released.

            Example:

            >>> # We build a fake trajectory to test the various options:
            >>> import MDAnalysis as mda
            >>> import pytim
            >>> import numpy as np
            >>> from pytim.datafiles import WATER_GRO
            >>> from pytim.observables import Correlator, Velocity
            >>> np.set_printoptions(suppress=True,precision=3)
            >>>
            >>> u = mda.Universe(WATER_GRO)
            >>> g = u.atoms[0:2]
            >>> g.velocities*=0.0
            >>> g.velocities+=1.0
            >>>
            >>> # velocity autocorrelation along x, variable group
            >>> vv = Correlator(observable=Velocity('x'), reference=g)
            >>> nn = Correlator(reference=g) # survival probability in group g
            >>>
            >>> for c in [vv,nn]:
            ...     c.sample(g)     # t=0
            ...     c.sample(g)     # t=1
            ...     c.sample(g[:1]) # t=2, exclude the second particle
            ...     g.velocities /= 2. # from now on v=0.5
            ...     c.sample(g)     # t=3
            >>>

            The timeseries sampled can be accessed using:

            >>> print(vv.timeseries) # rows refer to time, columns to particle
            [[1.0, 1.0], [1.0, 1.0], [1.0, 0.0], [0.5, 0.5]]
            >>>
            >>> print(nn.timeseries)
            [[True, True], [True, True], [True, False], [True, True]]
            >>>

            Note that the average of  the characteristic function
            :math:`h(t)` is done over all trajectories, including those
            that start with :math:`h=0`.
            The correlation :math:`\\langle h(t)h(0) \\rangle` is divided
            by the average :math:`\\langle h \\rangle` computed over all
            trajectores that extend up to a time lag :math:`t`. The
            `normalize` switch has no effect.

            >>> # normalized, continuous
            >>> corr = nn.correlation()
            >>> print (np.allclose(corr, [ 7./7, 4./5, 2./4, 1./2]))
            True
            >>> # normalized, intermittent
            >>> corr = nn.correlation(continuous=False)
            >>> print (np.allclose(corr, [ 7./7, 4./5, 3./4, 2./2 ]))
            True

            The autocorrelation functions are calculated by taking
            into account in the average only those trajectory that
            start with :math:`h=1` (i.e., which start within the reference
            group). The normalization is done by dividing the
            correlation at time lag :math:`t` by its value at time lag 0
            computed over all trajectories that extend up to time
            lag :math:`t` and do not start with :math:`h=0`.

            >>> # not normalizd, intermittent
            >>> corr = vv.correlation(normalized=False,continuous=False)
            >>> c0 = (1+1+1+0.25+1+1+0.25)/7
            >>> c1 = (1+1+0.5+1)/5 ; c2 = (1+0.5+0.5)/4 ; c3 = (0.5+0.5)/2
            >>> print (np.allclose(corr, [ c0, c1, c2, c3]))
            True
            >>> # check normalization
            >>> np.all(vv.correlation(continuous=False) == corr/corr[0])
            True
            >>> # not normalizd, continuous
            >>> corr = vv.correlation(normalized=False,continuous=True)
            >>> c0 = (1+1+1+0.25+1+1+0.25)/7
            >>> c1 = (1+1+0.5+1)/5 ; c2 = (1+0.5)/4 ; c3 = (0.5+0.)/2
            >>> print (np.allclose(corr, [ c0, c1, c2, c3]))
            True
            >>> # check normalization
            >>> np.all(vv.correlation(continuous=True) == corr/corr[0])
            True

        """
        intermittent = not continuous
        self.dim = self._determine_dimension()

        # the standard correlation
        if self.reference is None:
            ts = np.asarray(self.timeseries)
            corr = utilities.correlate(ts)
            corr = np.average(corr, axis=1)
            if normalized is True:
                corr /= corr[0]
            return corr

        # prepare the mask for the intermittent/continuous cases
        if intermittent is True:
            ms = np.asarray(self.maskseries, dtype=np.double)
        else:  # we add Falses at the begining and at the end to ease the
            # splitting in sub-trajectories
            falses = [[False] * len(self.maskseries[0])]
            ms = np.asarray(falses + self.maskseries + falses)

        # compute the survival probabily
        if self.observable is None:
            return self._survival_probability(ms, normalized, intermittent)

        # compute the autocorrelation function
        else:
            ts = np.asarray(self.timeseries)
            return self._autocorrelation(ts, ms, normalized, intermittent)

    def _autocorrelation(self, ts, ms, normalized, intermittent):

        if intermittent is True:
            corr = self._autocorrelation_intermittent(ts, ms)
        else:
            corr = self._autocorrelation_continuous(ts, ms)

        if normalized is True:
            corr = corr / corr[0]

        return corr

    def _survival_probability(self, ms, normalized, intermittent):
        if intermittent is True:
            corr = self._survival_intermittent(ms)
        else:
            corr = self._survival_continuous(ms)

        return corr

    def _survival_intermittent(self, ms):
        corr = np.sum(utilities.correlate(ms, _normalize=False), axis=1)
        return corr / np.sum(np.cumsum(self.timeseries, axis=0), axis=1)[::-1]

    @staticmethod
    def _find_edges(mask):
        return np.where(mask[:-1] != mask[1:])[0]

    def _survival_continuous(self, ms):
        n_part = len(ms[0])
        corr = np.zeros((self.nseries, n_part))
        counting = (1. + np.arange(len(self.timeseries)))

        for part in range(n_part):
            edges = self._find_edges(ms[::, part])
            deltat = edges[1::2] - edges[0::2]
            # for each of the disconnected segments:
            for n, dt in enumerate(deltat):
                # no need to compute the correlation, we know what it is
                corr[0:dt, part] += counting[:dt][::-1]

        corr = np.sum(corr, axis=1)
        return corr / np.sum(np.cumsum(self.timeseries, axis=0), axis=1)[::-1]

    def _autocorrelation_intermittent(self, ts, ms):

        dim = self.dim

        corr = ts.copy()
        for xyz in range(dim):
            corr[:, xyz::dim] = utilities.correlate(
                ts[:, xyz::dim] * ms, _normalize=False)
        corr = np.sum(
            corr, axis=1) / np.sum(
                np.cumsum(ms, axis=0), axis=1)[::-1]

        return corr

    def _autocorrelation_continuous(self, ts, ms):

        dim = self.dim
        n_part = len(ms[0])
        corr = np.zeros((int(ts.shape[0]), int(ts.shape[1]) // dim))

        for part in range(n_part):
            edges = self._find_edges(ms[::, part])
            deltat = edges[1::2] - edges[0::2]
            for n, dt in enumerate(
                    deltat):  # for each of the disconnected segments
                t1, t2 = edges[2 * n], edges[2 * n + 1]
                i1, i2 = dim * part, dim * (part + 1)
                corr[0:dt, part] += np.sum(
                    utilities.correlate(ts[t1:t2, i1:i2], _normalize=False),
                    axis=1)

        return np.sum(
            corr, axis=1) / np.sum(
                np.cumsum(self.maskseries, axis=0)[::-1], axis=1)

    def _determine_dimension(self):
        self.nseries = max(len(self.timeseries), len(self.maskseries))

        if len(self.shape) == 1:
            dim = 1
        elif len(self.shape) == 2:
            dim = self.shape[1]
        else:
            raise RuntimeError(
                "Correlations of tensorial quantites not allowed in " +
                self.name)
        return dim