gwastro/sbank

View on GitHub
sbank/overlap_cpu_lib.c

Summary

Maintainability
Test Coverage
/*
 * Copyright (C) 2012  Nickolas Fotopoulos
 * Copyright (C) 2016  Stephen Privitera
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <complex.h>
#include <sys/types.h>

/* ---------------- LAL STUFF NEEDED ----------------- */
int XLALPrintError(const char *fmt, ...);

typedef struct tagCOMPLEX8Vector {
     uint32_t length; /**< Number of elements in array. */
     float complex *data; /**< Pointer to the data array. */
} COMPLEX8Vector;

COMPLEX8Vector * XLALCreateCOMPLEX8Vector ( uint32_t length );
void XLALDestroyCOMPLEX8Vector ( COMPLEX8Vector * vector );

typedef struct tagCOMPLEX8FFTPlan COMPLEX8FFTPlan;

COMPLEX8FFTPlan * XLALCreateReverseCOMPLEX8FFTPlan( uint32_t size, int measurelvl );

int XLALCOMPLEX8VectorFFT (COMPLEX8Vector * restrict output, const COMPLEX8Vector * restrict input, const COMPLEX8FFTPlan *plan);

void XLALDestroyCOMPLEX8FFTPlan( COMPLEX8FFTPlan *plan );

/* ----------------------------------------------- */

typedef struct tagWS {
    size_t n;
    COMPLEX8FFTPlan *plan;
    COMPLEX8Vector *zf;
    COMPLEX8Vector *zt;
} WS;

WS *SBankCreateWorkspaceCache(void);

void SBankDestroyWorkspaceCache(WS *workspace_cache);

double _SBankComputeMatch(complex *inj, complex *tmplt, size_t min_len, double delta_f, WS *workspace_cache);

double _SBankComputeRealMatch(complex *inj, complex *tmplt, size_t min_len, double delta_f, WS *workspace_cache);

double _SBankComputeMatchMaxSkyLoc(complex *hp, complex *hc, const double hphccorr, complex *proposal, size_t min_len, double delta_f, WS *workspace_cache1, WS *workspace_cache2);

double _SBankComputeMatchMaxSkyLocNoPhase(complex *hp, complex *hc, const double hphccorr, complex *proposal, size_t min_len, double delta_f, WS *workspace_cache1, WS *workspace_cache2);

#define MAX_NUM_WS 32  /* maximum number of workspaces */
#define CHECK_OOM(ptr, msg) if (!(ptr)) { XLALPrintError((msg)); exit(-1); }

/*
 * set up workspaces
 */

WS *SBankCreateWorkspaceCache(void) {
    WS *workspace_cache = calloc(MAX_NUM_WS, sizeof(WS));
    CHECK_OOM(workspace_cache, "unable to allocate workspace\n");
    return workspace_cache;
}

void SBankDestroyWorkspaceCache(WS *workspace_cache) {
    size_t k = MAX_NUM_WS;
    for (;k--;) {
        if (workspace_cache[k].n) {
            XLALDestroyCOMPLEX8FFTPlan(workspace_cache[k].plan);
            XLALDestroyCOMPLEX8Vector(workspace_cache[k].zf);
            XLALDestroyCOMPLEX8Vector(workspace_cache[k].zt);
        }
    }
    free(workspace_cache);
}

static WS *get_workspace(WS *workspace_cache, const size_t n) {
    if (!n) {
        fprintf(stderr, "Zero size workspace requested\n");
        abort();
    }

    /* if n already in cache, return it */
    WS *ptr = workspace_cache;
    while (ptr->n) {
        if (ptr->n == n) return ptr;
        if (++ptr - workspace_cache > MAX_NUM_WS) return NULL;  /* out of space! */
    }

    /* if n not in cache, ptr now points at first blank entry */
    ptr->zf = XLALCreateCOMPLEX8Vector(n);
    CHECK_OOM(ptr->zf->data, "unable to allocate workspace array zf\n");
    memset(ptr->zf->data, 0, n * sizeof(float complex));

    ptr->zt = XLALCreateCOMPLEX8Vector(n);
    CHECK_OOM(ptr->zf->data, "unable to allocate workspace array zt\n");
    memset(ptr->zt->data, 0, n * sizeof(float complex));

    ptr->n = n;
    ptr->plan = XLALCreateReverseCOMPLEX8FFTPlan(n, 1);
    CHECK_OOM(ptr->plan, "unable to allocate plan");

    return ptr;
}

/* by default, complex arithmetic will call built-in function __muldc3, which does a lot of error checking for inf and nan; just do it manually */
static void multiply_conjugate(float complex * restrict out, float complex *a, float complex *b, const size_t size) {
    size_t k = 0;
    for (;k < size; ++k) {
        const float ar = crealf(a[k]);
        const float br = crealf(b[k]);
        const float ai = cimagf(a[k]);
        const float bi = cimagf(b[k]);
        __real__ out[k] = ar * br + ai * bi;
        __imag__ out[k] = ar * -bi + ai * br;
    }
}

static double abs_real(const float complex x) {
    const double re = crealf(x);
    return re;
}

static double abs2(const float complex x) {
    const double re = crealf(x);
    const double im = cimagf(x);
    return re * re + im * im;
}

/* interpolate the peak with a parabolic interpolation */
static double vector_peak_interp(const double ym1, const double y, const double yp1) {
    const double dy = 0.5 * (yp1 - ym1);
    const double d2y = 2. * y - ym1 - yp1;
    return y + 0.5 * dy * dy / d2y;
}

/*
 * Returns the match for two whitened, normalized, positive-frequency
 * COMPLEX8FrequencySeries inputs.
 */
double _SBankComputeMatch(complex *inj, complex *tmplt, size_t min_len, double delta_f, WS *workspace_cache) {

    /* get workspace for + and - frequencies */
    size_t n = 2 * (min_len - 1);   /* no need to integrate implicit zeros */
    WS *ws = get_workspace(workspace_cache, n);
    if (!ws) {
        XLALPrintError("out of space in the workspace_cache\n");
        exit(-1);
    }

    /* compute complex SNR time-series in freq-domain, then time-domain */
    /* Note that findchirp paper eq 4.2 defines a positive-frequency integral,
       so we should only fill the positive frequencies (first half of zf). */
    multiply_conjugate(ws->zf->data, inj, tmplt, min_len);
    XLALCOMPLEX8VectorFFT(ws->zt, ws->zf, ws->plan); /* plan is reverse */

    /* maximize over |z(t)|^2 */
    float complex *zdata = ws->zt->data;
    size_t k = n;
    ssize_t argmax = -1;
    double max = 0.;
    for (;k--;) {
        double temp = abs2(zdata[k]);
        if (temp > max) {
            argmax = k;
            max = temp;
        }
    }
    if (max == 0.) return 0.;

    /* refine estimate of maximum */
    double result;
    if (argmax == 0 || argmax == (ssize_t) n - 1)
        result = max;
    else
        result = vector_peak_interp(abs2(zdata[argmax - 1]), abs2(zdata[argmax]), abs2(zdata[argmax + 1]));

    /* compute match */
    return 4. * delta_f * sqrt(result); 
}


/*
  Compute the overlap between a normalized template waveform h and a
  normalized signal proposal maximizing over the template h's overall
  amplitude. This is the most basic match function one can compute.
*/
double _SBankComputeRealMatch(complex *inj, complex *tmplt, size_t min_len, double delta_f, WS *workspace_cache) {

    /* get workspace for + and - frequencies */
    size_t n = 2 * (min_len - 1);   /* no need to integrate implicit zeros */
    WS *ws = get_workspace(workspace_cache, n);
    if (!ws) {
        XLALPrintError("out of space in the workspace_cache\n");
    exit(-1);
    }

    /* compute complex SNR time-series in freq-domain, then time-domain */
    /* Note that findchirp paper eq 4.2 defines a positive-frequency integral,
       so we should only fill the positive frequencies (first half of zf). */
    multiply_conjugate(ws->zf->data, inj, tmplt, min_len);
    XLALCOMPLEX8VectorFFT(ws->zt, ws->zf, ws->plan); /* plan is reverse */

    /* maximize over |Re z(t)| */
    float complex *zdata = ws->zt->data;
    size_t k = n;
    double max = 0.;
    for (;k--;) {
    double temp = abs_real((zdata[k]));
    if (temp > max) {
        max = temp;
    }
    }
    return 4. * delta_f * max;
}


/*
  Compute the overlap between a template waveform h and a signal
  proposal assuming only the (2,2) mode and maximizing over the
  template h's coalescence phase, overall amplitude and effective
  polarization / sky position. The function assumes that the plus and
  cross polarization hp and hc are both normalized to unity and that
  hphccorr is the correlation between these normalized components.
 */
double _SBankComputeMatchMaxSkyLoc(complex *hp, complex *hc, const double hphccorr, complex *proposal, size_t min_len, double delta_f, WS *workspace_cache1, WS *workspace_cache2) {

    /* get workspace for + and - frequencies */
    size_t n = 2 * (min_len - 1);   /* no need to integrate implicit zeros */
    WS *ws1 = get_workspace(workspace_cache1, n);
    if (!ws1) {
        XLALPrintError("out of space in the workspace_cache\n");
        exit(-1);
    }
    WS *ws2 = get_workspace(workspace_cache2, n);
    if (!ws2) {
        XLALPrintError("out of space in the workspace_cache\n");
        exit(-1);
    }


    /* compute complex SNR time-series in freq-domain, then time-domain */
    /* Note that findchirp paper eq 4.2 defines a positive-frequency integral,
       so we should only fill the positive frequencies (first half of zf). */
    multiply_conjugate(ws1->zf->data, hp, proposal, min_len);
    XLALCOMPLEX8VectorFFT(ws1->zt, ws1->zf, ws1->plan); /* plan is reverse */
    multiply_conjugate(ws2->zf->data, hc, proposal, min_len);
    XLALCOMPLEX8VectorFFT(ws2->zt, ws2->zf, ws2->plan);


    /* COMPUTE DETECTION STATISTIC */

    /* First start with constant values */
    double delta = 2 * hphccorr;
    double denom = 4 - delta * delta;
    if (denom < 0)
    {
        fprintf(stderr, "DANGER WILL ROBINSON: CODE IS BROKEN!!\n");
    }

    /* Now the tricksy bit as we loop over time*/
    float complex *hpdata = ws1->zt->data;
    float complex *hcdata = ws2->zt->data;
    size_t k = n;
    /* FIXME: This is needed if we turn back on peak refinement. */
    /*ssize_t argmax = -1;*/
    double max = 0.;
    for (;k--;) {
        double complex ratio = hcdata[k] / hpdata[k];
        double ratio_real = creal(ratio);
        double ratio_imag = cimag(ratio);
        double beta = 2 * ratio_real;
        double alpha = ratio_real * ratio_real + ratio_imag * ratio_imag;
        double sqroot = alpha*alpha + alpha * (delta*delta - 2) + 1;
        sqroot += beta * (beta - delta * (1 + alpha));
        sqroot = sqrt(sqroot);
        double brckt = 2*(alpha + 1) - beta*delta + 2*sqroot;
        brckt = brckt / denom;
        double det_stat_sq = abs2(hpdata[k]) * brckt;

        if (det_stat_sq > max) {
            /*argmax = k;*/
            max = det_stat_sq;
        }
    }
    if (max == 0.) return 0.;

    /* FIXME: For now do *not* refine estimate of peak. */
    /* double result;
    if (argmax == 0 || argmax == (ssize_t) n - 1)
        result = max;
    else
        result = vector_peak_interp(abs2(zdata[argmax - 1]), abs2(zdata[argmax]), abs2(zdata[argmax + 1])); */

    /* Return match */
    return 4. * delta_f * sqrt(max);
}

double _SBankComputeMatchMaxSkyLocNoPhase(complex *hp, complex *hc, const double hphccorr, complex *proposal, size_t min_len, double delta_f, WS *workspace_cache1, WS *workspace_cache2) {

    /* get workspace for + and - frequencies */
    size_t n = 2 * (min_len - 1);   /* no need to integrate implicit zeros */
    WS *ws1 = get_workspace(workspace_cache1, n);
    if (!ws1) {
        XLALPrintError("out of space in the workspace_cache\n");
        exit(-1);
    }
    WS *ws2 = get_workspace(workspace_cache2, n);
    if (!ws2) {
        XLALPrintError("out of space in the workspace_cache\n");
        exit(-1);
    }


    /* compute complex SNR time-series in freq-domain, then time-domain */
    /* Note that findchirp paper eq 4.2 defines a positive-frequency integral,
       so we should only fill the positive frequencies (first half of zf). */
    multiply_conjugate(ws1->zf->data, hp, proposal, min_len);
    XLALCOMPLEX8VectorFFT(ws1->zt, ws1->zf, ws1->plan); /* plan is reverse */
    multiply_conjugate(ws2->zf->data, hc, proposal, min_len);
    XLALCOMPLEX8VectorFFT(ws2->zt, ws2->zf, ws2->plan);


    /* COMPUTE DETECTION STATISTIC */

    /* First start with constant values */
    double denom = 1. - (hphccorr*hphccorr);
    if (denom < 0)
    {
        fprintf(stderr, "DANGER WILL ROBINSON: CODE IS BROKEN!!\n");
    }

    /* Now the tricksy bit as we loop over time*/
    float complex *hpdata = ws1->zt->data;
    float complex *hcdata = ws2->zt->data;
    size_t k = n;
    /* FIXME: This is needed if we turn back on peak refinement. */
    /*ssize_t argmax = -1;*/
    double max = 0.;
    double det_stat_sq;

    for (;k--;) {
        det_stat_sq = creal(hpdata[k])*creal(hpdata[k]);
        det_stat_sq += creal(hcdata[k])*creal(hcdata[k]);
        det_stat_sq -= 2*creal(hpdata[k])*creal(hcdata[k])*hphccorr;

        det_stat_sq = det_stat_sq / denom;

        if (det_stat_sq > max) {
            /*argmax = k;*/
            max = det_stat_sq;
        }
    }
    if (max == 0.) return 0.;

    /* FIXME: For now do *not* refine estimate of peak. */
    /* double result;
    if (argmax == 0 || argmax == (ssize_t) n - 1)
        result = max;
    else
        result = vector_peak_interp(abs2(zdata[argmax - 1]), abs2(zdata[argmax])
, abs2(zdata[argmax + 1])); */

    /* Return match */
    return 4. * delta_f * sqrt(max);
}