source/libtomo/gridrec/gridrec.c
// Copyright (c) 2015, UChicago Argonne, LLC. All rights reserved.
// Copyright 2015. UChicago Argonne, LLC. This software was produced
// under U.S. Government contract DE-AC02-06CH11357 for Argonne National
// Laboratory (ANL), which is operated by UChicago Argonne, LLC for the
// U.S. Department of Energy. The U.S. Government has rights to use,
// reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR
// UChicago Argonne, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
// ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is
// modified to produce derivative works, such modified software should
// be clearly marked, so as not to confuse it with the version available
// from ANL.
// Additionally, redistribution and use in source and binary forms, with
// or without modification, are permitted provided that the following
// conditions are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in
// the documentation and/or other materials provided with the
// distribution.
// * Neither the name of UChicago Argonne, LLC, Argonne National
// Laboratory, ANL, the U.S. Government, nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
// THIS SOFTWARE IS PROVIDED BY UChicago Argonne, LLC AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL UChicago
// Argonne, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
// Possible speedups:
// * Profile code and check adding SIMD to various functions (from OpenMP)
#include <math.h>
#include "libtomo/filters.h"
#include "libtomo/gridrec.h"
#include "pal.h"
#ifdef TOMOPY_USE_MKL
# include "mkl.h"
#endif
#ifdef TOMOPY_USE_FFTW
// Import pal.h first to import <complex.h> first so fftw uses native complex types
# include "fftw3.h"
#endif
float
legendre(int n, const float* coefs, float x)
{
// Compute SUM(coefs(k)*P(2*k,x), for k=0,n/2)
// where P(j,x) is the jth Legendre polynomial.
// x must be between -1 and 1.
float penult, last, cur, y, mxlast;
y = coefs[0];
penult = 1.0;
last = x;
for(int j = 2; j <= n; j++)
{
mxlast = -(x * last);
cur = -(2 * mxlast + penult) + (penult + mxlast) / j;
// cur = (x*(2*j-1)*last-(j-1)*penult)/j;
if(!(j & 1)) // if j is even
{
y += cur * coefs[j >> 1];
}
penult = last;
last = cur;
}
return y;
}
void
set_pswf_tables(float C, int nt, float lambda, const float* coefs, int ltbl, int linv,
float* wtbl, float* winv)
{
// Set up lookup tables for convolvent (used in Phase 1 of
// do_recon()), and for the final correction factor (used in
// Phase 3).
int i;
float norm;
const float fac = (float) ltbl / (linv + 0.5);
const float polyz = legendre(nt, coefs, 0.);
wtbl[0] = 1.0;
for(i = 1; i <= ltbl; i++)
{
wtbl[i] = legendre(nt, coefs, (float) i / ltbl) / polyz;
}
// Note the final result at end of Phase 3 contains the factor,
// norm^2. This incorporates the normalization of the 2D
// inverse FFT in Phase 2 as well as scale factors involved
// in the inverse Fourier transform of the convolvent.
norm = sqrt(M_PI / 2 / C / lambda) / 1.2;
winv[linv] = norm / wtbl[0];
__PRAGMA_IVDEP
for(i = 1; i <= linv; i++)
{
// Minus sign for alternate entries
// corrects for "natural" data layout
// in array H at end of Phase 1.
norm = -norm;
winv[linv + i] = winv[linv - i] = norm / wtbl[(int) roundf(i * fac)];
}
}
void
set_trig_tables(int dt, const float* theta, float** sine, float** cose)
{
// Set up tables of sines and cosines.
float *s, *c;
*sine = s = malloc_vector_f(dt);
__ASSSUME_64BYTES_ALIGNED(s);
*cose = c = malloc_vector_f(dt);
__ASSSUME_64BYTES_ALIGNED(c);
__PRAGMA_SIMD
for(int j = 0; j < dt; j++)
{
s[j] = sinf(theta[j]);
c[j] = cosf(theta[j]);
}
}
void
set_filter_tables(int dt, int pd, float center, filter_func pf, const float* filter_par,
PAL_COMPLEX* A, unsigned char filter2d)
{
// Set up the complex array, filphase[], each element of which
// consists of a real filter factor [obtained from the function,
// pf(...)], multiplying a complex phase factor (derived from the
// parameter, center}. See Phase 1 comments.
const float norm = M_PI / pd / dt;
const float rtmp1 = 2 * M_PI * center / pd;
int j, i;
int pd2 = pd / 2;
float x;
if(!filter2d)
{
for(j = 0; j < pd2; j++)
{
A[j] = pf((float) j / pd, j, 0, pd2, filter_par);
}
__PRAGMA_SIMD
for(j = 0; j < pd2; j++)
{
x = j * rtmp1;
A[j] *= (cosf(x) - I * sinf(x)) * norm;
}
}
else
{
for(i = 0; i < dt; i++)
{
int j0 = i * pd2;
for(j = 0; j < pd2; j++)
{
A[j0 + j] = pf((float) j / pd, j, i, pd2, filter_par);
}
__PRAGMA_SIMD
for(j = 0; j < pd2; j++)
{
x = j * rtmp1;
A[j0 + j] *= (cosf(x) - I * sinf(x)) * norm;
}
}
}
}
void
gridrec(const float* data, int dy, int dt, int dx, const float* center,
const float* theta, float* recon, int ngridx, int ngridy, const char* fname,
const float* filter_par)
{
if(dy == 0 || dt == 0 || dx == 0)
return;
int s, p, iu, iv;
int j;
float *sine, *cose, *wtbl, *winv;
float *work, *work2;
filter_func filter = get_filter(fname);
const float C = 7.0;
const float nt = 20.0;
const float lambda = 0.99998546;
const unsigned int L = (int) (2 * C / M_PI);
const int ltbl = 512;
int pdim;
PAL_COMPLEX * sino, *filphase, *filphase_iter = NULL, **H;
PAL_COMPLEX ** U_d, **V_d;
float * J_z, *P_z;
const float coefs[11] = { 0.5767616E+02, -0.8931343E+02, 0.4167596E+02,
-0.1053599E+02, 0.1662374E+01, -0.1780527E-00,
0.1372983E-01, -0.7963169E-03, 0.3593372E-04,
-0.1295941E-05, 0.3817796E-07 };
// Compute pdim = next power of 2 >= dx
for(pdim = 16; pdim < dx; pdim *= 2)
;
const int pdim2 = pdim >> 1;
const int M02 = pdim2 - 1;
const int M2 = pdim2;
unsigned char filter2d = filter_is_2d(fname);
// Allocate storage for various arrays.
sino = malloc_vector_c(pdim);
if(!filter2d)
{
filphase = malloc_vector_c(pdim2);
filphase_iter = filphase;
}
else
{
filphase = malloc_vector_c(dt * (pdim2));
}
__ASSSUME_64BYTES_ALIGNED(filphase);
H = malloc_matrix_c(pdim, pdim);
__ASSSUME_64BYTES_ALIGNED(H);
wtbl = malloc_vector_f(ltbl + 1);
__ASSSUME_64BYTES_ALIGNED(wtbl);
winv = malloc_vector_f(pdim - 1);
__ASSSUME_64BYTES_ALIGNED(winv);
J_z = malloc_vector_f(pdim2 * dt);
__ASSSUME_64BYTES_ALIGNED(J_z);
P_z = malloc_vector_f(pdim2 * dt);
__ASSSUME_64BYTES_ALIGNED(P_z);
U_d = malloc_matrix_c(dt, pdim);
__ASSSUME_64BYTES_ALIGNED(U_d);
V_d = malloc_matrix_c(dt, pdim);
__ASSSUME_64BYTES_ALIGNED(V_d);
work = malloc_vector_f(L + 1);
__ASSSUME_64BYTES_ALIGNED(work);
work2 = malloc_vector_f(L + 1);
__ASSSUME_64BYTES_ALIGNED(work2);
// Set up table of sines and cosines.
set_trig_tables(dt, theta, &sine, &cose);
__ASSSUME_64BYTES_ALIGNED(sine);
__ASSSUME_64BYTES_ALIGNED(cose);
// Set up PSWF lookup tables.
set_pswf_tables(C, nt, lambda, coefs, ltbl, M02, wtbl, winv);
#ifdef TOMOPY_USE_MKL
DFTI_DESCRIPTOR_HANDLE reverse_1d;
MKL_LONG length_1d = (MKL_LONG) pdim;
DftiCreateDescriptor(&reverse_1d, DFTI_SINGLE, DFTI_COMPLEX, 1, length_1d);
DftiSetValue(reverse_1d, DFTI_THREAD_LIMIT,
1); // FFT should run sequentially to avoid oversubscription
DftiCommitDescriptor(reverse_1d);
DFTI_DESCRIPTOR_HANDLE forward_2d;
MKL_LONG length_2d[2] = { (MKL_LONG) pdim, (MKL_LONG) pdim };
DftiCreateDescriptor(&forward_2d, DFTI_SINGLE, DFTI_COMPLEX, 2, length_2d);
DftiSetValue(forward_2d, DFTI_THREAD_LIMIT,
1); // FFT should run sequentially to avoid oversubscription
DftiCommitDescriptor(forward_2d);
#endif
#ifdef TOMOPY_USE_FFTW
fftwf_make_planner_thread_safe();
fftwf_plan reverse_1d;
reverse_1d = fftwf_plan_dft_1d(pdim, sino, sino, FFTW_BACKWARD, FFTW_ESTIMATE);
fftwf_plan forward_2d;
forward_2d = fftwf_plan_dft_2d(pdim, pdim, H[0], H[0], FFTW_FORWARD, FFTW_ESTIMATE);
#endif
for(p = 0; p < dt; p++)
{
for(j = 1; j < pdim2; j++)
{
U_d[p][j] = j * cose[p] + M2;
V_d[p][j] = j * sine[p] + M2;
}
}
float U, V;
const float L2 = (int) (C / M_PI);
const float tblspcg = 2 * ltbl / L;
int iul, iuh, ivl, ivh;
int k, k2;
// For each slice.
for(s = 0; s < dy; s += 2)
{
// Set up table of combined filter-phase factors.
set_filter_tables(dt, pdim, center[s], filter, filter_par, filphase, filter2d);
// First clear the array H
memset(H[0], 0, pdim * pdim * sizeof(H[0][0]));
// Loop over the dt projection angles. For each angle, do the following:
// 1. Copy the real projection data from the two slices into the
// real and imaginary parts of the first dx elements of the
// complex array, sino[]. Set the remaining pdim-dx elements
// to zero (zero-padding).
// 2. Carry out a (1D) Fourier transform on the complex data.
// This results in transform data that is arranged in
// "wrap-around" order, with non-negative spatial frequencies
// occupying the first half, and negative frequencies the second
// half, of the array, sino[].
// 3. Multiply each element of the 1-D transform by a complex,
// frequency dependent factor, filphase[]. These factors were
// precomputed as part of recofour1((float*)sino-1,pdim,1);n_init()
// and combine the tomographic filtering with a phase factor which
// shifts the origin in configuration space to the projection of
// the rotation axis as defined by the parameter, "center". If a
// region of interest (ROI) centered on a different origin has
// been specified [(X0,Y0)!=(0,0)], multiplication by an
// additional phase factor, dependent on angle as well as
// frequency, is required.
// 4. For each data element, find the Cartesian coordinates,
// <U,V>, of the corresponding point in the 2D frequency plane,
// in units of the spacing in the MxM rectangular grid placed
// thereon; then calculate the upper and lower limits in each
// coordinate direction of the integer coordinates for the
// grid points contained in an LxL box centered on <U,V>.
// Using a precomputed table of the (1-D) convolving function,
// W, calculate the contribution of this data element to the
// (2-D) convolvent (the 2_D convolvent is the product of
// 1_D convolvents in the X and Y directions) at each of these
// grid points, and update the complex 2D array H accordingly.
// At the end of Phase 1, the array H[][] contains data arranged in
// "natural", rather than wrap-around order -- that is, the origin in
// the spatial frequency plane is situated in the middle, rather than
// at the beginning, of the array, H[][]. This simplifies the code
// for carrying out the convolution (step 4 above), but necessitates
// an additional correction -- See Phase 3 below.
PAL_COMPLEX Cdata1, Cdata2;
// For each projection
for(p = 0; p < dt; p++)
{
float sine_p = sine[p], cose_p = cose[p];
const unsigned int j0 = dx * (p + s * dt), delta_index = dx * dt;
__PRAGMA_SIMD_VECREMAINDER
for(j = 0; j < dx; j++)
{
// Add data from both slices
float second_sino = 0.0;
const unsigned int index = j + j0;
if(__LIKELY((s + 1) < dy))
{
second_sino = data[index + delta_index];
}
sino[j] = data[index] + I * second_sino;
}
__PRAGMA_SIMD_VECREMAINDER
for(j = dx; j < pdim; j++)
{
// Zero fill the rest of the array
sino[j] = 0.0;
}
#ifdef TOMOPY_USE_MKL
DftiComputeBackward(reverse_1d, sino);
#endif
#ifdef TOMOPY_USE_FFTW
fftwf_execute(reverse_1d);
#endif
if(filter2d)
filphase_iter = filphase + pdim2 * p;
// For each FFT(projection)
for(j = 1; j < pdim2; j++)
{
Cdata1 = filphase_iter[j] * sino[j];
Cdata2 = conjf(filphase_iter[j]) * sino[pdim - j];
U = j * cose_p + M2;
V = j * sine_p + M2;
// Note freq space origin is at (M2,M2), but we
// offset the indices U, V, etc. to range from 0 to M-1.
iul = ceilf(U - L2);
iuh = floorf(U + L2);
ivl = ceilf(V - L2);
ivh = floorf(V + L2);
if(iul < 1)
iul = 1;
if(iuh >= pdim)
iuh = pdim - 1;
if(ivl < 1)
ivl = 1;
if(ivh >= pdim)
ivh = pdim - 1;
// Note aliasing value (at index=0) is forced to zero.
__PRAGMA_SIMD_VECREMAINDER_VECLEN8
for(iv = ivl, k = 0; iv <= ivh; iv++, k++)
{
work[k] = wtbl[(int) roundf(fabsf(V - iv) * tblspcg)];
}
__PRAGMA_SIMD_VECREMAINDER_VECLEN8
for(iu = iul, k = 0; iu <= iuh; iu++, k++)
{
work2[k] = wtbl[(int) roundf(fabsf(U - iu) * tblspcg)];
}
__PRAGMA_OMP_SIMD_COLLAPSE
for(iu = iul, k2 = 0; iu <= iuh; iu++, k2++)
{
for(iv = ivl, k = 0; iv <= ivh; iv++, k++)
{
const float rtmp = work2[k2];
const float convolv = rtmp * work[k];
H[iu][iv] += convolv * Cdata1;
H[pdim - iu][pdim - iv] += convolv * Cdata2;
}
}
}
}
// Carry out a 2D inverse FFT on the array H.
// At the conclusion of this phase, the configuration
// space data is arranged in wrap-around order with the origin
// (center of reconstructed images) situated at the start of the
// array. The first (resp. second) half of the array contains the
// lower, Y<0 (resp, upper Y>0) part of the image, and within each row
// of the array, the first (resp. second) half contains data for the
// right [X>0] (resp. left [X<0]) half of the image.
#ifdef TOMOPY_USE_MKL
DftiComputeForward(forward_2d, H[0]);
#endif
#ifdef TOMOPY_USE_FFTW
fftwf_execute(forward_2d);
#endif
// Copy the real and imaginary parts of the complex data from H[][],
// into the output buffers for the two reconstructed real images,
// simultaneously carrying out a final multiplicative correction.
// The correction factors are taken from the array, winv[], previously
// computed in set_pswf_tables(), and consist logically of three parts,
// namely:
// 1. A positive real factor, corresponding to the reciprocal
// of the inverse Fourier transform, of the convolving
// function, W, and
// 2. Multiplication by the cell size, (1/D1)^2, in 2D frequency
// space. This correctly normalizes the 2D inverse FFT carried
// out in Phase 2. (Note that all quantities are expressed in
// units in which the detector spacing is one.)
// 3. A sign change for the "odd-numbered" elements (in a
// checkerboard pattern) of the array. This compensates
// for the fact that the 2-D Fourier transform (Phase 2)
// started with a frequency array in which the zero frequency
// point appears in the middle of the array instead of at
// its start.
// Only the elements in the square M0xM0 subarray of H[][], centered
// about the origin, are utilized. The other elements are not part of
// the actual region being reconstructed and are discarded. Because of
// the wrap-around ordering, the subarray must actually be taken from
// the four corners" of the 2D array, H[][] -- See Phase 2 description,
// above.
// The final data corresponds physically to the linear X-ray absorption
// coefficient expressed in units of the inverse detector spacing -- to
// convert to inverse cm (say), one must divide the data by the detector
// spacing in cm.
int ustart, vstart, ufin, vfin;
const int padx = (pdim - ngridx) / 2;
const int pady = (pdim - ngridy) / 2;
const int offsetx = M02 + 1 - padx;
const int offsety = M02 + 1 - pady;
const int islc1 = s * ngridx * ngridy; // index slice 1
const int islc2 = (s + 1) * ngridx * ngridy; // index slice 2
ustart = pdim - offsety;
ufin = pdim;
j = 0;
while(j < ngridy)
{
for(iu = ustart; iu < ufin; j++, iu++)
{
const float corrn_u = winv[j + pady];
vstart = pdim - offsetx;
vfin = pdim;
k = 0;
while(k < ngridx)
{
__PRAGMA_SIMD
for(iv = vstart; iv < vfin; k++, iv++)
{
const float corrn = corrn_u * winv[k + padx];
recon[islc1 + ngridy * (ngridx - 1 - k) + j] =
corrn * crealf(H[iu][iv]);
if(__LIKELY((s + 1) < dy))
{
recon[islc2 + ngridy * (ngridx - 1 - k) + j] =
corrn * cimagf(H[iu][iv]);
}
}
if(k < ngridx)
{
vstart = 0;
vfin = ngridx - offsetx;
}
}
}
if(j < ngridy)
{
ustart = 0;
ufin = ngridy - offsety;
}
}
}
free_vector_f(sine);
free_vector_f(cose);
free_vector_c(sino);
free_vector_f(wtbl);
free_vector_c(filphase);
free_vector_f(winv);
free_vector_f(work);
free_matrix_c(H);
free_vector_f(J_z);
free_vector_f(P_z);
free_matrix_c(U_d);
free_matrix_c(V_d);
#ifdef TOMOPY_USE_MKL
DftiFreeDescriptor(&reverse_1d);
DftiFreeDescriptor(&forward_2d);
#endif
#ifdef TOMOPY_USE_FFTW
fftwf_destroy_plan(reverse_1d);
fftwf_destroy_plan(forward_2d);
#endif
return;
}