217
217
// Computing FFT dimensions, hard to understand why the -1 is there but it works nicely
218
218
// DS: This is actually two times more than the closest power of 2. Do we need that?
219
219
ctx->setup.dim_fft = 1<<((int)(log((1. * ctx->setup.fft_oversampling * ctx->setup.num_bins - 1)) / log(2.0) + 0.9999));
220
printf("FFT convolution: %u\n", ctx->setup.dim_fft);
220
221
ctx->setup.filter = malloc((2 * ctx->setup.dim_fft) * sizeof(float));
221
222
check_alloc(ctx->setup.filter, " FILTER");
518
#include "astra_fourier.h"
518
519
int hst_set_filter(HSTContext *ctx, HSTFilterFunction func, void *func_param) {
525
526
dim_fft = ctx->setup.dim_fft;
526
527
FILTER = ctx->setup.filter;
529
#ifdef PYHST_ASTRA_SCALING
530
// dim_fft is next power of two of (2 * num_proj)
531
// num_bins is dim_fft/2 + 1
532
int num_bins = dim_fft / 2 + 1; // number of non-symmetric coefficients
534
memset(FILTER, 0, 2 * dim_fft * sizeof(float));
537
for (j = 1; j < dim_fft; j++) {
541
coef = M_PI * (dim_fft - j);
544
FILTER[2 * j] = -1. / (coef * coef);
548
FILTER[2 * j + 1] = 0;
551
int *ip = (int*)malloc((2 + sqrt(dim_fft) + 1)*sizeof(int)); ip[0] = 0;
552
float *w = (float*)malloc(dim_fft * sizeof(float) / 2);
553
cdft(2 * dim_fft, -1, FILTER, ip, w);
557
for (j = 0; j < num_bins; j++) {
559
float fRelIndex = (float)j / (float)dim_fft;
560
float pfW = 2. * M_PI * fRelIndex;
562
// On last iteration it is actually equal. So, rounding error works differently with ASTRA and here.... Therefore, we require a bit on top.
563
if (pfW > (M_PI * _fD + 1E-6)) {
565
FILTER[2 * j + 1] = 0;
567
FILTER[2 * j] = 2.0 * FILTER[2 * j];
568
FILTER[2 * j + 1] = FILTER[2 * j];
572
for (j = num_bins; j < dim_fft; j++) {
574
FILTER[2 * j + 1] = 0;
578
for (j = 0; j < num_bins; j++) {
579
printf("%8.4f %8.4f ", FILTER[2 * j], FILTER[2 * j + 1]);
583
#else /* ASTRA_SCALING */
529
585
func(func_param, dim_fft, FILTER);
532
FILTER[1]= 1.0/dim_fft ; /* This is the real part of the highest frequency */
588
FILTER[1]= 1.0/dim_fft ; // This is the real part of the highest frequency
534
590
for (j=1;j<dim_fft / 2; ++j) {
535
FILTER[2 * j] = j * 2.0 / dim_fft/ dim_fft;
591
FILTER[2 * j] = j * 2.0 / dim_fft / dim_fft;
536
592
FILTER[2 * j + 1] = FILTER[2 * j];
544
600
if (ctx->setup.no_filtering) {
545
601
FILTER[1] = -9999.9999;
548
603
/* This is due to different organization of data representation used after by
549
604
real to complex FFT transfer. In both cases only non-redundant coefficients
550
605
are stored however, the current CPU code stores High Frequency Term (HFT) in the
551
606
res[1], while cuFFT (and FFTW as well) have always res[1] = 0 and stores
552
607
the HFT in res[dim_fft] */
554
FILTER[dim_fft] = FILTER[1];
609
FILTER[dim_fft] = FILTER[1]; // or should it be 0?
556
611
/* CUDA code now includes optimization allowing to compute two convolutions
557
612
parallely using single Complex-Complex Fourier transform. For that we need