/tomo/pyhst

To get this branch, use:
bzr branch http://darksoft.org/webbzr/tomo/pyhst
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
/*
 * The PyHST program is Copyright (C) 2002-2011 of the
 * European Synchrotron Radiation Facility (ESRF) and
 * Karlsruhe Institute of Technology (KIT).
 *
 * PyHST is free software: you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * hst is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

texture<cufftComplex, cudaTextureType2D, cudaReadModeElementType> tex_projections;
texture<float, cudaTextureType2D, cudaReadModeElementType> tex_ktbl;

static __device__ inline cufftComplex ComplexMul(cufftComplex a, cufftComplex b)
{
    cufftComplex c;
    c.x = a.x * b.x - a.y * b.y;
    c.y = a.x * b.y + a.y * b.x;
    return c;
}

static __device__ inline cufftComplex ComplexScale(cufftComplex a, float s)
{
    cufftComplex c;
    c.x = s * a.x;
    c.y = s * a.y;
    return c;
}

__global__ static void  dfi_cuda_zeropadding_complex(cufftReal *input, int sino_width, cufftComplex *output) {
    
    const int dim_x = gridDim.x * blockDim.x;
    int idx = blockIdx.x * BLOCK_SIZE + threadIdx.x;
    int idy = blockIdx.y * BLOCK_SIZE + threadIdx.y;
    long out_idx = idy * dim_x + idx;

    int lpart = sino_width/2;
    int rpart = sino_width/2;
    int dim_x2 = dim_x/2;
    int len_to_lpart = dim_x - lpart;

    output[out_idx].x = 
            (idx < rpart) ? input[idy * sino_width + (lpart + idx)] : 
                            (idx < (dim_x2 + (dim_x2 - lpart))) ? 0.0f : 
                                                                  input[idy * sino_width + (idx - len_to_lpart)];
    output[out_idx].y = 0.0f;                                                              
}

__global__ static void  dfi_cuda_zeropadding_real(cufftReal *input, int sino_width, int dim2_fft, cufftReal *output) {
    
    const int dim_x = gridDim.x * blockDim.x;
    int idx = blockIdx.x * BLOCK_SIZE + threadIdx.x;
    int idy = blockIdx.y * BLOCK_SIZE + threadIdx.y;
    long out_idx = idy * dim2_fft + idx;

    int lpart = sino_width/2;
    int rpart = sino_width/2;
    int dim_x2 = dim_x/2;
    int len_to_lpart = dim_x - lpart;

    output[out_idx] = 
            (idx < rpart) ? input[idy * sino_width + (lpart + idx)] : 
                            (idx < (dim_x2 + (dim_x2 - lpart))) ? 0.0f : 
                                                                  input[idy * sino_width + (idx - len_to_lpart)];                                                          
}

__global__ static void  dfi_cuda_interpolation_sinc(int spectrum_offset,
                                                    float L2, 
                                                    int ktbl_len2, 
                                                    int raster_size, 
                                                    int raster_size2, 
                                                    float table_spacing, 
                                                    float angle_step_rad,
                                                    float theta_max,
                                                    float rho_max,
                                                    int dim_x,
                                                    float max_radius,
                                                    cufftComplex *output) {
    //variables
    float2 out_coord, norm_gl_coord, in_coord;
    int iul, iuh, ivl, ivh, sign, i, j, k;
    float res_real, res_imag, weight, kernel_x_val;
    long out_idx;

    sign = 1;
    res_real = 0.0f, res_imag = 0.0f;
    out_idx = 0;

    out_coord.x = (blockIdx.x * BLOCK_SIZE + threadIdx.x);
    out_coord.y = (blockIdx.y * BLOCK_SIZE + threadIdx.y) + spectrum_offset;

    out_idx = out_coord.y * dim_x + out_coord.x;
    
    //calculate coordinates
    norm_gl_coord.x = out_coord.x - 1;
    norm_gl_coord.y = out_coord.y - raster_size2;

    float radius = sqrt(norm_gl_coord.x * norm_gl_coord.x + norm_gl_coord.y * norm_gl_coord.y);

    if (radius <= max_radius) {
    	in_coord.y = atan2(norm_gl_coord.y,norm_gl_coord.x);
    	in_coord.y = -in_coord.y; // spike here! (mirroring along y-axis)

        sign = (in_coord.y < 0.0) ? -1 : 1;
            
        in_coord.y = (in_coord.y < 0.0f) ? in_coord.y+= M_PI : in_coord.y;
        in_coord.y = (float) min(1.0f + in_coord.y/angle_step_rad, theta_max - 1);

        in_coord.x = (float) min(radius, (float)raster_size2);

    	//sinc interpoaltion
        iul = (int)ceil(in_coord.x - L2);
        iul = (iul < 0) ? 0 : iul;

        iuh = (int)floor(in_coord.x + L2);
        iuh = (iuh > rho_max - 1) ? iuh = rho_max - 1 : iuh;

        ivl = (int)ceil(in_coord.y - L2);
        ivl = (ivl < 0) ? 0 : ivl;

        ivh = (int)floor(in_coord.y + L2);
        ivh = (ivh > theta_max - 1) ? ivh = theta_max - 1 : ivh;
                 
        float kernel_y[20];
        for (i = ivl, j = 0; i <= ivh; ++i, ++j) {
        	kernel_y[j] = tex2D(tex_ktbl, ktbl_len2 + (in_coord.y - (float)i) * table_spacing, 0.5f);
        }

        for (i = iul; i <= iuh; ++i) {
        	kernel_x_val = tex2D(tex_ktbl, ktbl_len2 + (in_coord.x - (float)i) * table_spacing, 0.5f);

                for (k = ivl, j = 0; k <= ivh; ++k, ++j) {
                    weight = kernel_y[j] * kernel_x_val;

                    cufftComplex data_val = tex2D(tex_projections, (float)i, (float)k);

                    res_real += data_val.x * weight;
                    res_imag += data_val.y * weight;
                }
        }

        output[out_idx].x = res_real;
        output[out_idx].y = sign * res_imag;
    }
}

__global__ void dfi_cuda_swap_quadrants_complex(cufftComplex *input, cufftComplex *output, int dim_x)
{
    int idx = blockIdx.x * BLOCK_SIZE + threadIdx.x;
    int idy = blockIdx.y * BLOCK_SIZE + threadIdx.y;

    const int dim_y = gridDim.y * blockDim.y; //a half of real length

    output[idy * dim_x + idx] = input[(dim_y + idy) * dim_x + idx + 1];
    output[(dim_y + idy) * dim_x + idx] = input[idy * dim_x + idx + 1];
}

__global__ void dfi_cuda_swap_quadrants_real(cufftReal *output)
{
    int idx = blockIdx.x * BLOCK_SIZE + threadIdx.x;
    int idy = blockIdx.y * BLOCK_SIZE + threadIdx.y;

    const int dim_x = gridDim.x * blockDim.x;
    int dim_x2 = dim_x/2, dim_y2 = dim_x2;
    long sw_idx1, sw_idx2;

    sw_idx1 = idy * dim_x + idx;

    cufftReal temp = output[sw_idx1];
    if (idx < dim_x2) {
        sw_idx2 = (dim_y2 + idy) * dim_x + (dim_x2 + idx);
        output[sw_idx1] = output[sw_idx2];
        output[sw_idx2] = temp;
    }
    else {
        sw_idx2 = (dim_y2 + idy) * dim_x + (idx - dim_x2);
        output[sw_idx1] = output[sw_idx2];
        output[sw_idx2] = temp;
    }
}

__global__ void dfi_cuda_crop_roi(cufftReal *input, int x, int y, int roi_x, int roi_y, int raster_size, float scale, cufftReal *output)
{
    const int dim_x = gridDim.x * blockDim.x;
    int idx = blockIdx.x * BLOCK_SIZE + threadIdx.x;
    int idy = blockIdx.y * BLOCK_SIZE + threadIdx.y;

    if (idx < roi_x && idy < roi_y) {
        output[idy * roi_x + idx] = input[(idy + y) * raster_size + (idx + x)] * scale;
    }
}