1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
|
/*
 * Copies pairs of samples from `data` into `out`, scaling each sample by a
 * per-row weight that is clamped to the range [0, 2].
 *
 * Thread mapping (as computed below):
 *   idx = blockIdx.x * BLOCK_SIZE + threadIdx.x
 *   idy = blockIdx.y * BLOCK_SIZE + threadIdx.y
 * Threads with idx >= half handle the second vector (dest_vector == 1),
 * the others handle the first one. Every thread reads data[src_pos] and
 * data[src_pos + 2] and writes out[pos] and out[pos + 1], where the output
 * row is (2 * idy + dest_vector) and rows are `dpitch` elements apart.
 *
 * Parameters:
 *   out     destination buffer, row pitch `dpitch` (in elements)
 *   dpitch  distance in elements between consecutive output rows
 *   data    source buffer, indexed as 2 * (idy * spitch + dest_pos) + dest_vector
 *   spitch  source pitch used in the index expression above
 *   half    idx threshold separating the first and the second vector
 *   params  four floats per idy: {flat_zone, param} for vector 0 followed by
 *           {flat_zone, param} for vector 1
 *   batch   offset into the c_all table; entry (batch + 2 * idy + vector).z
 *           provides the axis position correction
 *
 * NOTE(review): there is no bounds check in this kernel; it presumably relies
 * on the launch grid matching the buffer sizes exactly and on blockDim being
 * BLOCK_SIZE x BLOCK_SIZE -- confirm against the launch site.
 */
__global__ static void hst_cuda_unpack_kernel_fai360(cufftReal *out, int dpitch, cufftReal *data, int spitch, int half, float *params, int batch) {
    const int idx = blockIdx.x * BLOCK_SIZE + threadIdx.x;
    const int idy = blockIdx.y * BLOCK_SIZE + threadIdx.y;

    /* Select the vector this thread works on and the position inside it. */
    const int dest_vector = (idx >= half) ? 1 : 0;
    const int dest_pos = 2 * (idx - dest_vector * half);
    const int src_pos = 2 * (idy * spitch + dest_pos) + dest_vector;
    const int src_vector = dest_vector;

    /* The two samples of one vector are two elements apart in the source. */
    const float tmp1 = data[src_pos];
    const float tmp2 = data[src_pos + 2];

    const float axis_position_corr = c_all[batch + 2 * idy + src_vector].z;
    const float flat_zone = params[idy * 4 + 2 * src_vector];
    const float param = params[idy * 4 + 2 * src_vector + 1];

    const int pos = (2 * idy + dest_vector) * dpitch + dest_pos;

    const float apc_plus_flat = axis_position_corr + flat_zone;
    const float apc_param_flat = apc_plus_flat + param - dest_pos;

    /* flagN is set when the N-th output position lies before apc_plus_flat. */
    const int flag1 = dest_pos < apc_plus_flat;
    const int flag2 = (dest_pos + 1) < apc_plus_flat;

    /* __fdividef is the fast, reduced-accuracy single-precision division. */
    const float multiplier1 = __fdividef((flag1 ? (apc_param_flat - 2.0f * flat_zone) : apc_param_flat), param);
    const float multiplier2 = __fdividef((flag2 ? (apc_param_flat - 2.0f * flat_zone) : apc_param_flat) - 1.0f, param);

    /*
     * Clamp the weights. All literals are single precision on purpose: the
     * double literals used previously promoted min/max and the final multiply
     * to double precision without changing the stored float result.
     */
    float weight1, weight2;
    if (param > 0) {
        /* Lower bound is 1 before apc_plus_flat and 0 after it; upper bound 2. */
        weight1 = fminf(2.0f, fmaxf(flag1 ? 1.0f : 0.0f, multiplier1));
        weight2 = fminf(2.0f, fmaxf(flag2 ? 1.0f : 0.0f, multiplier2));
    } else {
        /* Upper bound is 1 before apc_plus_flat and 2 after it; lower bound 0. */
        weight1 = fmaxf(0.0f, fminf(flag1 ? 1.0f : 2.0f, multiplier1));
        weight2 = fmaxf(0.0f, fminf(flag2 ? 1.0f : 2.0f, multiplier2));
    }

    out[pos] = tmp1 * weight1;
    out[pos + 1] = tmp2 * weight2;
}
|