21
/* global variables */
23
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
26
void statusinfo(IppStatus st)
30
printf("%d : %s\n", st, ippGetStatusString(st));
37
/* canonical multiplication of square matrices */
38
int mult(Ipp32f *a, Ipp32f *b, Ipp32f *c)
42
for (k = 0; k < 4; k++)
44
for (i = 0; i < 4; i++)
46
for (j = 0; j < 4; j++)
48
c[i * 4 + j] += a[i * 4 + k] * b[k * 4 + j];
56
void *process (void *args)
58
struct thread_info *t_info = (struct thread_info*) args;
60
Ipp32f *px_map = NULL, *py_map = NULL;
61
Ipp32f *tmp_1 = NULL, *tmp_2 = NULL, *w_x = NULL, *w_y = NULL, *w_z = NULL;
62
Ipp32f *current_slice = NULL;
66
IppiSize im_size = {t_info->n_elements, t_info->n_elements};
67
IppiRect im_roi_size = {0, 0, t_info->n_elements, t_info->n_elements};
69
int imStepBytes, ippStepBytes;
71
int i_angle, slice_number;
74
imStepBytes = t_info->n_elements * sizeof(float);
76
/* allocate temporal arrays */
77
tmp_1 = ippiMalloc_32f_C1(t_info->n_elements, t_info->n_elements, &ippStepBytes);
80
printf("Cannot allocate tmp_1");
84
tmp_2 = ippiMalloc_32f_C1(t_info->n_elements, t_info->n_elements, &ippStepBytes);
87
printf("Cannot allocate tmp_2");
91
w_x = ippiMalloc_32f_C1(t_info->n_elements, t_info->n_elements, &ippStepBytes);
94
printf("Cannot allocate w_x");
98
w_y = ippiMalloc_32f_C1(t_info->n_elements, t_info->n_elements, &ippStepBytes);
101
printf("Cannot allocate w_y");
105
w_z = ippiMalloc_32f_C1(t_info->n_elements, t_info->n_elements, &ippStepBytes);
108
printf("Cannot allocate w_z");
112
current_slice = ippiMalloc_32f_C1(t_info->n_elements, t_info->n_elements, &ippStepBytes);
113
if (current_slice == NULL)
115
printf("Cannot allocate current_slice");
119
/* allocate interpolation maps */
120
px_map = ippiMalloc_32f_C1(t_info->n_elements, t_info->n_elements, &ippStepBytes);
123
printf("Cannot allocate px_map");
127
py_map = ippiMalloc_32f_C1(t_info->n_elements, t_info->n_elements, &ippStepBytes);
130
printf("Cannot allocate py_map");
137
/* counter increment */
139
if (pthread_mutex_lock(&mutex))
141
printf("Retrying\n");
145
slice_number = counter;
147
if (slice_number >= t_info->n_elements)
149
pthread_mutex_unlock(&mutex);
153
counter += t_info->slices_per_iter;
155
pthread_mutex_unlock(&mutex);
157
/* z coordinate of current slice */
158
z = t_info->slice_coord_z[slice_number];
160
/* set current slice to zero */
161
statusinfo(ippiSet_32f_C1R((Ipp32f)0, current_slice, ippStepBytes, im_size));
163
for (i_angle = 0; i_angle < t_info->n_proj; i_angle++)
165
/* set temporal variable to zero */
166
statusinfo(ippiSet_32f_C1R((Ipp32f)0, tmp_2, ippStepBytes, im_size));
168
/* current rotation angle */
169
angle = 2*M_PI/t_info->n_proj*i_angle;
171
/* set some matrices to calculate forward projection matrix operator in homogeneous coordinates*/
172
Ipp32f P1[4 * 4] = {(t_info->d + t_info->detector_offset_z) / t_info->pixel_size, 0, (t_info->detector_size / 2 - t_info->detector_offset_u) / t_info->pixel_size, 0,
173
0, -(t_info->d + t_info->detector_offset_z) / t_info->pixel_size, (t_info->detector_size / 2 - t_info->detector_offset_v) / t_info->pixel_size, 0,
177
Ipp32f P2[4 * 4] = {t_info->u_detector[0], t_info->u_detector[1], t_info->u_detector[2], 0,
178
t_info->v_detector[0], t_info->v_detector[1], t_info->v_detector[2], 0,
179
t_info->n_detector[0], t_info->n_detector[1], t_info->n_detector[2], 0,
182
Ipp32f P3[4 * 4] = {1, 0, 0, -t_info->source[0],
183
0, 1, 0, -t_info->source[1],
184
0, 0, 1, -t_info->source[2],
187
Ipp32f P4[4 * 4] = {1, 0, 0, 0,
189
0, 0, 1, t_info->cor + t_info->cor_offset,
192
Ipp32f P5[4 * 4] = {(Ipp32f)(cosf(angle)), 0, (Ipp32f)(-sinf(angle)), 0,
194
(Ipp32f)(sinf(angle)), 0, (Ipp32f)(cosf(angle)), 0,
197
/* set to zero temporal arrays */
198
Ipp32f P_tmp_1[4 * 4] = {0};
199
Ipp32f P_tmp_2[4 * 4] = {0};
200
Ipp32f P_tmp_3[4 * 4] = {0};
201
Ipp32f P[4 * 4] = {0};
203
/* forward projection operator */
204
mult(P1, P2, P_tmp_1);
205
mult(P_tmp_1, P3, P_tmp_2);
206
mult(P_tmp_2, P4, P_tmp_3);
207
mult(P_tmp_3, P5, P);
209
/* forward projetion in homogeneous coordinates *
210
* in MATLAB notation this is equivalent to following
211
* w_x = P(1,1)*slice_x+P(1,2)*slice_y+P(1,3)*z(i)+P(1,4);
212
* w_y = P(2,1)*slice_x+P(2,2)*slice_y+P(2,3)*z(i)+P(2,4);
213
* w_z = P(3,1)*slice_x+P(3,2)*slice_y+P(3,3)*z(i)+P(3,4);
215
* py_map = w_y./w_z; */
218
/* not-in-place mutiplication of matrix by constant */
219
statusinfo(ippiMulC_32f_C1R(t_info->slice_x, ippStepBytes, P[0], tmp_1, ippStepBytes, im_size));
220
statusinfo(ippiMulC_32f_C1R(t_info->slice_y, ippStepBytes, P[1], w_x, ippStepBytes, im_size));
222
/* in-place addition of two matrices*/
223
statusinfo(ippiAdd_32f_C1IR(tmp_1, ippStepBytes, w_x, ippStepBytes, im_size));
225
/* in-place addition of matrix and constant */
226
statusinfo(ippiAddC_32f_C1IR(P[2]*z + P[3], w_x, ippStepBytes, im_size));
229
statusinfo(ippiMulC_32f_C1R(t_info->slice_x, ippStepBytes, P[4], tmp_1, ippStepBytes, im_size));
230
statusinfo(ippiMulC_32f_C1R(t_info->slice_y, ippStepBytes, P[5], w_y, ippStepBytes, im_size));
232
statusinfo(ippiAdd_32f_C1IR(tmp_1, ippStepBytes, w_y, ippStepBytes, im_size));
234
statusinfo(ippiAddC_32f_C1IR(P[6]*z + P[7], w_y, ippStepBytes, im_size));
237
statusinfo(ippiMulC_32f_C1R(t_info->slice_x, ippStepBytes, P[8], tmp_1, ippStepBytes, im_size));
238
statusinfo(ippiMulC_32f_C1R(t_info->slice_y, ippStepBytes, P[9], w_z, ippStepBytes, im_size));
240
statusinfo(ippiAdd_32f_C1IR(tmp_1, ippStepBytes, w_z, ippStepBytes, im_size));
242
statusinfo(ippiAddC_32f_C1IR(P[10]*z + P[11], w_z, ippStepBytes, im_size));
244
/* calculate px_map and py_map = dehomogenize */
245
/* not-in-place devision of two matrices */
246
statusinfo(ippiDiv_32f_C1R(w_z, ippStepBytes, w_x, ippStepBytes, px_map, ippStepBytes, im_size));
247
statusinfo(ippiDiv_32f_C1R(w_z, ippStepBytes, w_y, ippStepBytes, py_map, ippStepBytes, im_size));
250
statusinfo(ippiRemap_32f_C1R(t_info->projections + (ippStepBytes / sizeof(float) * (long)t_info->n_elements * i_angle), im_size, ippStepBytes, im_roi_size, px_map, ippStepBytes, py_map, ippStepBytes, tmp_2, ippStepBytes, im_size, IPPI_INTER_LINEAR));
253
/* in-place addition of two matrices */
254
statusinfo(ippiAdd_32f_C1IR(tmp_2, ippStepBytes, current_slice, ippStepBytes, im_size));
257
/* write current slice to final array */
258
//memcpy(t_info->out_volume + (t_info->n_elements * t_info->n_elements * slice_number), current_slice, t_info->n_elements * t_info->n_elements * sizeof(float));
259
statusinfo(ippiCopy_32f_C1R(current_slice, ippStepBytes, t_info->out_volume + ((long)t_info->n_elements * t_info->n_elements * slice_number), imStepBytes, im_size));
270
ippiFree(current_slice);