13
#include "/opt/intel/ipp/include/ippi.h"
14
#include "/opt/intel/ipp/include/ipps.h"
15
#include "/opt/intel/ipp/include/ippcore.h"
18
/* structure to pass arguments to thread */
19
struct thread_info /* Used as argument to thread_start() */
21
pthread_t thread_id; /* ID returned by pthread_create() */
22
int thread_num; /* Application-defined thread # */
30
Ipp32f detector_offset_u;
31
Ipp32f detector_offset_v;
32
Ipp32f detector_offset_z;
40
Ipp32f *slice_coord_z;
44
/* global variables */
45
pthread_mutex_t mutex;
49
void statusinfo(IppStatus st)
53
printf("%d : %s\n", st, ippGetStatusString(st));
58
bool is_aligned(void *p, int N)
60
return (uintptr_t)p % N == 0;
64
/* canonical multiplication of square matrices */
65
int mult(Ipp32f *a, Ipp32f *b, Ipp32f *c)
69
for (k = 0; k < 4; k++)
71
for (i = 0; i < 4; i++)
73
for (j = 0; j < 4; j++)
75
c[i * 4 + j] += a[i * 4 + k] * b[k * 4 + j];
84
void *process (void *args)
86
struct thread_info *t_info = (thread_info*) args;
88
Ipp32f *px_map = NULL, *py_map = NULL;
90
Ipp32f *current_slice = NULL;
92
Ipp32f angle, z, w_x, w_y, w_z;
97
int imStepBytes, ippStepBytes;
99
int i_angle, slice_number;
104
im_size = {t_info->n_elements, t_info->n_elements};
106
/* region of interest = radisograph and slice size */
107
im_roi_size = {0, 0, t_info->n_elements, t_info->n_elements};
110
imStepBytes = t_info->n_elements * sizeof(float);
112
/* allocate temporal array */
113
tmp_2 = ippiMalloc_32f_C1(t_info->n_elements, t_info->n_elements, &ippStepBytes);
116
printf("Cannot allocate tmp_2");
120
current_slice = ippiMalloc_32f_C1(t_info->n_elements, t_info->n_elements, &ippStepBytes);
121
if (current_slice == NULL)
123
printf("Cannot allocate current_slice");
127
/* allocate interpolation maps */
128
px_map = ippiMalloc_32f_C1(t_info->n_elements, t_info->n_elements, &ippStepBytes);
131
printf("Cannot allocate px_map");
135
py_map = ippiMalloc_32f_C1(t_info->n_elements, t_info->n_elements, &ippStepBytes);
138
printf("Cannot allocate py_map");
145
/* counter increment */
147
int err = pthread_mutex_lock(&mutex);
150
printf("Retrying\n");
154
slice_number = counter++;
155
//if (counter > t_info->n_elements)
158
pthread_mutex_unlock(&mutex);
162
pthread_mutex_unlock(&mutex);
164
/* z coordinate of current slice */
165
z = t_info->slice_coord_z[slice_number];
167
/* set current slice to zero */
168
statusinfo(ippiSet_32f_C1R((Ipp32f)0, current_slice, ippStepBytes, im_size));
170
for (i_angle = 0; i_angle < t_info->n_proj; i_angle++)
172
/* set temporal variable to zero */
173
statusinfo(ippiSet_32f_C1R((Ipp32f)0, tmp_2, ippStepBytes, im_size));
175
/* current rotation angle */
176
angle = 2*M_PI/t_info->n_proj*i_angle;
178
/* set some matrices to calculate forward projection matrix operator in homogeneous coordinates*/
179
Ipp32f P1[4 * 4] = {(t_info->d + t_info->detector_offset_z) / t_info->pixel_size, 0, (t_info->detector_size / 2 - t_info->detector_offset_u) / t_info->pixel_size, 0,
180
0, -(t_info->d + t_info->detector_offset_z) / t_info->pixel_size, (t_info->detector_size / 2 - t_info->detector_offset_v) / t_info->pixel_size, 0,
184
Ipp32f P2[4 * 4] = {t_info->u_detector[0], t_info->u_detector[1], t_info->u_detector[2], 0,
185
t_info->v_detector[0], t_info->v_detector[1], t_info->v_detector[2], 0,
186
t_info->n_detector[0], t_info->n_detector[1], t_info->n_detector[2], 0,
189
Ipp32f P3[4 * 4] = {1, 0, 0, -t_info->source[0],
190
0, 1, 0, -t_info->source[1],
191
0, 0, 1, -t_info->source[2],
194
Ipp32f P4[4 * 4] = {1, 0, 0, 0,
196
0, 0, 1, t_info->cor + t_info->cor_offset,
199
Ipp32f P5[4 * 4] = {(Ipp32f)(cosf(angle)), 0, (Ipp32f)(-sinf(angle)), 0,
201
(Ipp32f)(sinf(angle)), 0, (Ipp32f)(cosf(angle)), 0,
204
/* set to zero temporal arrays */
205
Ipp32f P_tmp_1[4 * 4] = {0};
206
Ipp32f P_tmp_2[4 * 4] = {0};
207
Ipp32f P_tmp_3[4 * 4] = {0};
208
Ipp32f P[4 * 4] = {0};
210
/* forward projection operator */
211
mult(P1, P2, P_tmp_1);
212
mult(P_tmp_1, P3, P_tmp_2);
213
mult(P_tmp_2, P4, P_tmp_3);
214
mult(P_tmp_3, P5, P);
216
for (int i = 0; i < t_info->n_elements; i++)
218
for (int j = 0; j < t_info->n_elements; j++)
220
idx = i * t_info->n_elements + j;
222
w_x = P[0] * t_info->slice_x[idx] + P[1] * t_info->slice_y[idx] + P[2] * z + P[3];
223
w_y = P[4] * t_info->slice_x[idx] + P[5] * t_info->slice_y[idx] + P[6] * z + P[7];
224
w_z = P[8] * t_info->slice_x[idx] + P[9] * t_info->slice_y[idx] + P[10] * z + P[11];
226
px_map[idx] = w_x / w_z;
227
py_map[idx] = w_y / w_z;
232
statusinfo(ippiRemap_32f_C1R(t_info->projections + (ippStepBytes / sizeof(float) * (long)t_info->n_elements * i_angle), im_size, ippStepBytes, im_roi_size, px_map, ippStepBytes, py_map, ippStepBytes, tmp_2, ippStepBytes, im_size, IPPI_INTER_LINEAR));
235
/* in-place addition of two matrices */
236
statusinfo(ippiAdd_32f_C1IR(tmp_2, ippStepBytes, current_slice, ippStepBytes, im_size));
239
/* write current slice to final array */
240
//memcpy(t_info->out_volume + (t_info->n_elements * t_info->n_elements * slice_number), current_slice, t_info->n_elements * t_info->n_elements * sizeof(float));
241
statusinfo(ippiCopy_32f_C1R(current_slice, ippStepBytes, t_info->out_volume + ((long)t_info->n_elements * t_info->n_elements * slice_number), imStepBytes, im_size));
248
ippiFree(current_slice);
253
void mexFunction( int nlhs, mxArray *plhs[],
254
int nrhs, const mxArray *prhs[])
256
/* prhs[0] int n_voxels */
257
/* prhs[1] int n_proj */
258
/* prhs[2] float cor */
259
/* prhs[3] float d */
260
/* prhs[4] float detector_size */
261
/* prhs[5] float pixel_size */
262
/* prhs[6] float cor_offset */
263
/* prhs[7] float detector_offset_u */
264
/* prhs[8] float detector_offset_v */
265
/* prhs[9] float detector_offset_z */
266
/* prhs[10] float s */
267
/* prhs[11] float u_detector */
268
/* prhs[12] float v_detector */
269
/* prhs[13] float n_detector_r */
270
/* prhs[14] float projections */
271
/* prhs[15] float slice_x */
272
/* prhs[16] float slice_y */
273
/* prhs[17] float slice_coord_z */
274
/* prhs[18] int n_threads */
276
int n_proj, n_threads;
278
Ipp32f cor, d, detector_size, pixel_size, cor_offset, detector_offset_u, detector_offset_v, detector_offset_z;
279
Ipp32f *source = NULL, *u_detector = NULL, *v_detector = NULL, *n_detector = NULL;
280
Ipp32f *projections = NULL;
281
Ipp32f *slice_x = NULL, *slice_y = NULL, *slice_coord_z = NULL;
282
Ipp32f *out_volume = NULL;
285
struct thread_info *t_info;
290
//////////// PARSE INPUT //////////////////
292
/* radiograph and slice size */
293
/* check n_voxels data type */
294
if (mxGetNumberOfElements(prhs[0])!=1 || !mxIsInt32(prhs[0]))
296
mexErrMsgTxt("mex: Input n_voxels is not scalar or int32.");
299
n_elements = (int)mxGetScalar(prhs[0]);
301
if (n_elements % 16 != 0)
303
printf("n_elements has to be multiple of 16 since IPPI uses 64 bytes alignment");
308
/* number of projections */
309
/* check n_proj data type */
310
if (mxGetNumberOfElements(prhs[1])!=1 || !mxIsInt32(prhs[1]))
312
mexErrMsgTxt("mex: Input n_proj is not scalar or int32.");
315
n_proj = (int)mxGetScalar(prhs[1]);
318
/* source to rotation distance */
319
/* check input cor data type */
320
if (!mxIsSingle(prhs[2]) || mxIsComplex(prhs[2]) || mxGetNumberOfElements(prhs[2])!=1)
322
mexErrMsgTxt("mex: Input cor must be scalar and have type single.");
325
cor = (Ipp32f)mxGetScalar(prhs[2]);
327
/* source to detector distance */
328
/* check input d data type */
329
if (!mxIsSingle(prhs[3]) || mxIsComplex(prhs[3]) || mxGetNumberOfElements(prhs[3])!=1)
331
mexErrMsgTxt("mex: Input d must be scalar and have type single.");
334
d = (Ipp32f)mxGetScalar(prhs[3]);
337
/* physical size of detector (assumed to be square) */
338
/* check input detector_size data type */
339
if (!mxIsSingle(prhs[4]) || mxIsComplex(prhs[4]) || mxGetNumberOfElements(prhs[4])!=1)
341
mexErrMsgTxt("mex: Input detector_size must be scalar and have type single.");
344
detector_size = (Ipp32f)mxGetScalar(prhs[4]);
347
/* physical pixel size */
348
/* check input pixel_size data type */
349
if (!mxIsSingle(prhs[5]) || mxIsComplex(prhs[5]) || mxGetNumberOfElements(prhs[5])!=1)
351
mexErrMsgTxt("mex: Input pixel_size must be scalar and have type single.");
354
pixel_size = (Ipp32f)mxGetScalar(prhs[5]);
358
/* check input cor_offset data type */
359
if (!mxIsSingle(prhs[6]) || mxIsComplex(prhs[6]) || mxGetNumberOfElements(prhs[6])!=1)
361
mexErrMsgTxt("mex: Input cor_offset must be scalar and have type single.");
364
cor_offset = (Ipp32f)mxGetScalar(prhs[6]);
367
/* detector offset in U direction */
368
/* check input detector_offset_u data type */
369
if (!mxIsSingle(prhs[7]) || mxIsComplex(prhs[7]) || mxGetNumberOfElements(prhs[7])!=1)
371
mexErrMsgTxt("mex: Input detector_offset_u must be scalar and have type single.");
374
detector_offset_u = (Ipp32f)mxGetScalar(prhs[7]);
377
/* detector offset in V direction */
378
/* check input detector_offset_v data type */
379
if (!mxIsSingle(prhs[8]) || mxIsComplex(prhs[8]) || mxGetNumberOfElements(prhs[8])!=1)
381
mexErrMsgTxt("mex: Input detector_offset_v must be scalar and have type single.");
384
detector_offset_v = (Ipp32f)mxGetScalar(prhs[8]);
387
/* detector offset along magnification line */
388
/* check input detector_offset_z data type */
389
if (!mxIsSingle(prhs[9]) || mxIsComplex(prhs[9]) || mxGetNumberOfElements(prhs[9])!=1)
391
mexErrMsgTxt("mex: Input detector_offset_z must be scalar and have type single.");
394
detector_offset_z = (Ipp32f)mxGetScalar(prhs[9]);
397
/* source position */
398
/* check input source data type */
399
if (!mxIsSingle(prhs[10]) || mxIsComplex(prhs[10]) || mxGetNumberOfElements(prhs[10])!=3)
401
mexErrMsgTxt("mex: Input source must be type single and contain 3 elements.");
404
source = (Ipp32f *)mxGetData(prhs[10]);
408
printf("Cannot get source");
413
/* detector coordinate frame */
414
/* check input u_detector data type */
415
if( !mxIsSingle(prhs[11]) || mxIsComplex(prhs[11]) || mxGetNumberOfElements(prhs[11])!=3)
417
mexErrMsgTxt("mex: Input u_detector must be type single and contain 3 elements.");
420
/* check input v_detector data type */
421
if( !mxIsSingle(prhs[12]) || mxIsComplex(prhs[12]) || mxGetNumberOfElements(prhs[12])!=3)
423
mexErrMsgTxt("mex: Input v_detector must be type single and contain 3 elements.");
426
/* check input n_detector data type */
427
if( !mxIsSingle(prhs[13]) || mxIsComplex(prhs[13]) || mxGetNumberOfElements(prhs[13])!=3)
429
mexErrMsgTxt("mex: Input n_detector must be type single and contain 3 elements.");
432
u_detector = (Ipp32f *)mxGetData(prhs[11]);
434
if (u_detector == NULL)
436
printf("Cannot get u_detector");
440
v_detector = (Ipp32f *)mxGetData(prhs[12]);
442
if (u_detector == NULL)
444
printf("Cannot get v_detector");
448
n_detector = (Ipp32f *)mxGetData(prhs[13]);
450
if (n_detector == NULL)
452
printf("Cannot get n_detector");
457
/* volume with projections, x-y-angle */
458
/* check input projections data type */
459
if (!mxIsSingle(prhs[14]) || mxIsComplex(prhs[14]))
461
mexErrMsgTxt("mex: Input projections array must be type single.");
464
/* check that number of elements in projections is equal to n_voxels x n_voxels x n_proj */
465
if ((mxGetNumberOfElements(prhs[14]) != (long)n_elements * n_elements * n_proj))
467
mexErrMsgTxt("mex: Input projections must be a n_elements x n_elements x n_proj array.");
470
projections = (Ipp32f *)mxGetData(prhs[14]);
472
if (projections == NULL) {
473
printf("Can not get projections");
477
/* check if dataset is aligned by 32 bytes */
478
if (is_aligned(projections, (int)alignment) == 0)
480
printf("Projections have to be aligned by 32 bytes\n");
486
/* check input slice_x data type */
487
if ( !mxIsSingle(prhs[15]) || mxIsComplex(prhs[15]))
489
mexErrMsgTxt("mex: Input slice_x array must be type single.");
492
/* check that number of rows and columns in slice_x is equal to n_voxels */
493
if(mxGetM(prhs[15]) != n_elements || mxGetN(prhs[15]) != n_elements)
495
mexErrMsgTxt("mex: Input slice_x must be a n_voxels x n_elements array.");
498
/* check input slice_y data type */
499
if( !mxIsSingle(prhs[16]) || mxIsComplex(prhs[16]))
501
mexErrMsgTxt("mex: Input slice_y array must be type single.");
504
/* check that number of rows and columns in slice_y is equal to n_voxels */
505
if(mxGetM(prhs[16]) != n_elements || mxGetN(prhs[16]) != n_elements)
507
mexErrMsgTxt("mex: Input slice_y must be a n_voxels x n_elements array.");
510
/* check input slice_coord_z data type */
511
if( !mxIsSingle(prhs[17]) || mxIsComplex(prhs[17]))
513
mexErrMsgTxt("mex: Input slice_coord_z array must be type single.");
516
/* check that number of elements in slice_coord_z is equal to n_voxels */
517
if(mxGetNumberOfElements(prhs[17]) != n_elements)
519
mexErrMsgTxt("mex: Input slice_coord_z must be a n_voxels array.");
522
slice_x = (Ipp32f *)mxGetData(prhs[15]);
526
printf("Cannot get slice_x");
530
/* check if slice_x is aligned by 32 bytes */
531
if (is_aligned(slice_x, (int)alignment) == 0)
533
printf("slice_x have to be aligned by 32 bytes\n");
537
slice_y = (Ipp32f *)mxGetData(prhs[16]);
541
printf("Cannot get slice_y");
545
/* check if slice_y is aligned by 32 bytes */
546
if (is_aligned(slice_y, (int)alignment) == 0)
548
printf("slice_y have to be aligned by 32 bytes\n");
552
slice_coord_z = (Ipp32f *)mxGetData(prhs[17]);
554
if (slice_coord_z == NULL) {
555
printf("Cannot get slice_coord_z");
561
/* check input n_threads data type */
562
if(mxGetNumberOfElements(prhs[18])!=1 || !mxIsInt32(prhs[18]))
564
mexErrMsgTxt("mex: Input n_threads is not scalar or int32.");
567
n_threads = (int)mxGetScalar(prhs[18]);
570
/* create the output matrix im_size x im_size*/
572
dims[0] = n_elements;
573
dims[1] = n_elements;
574
dims[2] = n_elements;
575
plhs[0] = mxCreateNumericArray((mwSize)3,dims,mxSINGLE_CLASS,mxREAL);
577
/* get a pointer to the data in the output matrix */
578
out_volume = (Ipp32f *)mxGetData(plhs[0]);
580
if (out_volume == NULL) {
581
printf("Cannot get out_volume");
585
///////// INITIALIZE THREADS //////////////
587
/* set global variable counter to zero */
590
/* initialize mutex */
591
m = pthread_mutex_init(&mutex, NULL);
595
printf("Mutex initialization failed with error %i\n", m);
599
/* allocate memory for threads */
600
t_info = (thread_info*) calloc(n_threads, sizeof(struct thread_info));
604
printf("Structure t_info memory allocation error\n");
608
/* fill tread_info structure and create threads */
609
for (tnum = 0; tnum < n_threads; tnum++)
611
t_info[tnum].thread_num = tnum; /* Application-defined thread # */
612
t_info[tnum].n_elements = n_elements;
613
t_info[tnum].n_proj = n_proj;
614
t_info[tnum].cor = cor;
616
t_info[tnum].detector_size = detector_size;
617
t_info[tnum].pixel_size = pixel_size;
618
t_info[tnum].cor_offset = cor_offset;
619
t_info[tnum].detector_offset_u = detector_offset_u;
620
t_info[tnum].detector_offset_v = detector_offset_v;
621
t_info[tnum].detector_offset_z = detector_offset_z;
622
t_info[tnum].source = source;
623
t_info[tnum].u_detector = u_detector;
624
t_info[tnum].v_detector = v_detector;
625
t_info[tnum].n_detector = n_detector;
626
t_info[tnum].projections = projections;
627
t_info[tnum].slice_x = slice_x;
628
t_info[tnum].slice_y = slice_y;
629
t_info[tnum].slice_coord_z = slice_coord_z;
630
t_info[tnum].out_volume = out_volume;
632
m = pthread_create(&t_info[tnum].thread_id, NULL, &process, &t_info[tnum]);
636
printf("Cannot create thread with error %i\n", m);
642
for (tnum = 0; tnum < n_threads; tnum++)
644
m = pthread_join(t_info[tnum].thread_id, &res);
648
printf("Cannot join threads with error %i\n", m);
654
pthread_mutex_destroy(&mutex);
b'\\ No newline at end of file'