/perf/fdk

To get this branch, use:
bzr branch http://darksoft.org/webbzr/perf/fdk

« back to all changes in this revision

Viewing changes to process.c

  • Committer: Suren A. Chilingaryan
  • Date: 2017-02-09 00:44:25 UTC
  • Revision ID: csa@suren.me-20170209004425-4dt67qhxz9ibdehy
Intel compiler

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
1
#include <stdio.h>
2
2
#include <stdlib.h>
3
3
 
4
 
#define __USE_BSD
 
4
#ifndef __USE_BSD
 
5
# define __USE_BSD
 
6
#endif
5
7
#include <math.h>
6
8
 
7
9
#include <pthread.h>
55
57
void *process (void *args) 
56
58
{
57
59
    int err;
 
60
    int i, j;
58
61
 
59
62
    struct thread_info *t_info = (struct thread_info*) args;
60
63
 
62
65
    Ipp32f *tmp_2 = NULL;
63
66
    Ipp32f *current_slice = NULL;
64
67
 
65
 
    Ipp32f angle, z, w_x, w_y, w_z;
 
68
    Ipp32f angle, z;
66
69
 
67
70
    IppiSize im_size = {t_info->n_elements, t_info->n_elements};
68
71
    IppiRect im_roi_size = {0, 0, t_info->n_elements, t_info->n_elements};
71
74
 
72
75
    int i_angle, slice_number;
73
76
 
74
 
    long idx;
75
 
 
76
77
    /* step in bytes */
77
78
    imStepBytes = t_info->n_elements * sizeof(float);
78
79
 
116
117
            goto retry;
117
118
        }
118
119
 
119
 
        slice_number = counter++;
120
 
        //if (counter > t_info->n_elements) 
121
 
        if (counter > 2) 
 
120
        slice_number = counter;
 
121
 
 
122
        if (slice_number >= t_info->n_elements) 
122
123
        {
123
124
            pthread_mutex_unlock(&mutex);
124
125
            break;
125
126
        }
126
 
        
 
127
 
 
128
        counter += t_info->slices_per_iter;
 
129
 
127
130
        pthread_mutex_unlock(&mutex);
128
 
        
 
131
 
129
132
        /* z coordinate of current slice */
130
133
        z = t_info->slice_coord_z[slice_number];
131
 
    
 
134
 
132
135
        /* set current slice to zero */ 
133
136
        statusinfo(ippiSet_32f_C1R((Ipp32f)0, current_slice, ippStepBytes, im_size));
134
137
        
178
181
            mult(P_tmp_2, P4, P_tmp_3);
179
182
            mult(P_tmp_3, P5, P);
180
183
            
181
 
            for (int i = 0; i < t_info->n_elements; i++)
 
184
            int n_elements = t_info->n_elements;
 
185
            for (i = 0; i < n_elements; i++)
182
186
            {
183
 
                for (int j = 0; j < t_info->n_elements; j++)
 
187
#pragma simd 
 
188
                for (j = 0; j < n_elements; j++)
184
189
                {
185
 
                    idx = i * t_info->n_elements + j;
 
190
                    int idx = i * n_elements + j;
186
191
                    
187
 
                    w_x = P[0] * t_info->slice_x[idx] + P[1] * t_info->slice_y[idx] + P[2] * z + P[3];
188
 
                    w_y = P[4] * t_info->slice_x[idx] + P[5] * t_info->slice_y[idx] + P[6] * z + P[7];
189
 
                    w_z = P[8] * t_info->slice_x[idx] + P[9] * t_info->slice_y[idx] + P[10] * z + P[11];
 
192
                    float w_x = P[0] * t_info->slice_x[idx] + P[1] * t_info->slice_y[idx] + P[2] * z + P[3];
 
193
                    float w_y = P[4] * t_info->slice_x[idx] + P[5] * t_info->slice_y[idx] + P[6] * z + P[7];
 
194
                    float w_z = P[8] * t_info->slice_x[idx] + P[9] * t_info->slice_y[idx] + P[10] * z + P[11];
190
195
                    
191
196
                    px_map[idx] =  w_x / w_z;
192
197
                    py_map[idx] =  w_y / w_z;