bzr branch
http://darksoft.org/webbzr/ani/mrses
1
by Suren A. Chilingaryan
Initial import |
1 |
#include <stdio.h> |
2 |
#include <string.h> |
|
3 |
#include <assert.h> |
|
4 |
||
5 |
#include <libspe2.h> |
|
6 |
||
7 |
#include "msg.h" |
|
8 |
#include "mrses_spu.h" |
|
9 |
||
10 |
/*
 * Single-block entry point of the SPU backend.
 *
 * Intentionally left unimplemented (the author saw no sense in implementing
 * it), so every parameter is marked unused. Calling it trips the assertion;
 * when assertions are compiled out (NDEBUG) it simply returns 0.
 */
int mrses_spu_run(
    HWThread thr __attribute__ ((unused)),
    spe_context_ptr_t hwctx __attribute__ ((unused)),
    int block __attribute__ ((unused)),
    MRSESContext mrses __attribute__ ((unused))
) {
    /* It makes no sense to implement this; must never be reached. */
    assert(0);

    return 0;
}
|
|
15 |
||
16 |
/*
 * Run one group of blocks on an SPE and accumulate the resulting indexes
 * into the per-thread histogram.
 *
 * thr         - hardware thread descriptor; thr->data caches the histogram
 *               buffer between calls (it is allocated and zeroed here on
 *               first use)
 * hwctx       - SPE context handed to spe_context_run()
 * block_group - group number; the group covers blocks
 *               [block_group * iterate_size, (block_group + 1) * iterate_size)
 * mrses       - MRSES context; reads ires, iterate_size, properties, width,
 *               palloc and index, and is passed to the SPE program
 *
 * Returns 0 on success and 1 on any failure: histogram allocation failure,
 * spe_context_run() error, a stop reason other than SPE_EXIT, or a non-zero
 * SPE exit code.
 */
int mrses_spu_iterate(HWThread thr, spe_context_ptr_t hwctx, int block_group, MRSESContext mrses) {
    int err;

    int i;
    unsigned int *hist;

    MRSESIntType *index;

    MRSESIntType *ires = mrses->ires;

    int iterate_size = mrses->iterate_size;
    int properties = mrses->properties;
    int width = mrses->width;

    int palloc = mrses->palloc;

    int block = block_group * iterate_size;
    int block_end = block + iterate_size;

    SPUParameters param __attribute__ ((aligned (16)));
    spe_stop_info_t stop_info;
    // We need this variable renewed before each execution of spe_context_run!!!
    unsigned int spe_entry = SPE_DEFAULT_ENTRY;

    //printf("running spu thread: %p\n", hwctx);

    if (thr->data) {
        hist = (unsigned int *)(thr->data);
    } else {
        size_t hist_size = calc_alloc(properties * sizeof(uint32_t), HW_ALIGN);
        void *buf = NULL;

        // posix_memalign() reports failure through its return value and
        // leaves the output pointer unspecified, so check before touching it.
        err = posix_memalign(&buf, HW_ALIGN, hist_size);
        if (err || !buf) {
            reportError ("Failed to allocate histogram buffer, error: %i", err);
            return 1;
        }

        memset(buf, 0, hist_size);
        hist = buf;
        thr->data = hist;
    }

    param.mrses = mrses;
    param.block_group = block_group;

    err = spe_context_run(hwctx, &spe_entry, 0, &param, NULL, &stop_info);
    if (err < 0) {
        reportError ("Failed to run SPE program, error: %i", err);
        return 1;
    }

    if (stop_info.stop_reason != SPE_EXIT) {
        reportError ("SPE program terminated with non-exit stop reason: %i", stop_info.stop_reason);
        return 1;
    }

    err = stop_info.result.spe_exit_code;
    if (err) {
        reportError ("SPE program terminated with error code: %i", err);
        return 1;
    }

    // Rows of mrses->index are palloc elements apart, but only the first
    // width entries of each row are consumed; ires (if set) is packed at
    // width elements per block.
    for (; block < block_end; ++block) {
        index = mrses->index + block * palloc;
        if (ires) memcpy(ires + block * width, index, width * sizeof(MRSESIntType));
        for (i = 0; i < width; i++) {
            hist[index[i]]++;
        }
    }

    // Disabled debugging aid: repacks the indexes, re-runs every block of the
    // group through mrses_ppu_run() with the configured, MAHALANOBIS and
    // CORCOR distances, and prints the results.
/*
    {
        int i;
        int iterate_size = mrses->iterate_size;
        MRSESIntType *index = mrses->index;
        mrses->result = malloc(iterate_size * (block_group + 1) * sizeof(MRSESDataType));
        mrses->index = malloc(mrses->width * iterate_size * (block_group + 1) * sizeof(MRSESIntType));

        for (i = 0; i < iterate_size; i++) {
            memcpy(mrses->index + (iterate_size * block_group + i) * mrses->width, index + (iterate_size * block_group + i) * palloc, mrses->width * sizeof(MRSESIntType));
        }

        for (i = 0; i < iterate_size * mrses->width; i++) {
            ++mrses->index[iterate_size * block_group * mrses->width + i];
        }

        float res, mahal, corcor;
        MRSESDistance dist = mrses->dist;
        void * data = thr->data;

        thr->data = NULL;
        for (i = 0; i < iterate_size; i++) {
            mrses->dist = dist;
            mrses_ppu_run(thr, hwctx, block_group * iterate_size + i, mrses);
            res = mrses->result[block_group * iterate_size + i];
            mrses->dist = MAHALANOBIS;
            mrses_ppu_run(thr, hwctx, block_group * iterate_size + i, mrses);
            mahal = mrses->result[block_group * iterate_size + i];
            mrses->dist = CORCOR;
            mrses_ppu_run(thr, hwctx, block_group * iterate_size + i, mrses);
            corcor = mrses->result[block_group * iterate_size + i];

            printf("PPU result, block %i: %e (mahal: %e, corcor: %e)\n", i, res, mahal, corcor);
            int j;
            for (j = 0; j < width; j++) {
                printf("%i ", mrses->index[(iterate_size * block_group + i)*mrses->width + j]);
            }
            printf("\n");
        }

        free(thr->data);
        thr->data = data;

        free(mrses->index); mrses->index = index;
        free(mrses->result); mrses->result = NULL;
        mrses->dist = dist;
    }
*/

    return 0;
}
|