bzr branch
http://darksoft.org/webbzr/tomo/pyhst
78
by Suren A. Chilingaryan
Add COPYING and fix license statements |
1 |
/*
|
2 |
* The PyHST program is Copyright (C) 2002-2011 of the
|
|
3 |
* European Synchrotron Radiation Facility (ESRF) and
|
|
4 |
* Karlsruhe Institute of Technology (KIT).
|
|
5 |
*
|
|
6 |
* PyHST is free software: you can redistribute it and/or modify it
|
|
7 |
* under the terms of the GNU General Public License as published by the
|
|
8 |
* Free Software Foundation, either version 3 of the License, or
|
|
9 |
* (at your option) any later version.
|
|
10 |
*
|
|
11 |
 * PyHST is distributed in the hope that it will be useful, but
|
|
12 |
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
14 |
* See the GNU General Public License for more details.
|
|
15 |
*
|
|
16 |
* You should have received a copy of the GNU General Public License along
|
|
17 |
* with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
18 |
*/
|
|
19 |
||
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
20 |
#define _GNU_SOURCE
|
21 |
#include <stdio.h> |
|
22 |
#include <stdlib.h> |
|
23 |
#include <string.h> |
|
24 |
||
25 |
#ifdef HW_HAVE_SCHED_HEADERS
|
|
26 |
# include <sys/types.h>
|
|
27 |
# include <unistd.h>
|
|
28 |
# include <sched.h>
|
|
29 |
#endif /* HW_HAVE_SCHED_HEADERS */ |
|
30 |
||
31 |
#include "debug.h" |
|
32 |
#include "hw_sched.h" |
|
33 |
||
34 |
||
35 |
||
36 |
||
37 |
#ifdef HW_USE_THREADS
/*
 * Helpers for creating/destroying the GLib synchronization primitives stored
 * in the scheduler context.  All of them expect:
 *   - `ctx` to have members named `<name>_mutex` / `<name>_cond`;
 *   - an `int err` variable in the calling scope (set to 1 on failure,
 *     left untouched on success; INIT is skipped if err is already set).
 * Wrapped in do { } while (0) so they behave as single statements and are
 * safe inside unbraced if/else bodies.
 */
# define MUTEX_INIT(ctx, name) \
    do { \
        if (!err) { \
            ctx->name##_mutex = g_mutex_new(); \
            if (!ctx->name##_mutex) err = 1; \
        } \
    } while (0)

# define MUTEX_FREE(ctx, name) \
    do { \
        if (ctx->name##_mutex) g_mutex_free(ctx->name##_mutex); \
    } while (0)

/* A condition variable travels with its companion mutex (<name>_cond_mutex). */
# define COND_INIT(ctx, name) \
    do { \
        MUTEX_INIT(ctx, name##_cond); \
        if (!err) { \
            ctx->name##_cond = g_cond_new(); \
            if (!ctx->name##_cond) { \
                err = 1; \
                MUTEX_FREE(ctx, name##_cond); \
            } \
        } \
    } while (0)

# define COND_FREE(ctx, name) \
    do { \
        if (ctx->name##_cond) g_cond_free(ctx->name##_cond); \
        MUTEX_FREE(ctx, name##_cond); \
    } while (0)
#else /* HW_USE_THREADS */
/* Single-threaded build: no synchronization primitives are needed. */
# define MUTEX_INIT(ctx, name) do { } while (0)
# define MUTEX_FREE(ctx, name) do { } while (0)
# define COND_INIT(ctx, name)  do { } while (0)
# define COND_FREE(ctx, name)  do { } while (0)
#endif /* HW_USE_THREADS */
HWRunFunction ppu_run[] = { |
|
69 |
(HWRunFunction)NULL |
|
70 |
};
|
|
71 |
||
72 |
static int hw_sched_initialized = 0; |
|
73 |
||
49
by root
Merge /home/matthias/dev/pyHST |
74 |
int hw_sched_init(void) { |
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
75 |
if (!hw_sched_initialized) { |
76 |
#ifdef HW_USE_THREADS
|
|
77 |
g_thread_init(NULL); |
|
78 |
#endif /* HW_USE_THREADS */ |
|
79 |
hw_sched_initialized = 1; |
|
80 |
}
|
|
81 |
||
82 |
return 0; |
|
83 |
}
|
|
84 |
||
85 |
||
/**
 * Detect how many CPUs are available to this process.
 *
 * Uses the process affinity mask when the scheduling headers are available;
 * otherwise (or on any failure) falls back to 1 so callers always get a
 * usable thread count.
 *
 * Fix: the pre-CPU_COUNT fallback used to stop at the first CPU *absent*
 * from the mask, so a sparse affinity mask (e.g. CPUs {1,2,3}) was counted
 * as 0/1.  Now every bit of the mask is examined.
 *
 * @return number of usable CPUs, always >= 1
 */
int hw_sched_get_cpu_count(void) {
#ifdef HW_HAVE_SCHED_HEADERS
    int err;

    int cpu_count;
    cpu_set_t mask;

    err = sched_getaffinity(getpid(), sizeof(mask), &mask);
    if (err) return 1;          /* affinity unknown: assume a single CPU */

# ifdef CPU_COUNT
    cpu_count = CPU_COUNT(&mask);
# else
    {
        int cpu;
        cpu_count = 0;
        for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
            if (CPU_ISSET(cpu, &mask)) cpu_count++;
        }
    }
# endif

    if (!cpu_count) cpu_count = 1;
    return cpu_count;
#else /* HW_HAVE_SCHED_HEADERS */
    return 1;
#endif /* HW_HAVE_SCHED_HEADERS */
}
112 |
HWSched hw_sched_create(int cpu_count) { |
|
113 |
int i; |
|
114 |
int err = 0; |
|
115 |
||
116 |
HWSched ctx; |
|
117 |
||
41
by csa
Bug fixes |
118 |
//hw_sched_init();
|
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
119 |
|
120 |
ctx = (HWSched)malloc(sizeof(HWSchedS)); |
|
121 |
if (!ctx) return NULL; |
|
122 |
||
123 |
memset(ctx, 0, sizeof(HWSchedS)); |
|
124 |
||
125 |
ctx->status = 1; |
|
126 |
||
32
by csa
Fix crash in FFTW3 initialization and cleanup in multi-threaded case |
127 |
MUTEX_INIT(ctx, sync); |
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
128 |
MUTEX_INIT(ctx, data); |
129 |
COND_INIT(ctx, compl); |
|
130 |
COND_INIT(ctx, job); |
|
131 |
||
132 |
if (err) { |
|
133 |
pyhst_error("Error initializing conditions and mutexes"); |
|
134 |
hw_sched_destroy(ctx); |
|
135 |
return NULL; |
|
136 |
}
|
|
137 |
||
138 |
if (!cpu_count) cpu_count = hw_sched_get_cpu_count(); |
|
139 |
if (cpu_count > HW_MAX_THREADS) { |
|
140 |
pyhst_warning("Amount of requested threads %i is above limit, using %i", cpu_count, HW_MAX_THREADS); |
|
141 |
cpu_count = HW_MAX_THREADS; |
|
142 |
}
|
|
143 |
||
144 |
ctx->n_threads = 0; |
|
145 |
for (i = 0; i < cpu_count; i++) { |
|
146 |
ctx->thread[ctx->n_threads] = hw_thread_create(ctx, ctx->n_threads, NULL, ppu_run, NULL); |
|
147 |
if (ctx->thread[ctx->n_threads]) { |
|
148 |
#ifndef HW_USE_THREADS
|
|
173
by Suren A. Chilingaryan
Fix few bugs in scheduller causing crashes in non-threaded mode (still inoperational) |
149 |
ctx->thread[ctx->n_threads]->status = HW_THREAD_STATUS_STARTING; |
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
150 |
#endif /* HW_USE_THREADS */ |
151 |
++ctx->n_threads; |
|
152 |
}
|
|
153 |
}
|
|
154 |
||
155 |
if (!ctx->n_threads) { |
|
156 |
hw_sched_destroy(ctx); |
|
157 |
return NULL; |
|
158 |
}
|
|
159 |
||
160 |
return ctx; |
|
161 |
}
|
|
162 |
||
163 |
static int hw_sched_wait_threads(HWSched ctx) { |
|
164 |
#ifdef HW_USE_THREADS
|
|
165 |
int i = 0; |
|
166 |
||
167 |
hw_sched_lock(ctx, compl_cond); |
|
168 |
while (i < ctx->n_threads) { |
|
169 |
for (; i < ctx->n_threads; i++) { |
|
170 |
if (ctx->thread[i]->status == HW_THREAD_STATUS_INIT) { |
|
171 |
hw_sched_wait(ctx, compl); |
|
172 |
break; |
|
173 |
}
|
|
174 |
}
|
|
175 |
||
176 |
}
|
|
177 |
hw_sched_unlock(ctx, compl_cond); |
|
178 |
#endif /* HW_USE_THREADS */ |
|
179 |
||
180 |
ctx->started = 1; |
|
181 |
||
182 |
return 0; |
|
183 |
}
|
|
184 |
||
185 |
void hw_sched_destroy(HWSched ctx) { |
|
186 |
int i; |
|
187 |
||
188 |
if (ctx->n_threads > 0) { |
|
189 |
if (!ctx->started) { |
|
190 |
hw_sched_wait_threads(ctx); |
|
191 |
}
|
|
192 |
||
193 |
ctx->status = 0; |
|
194 |
hw_sched_lock(ctx, job_cond); |
|
195 |
hw_sched_broadcast(ctx, job); |
|
196 |
hw_sched_unlock(ctx, job_cond); |
|
197 |
||
198 |
for (i = 0; i < ctx->n_threads; i++) { |
|
199 |
hw_thread_destroy(ctx->thread[i]); |
|
200 |
}
|
|
201 |
}
|
|
202 |
||
203 |
COND_FREE(ctx, job); |
|
204 |
COND_FREE(ctx, compl); |
|
205 |
MUTEX_FREE(ctx, data); |
|
32
by csa
Fix crash in FFTW3 initialization and cleanup in multi-threaded case |
206 |
MUTEX_FREE(ctx, sync); |
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
207 |
|
208 |
free(ctx); |
|
209 |
}
|
|
210 |
||
151
by Suren A. Chilingaryan
Multislice mode: preload into the GPU memory complete slices |
211 |
int hw_sched_set_sequential_mode(HWSched ctx, int *n_blocks, int *cur_block, HWSchedFlags flags) { |
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
212 |
ctx->mode = HW_SCHED_MODE_SEQUENTIAL; |
213 |
ctx->n_blocks = n_blocks; |
|
214 |
ctx->cur_block = cur_block; |
|
151
by Suren A. Chilingaryan
Multislice mode: preload into the GPU memory complete slices |
215 |
ctx->flags = flags; |
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
216 |
|
217 |
return 0; |
|
218 |
}
|
|
219 |
||
220 |
int hw_sched_get_chunk(HWSched ctx, int thread_id) { |
|
221 |
int block; |
|
222 |
||
223 |
switch (ctx->mode) { |
|
224 |
case HW_SCHED_MODE_PREALLOCATED: |
|
156
by Suren A. Chilingaryan
Really-really return terminator chunk in all cases |
225 |
if (ctx->thread[thread_id]->status == HW_THREAD_STATUS_STARTING) { |
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
226 |
#ifndef HW_USE_THREADS
|
227 |
ctx->thread[thread_id]->status = HW_THREAD_STATUS_DONE; |
|
228 |
#endif /* HW_USE_THREADS */ |
|
173
by Suren A. Chilingaryan
Fix few bugs in scheduller causing crashes in non-threaded mode (still inoperational) |
229 |
return thread_id; |
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
230 |
} else { |
151
by Suren A. Chilingaryan
Multislice mode: preload into the GPU memory complete slices |
231 |
return HW_SCHED_CHUNK_INVALID; |
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
232 |
}
|
233 |
case HW_SCHED_MODE_SEQUENTIAL: |
|
156
by Suren A. Chilingaryan
Really-really return terminator chunk in all cases |
234 |
if ((ctx->flags&HW_SCHED_FLAG_INIT_CALL)&&(ctx->thread[thread_id]->status == HW_THREAD_STATUS_STARTING)) { |
151
by Suren A. Chilingaryan
Multislice mode: preload into the GPU memory complete slices |
235 |
return HW_SCHED_CHUNK_INIT; |
236 |
}
|
|
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
237 |
hw_sched_lock(ctx, data); |
238 |
block = *ctx->cur_block; |
|
239 |
if (block < *ctx->n_blocks) { |
|
240 |
*ctx->cur_block = *ctx->cur_block + 1; |
|
241 |
} else { |
|
151
by Suren A. Chilingaryan
Multislice mode: preload into the GPU memory complete slices |
242 |
block = HW_SCHED_CHUNK_INVALID; |
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
243 |
}
|
244 |
hw_sched_unlock(ctx, data); |
|
151
by Suren A. Chilingaryan
Multislice mode: preload into the GPU memory complete slices |
245 |
if (block == HW_SCHED_CHUNK_INVALID) { |
246 |
if (((ctx->flags&HW_SCHED_FLAG_FREE_CALL)&&(ctx->thread[thread_id]->status == HW_THREAD_STATUS_RUNNING))) { |
|
247 |
ctx->thread[thread_id]->status = HW_THREAD_STATUS_FINISHING; |
|
248 |
return HW_SCHED_CHUNK_FREE; |
|
249 |
}
|
|
250 |
if ((ctx->flags&HW_SCHED_FLAG_TERMINATOR_CALL)&&((ctx->thread[thread_id]->status == HW_THREAD_STATUS_RUNNING)||(ctx->thread[thread_id]->status == HW_THREAD_STATUS_FINISHING))) { |
|
251 |
int i; |
|
252 |
hw_sched_lock(ctx, data); |
|
253 |
for (i = 0; i < ctx->n_threads; i++) { |
|
254 |
if (thread_id == i) continue; |
|
156
by Suren A. Chilingaryan
Really-really return terminator chunk in all cases |
255 |
if ((ctx->thread[i]->status != HW_THREAD_STATUS_DONE)&&(ctx->thread[i]->status != HW_THREAD_STATUS_FINISHING2)&&(ctx->thread[i]->status != HW_THREAD_STATUS_IDLE)) { |
154
by Suren A. Chilingaryan
Really return terminator chunk from scheduler |
256 |
break; |
257 |
}
|
|
151
by Suren A. Chilingaryan
Multislice mode: preload into the GPU memory complete slices |
258 |
}
|
259 |
ctx->thread[thread_id]->status = HW_THREAD_STATUS_FINISHING2; |
|
260 |
hw_sched_unlock(ctx, data); |
|
261 |
if (i == ctx->n_threads) { |
|
262 |
return HW_SCHED_CHUNK_TERMINATOR; |
|
156
by Suren A. Chilingaryan
Really-really return terminator chunk in all cases |
263 |
}
|
151
by Suren A. Chilingaryan
Multislice mode: preload into the GPU memory complete slices |
264 |
}
|
265 |
}
|
|
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
266 |
return block; |
267 |
default: |
|
151
by Suren A. Chilingaryan
Multislice mode: preload into the GPU memory complete slices |
268 |
return HW_SCHED_CHUNK_INVALID; |
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
269 |
}
|
270 |
||
271 |
return -1; |
|
272 |
}
|
|
273 |
||
274 |
||
275 |
int hw_sched_schedule_task(HWSched ctx, void *appctx, HWEntry entry) { |
|
276 |
#ifdef HW_USE_THREADS
|
|
277 |
if (!ctx->started) { |
|
278 |
hw_sched_wait_threads(ctx); |
|
279 |
}
|
|
280 |
#else /* HW_USE_THREADS */ |
|
281 |
int err; |
|
282 |
int i, chunk_id, n_threads; |
|
283 |
HWRunFunction run; |
|
284 |
HWThread thrctx; |
|
285 |
#endif /* HW_USE_THREADS */ |
|
286 |
||
287 |
ctx->ctx = appctx; |
|
288 |
ctx->entry = entry; |
|
289 |
||
290 |
switch (ctx->mode) { |
|
291 |
case HW_SCHED_MODE_SEQUENTIAL: |
|
292 |
*ctx->cur_block = 0; |
|
293 |
break; |
|
294 |
default: |
|
295 |
;
|
|
296 |
}
|
|
297 |
||
298 |
#ifdef HW_USE_THREADS
|
|
299 |
hw_sched_lock(ctx, compl_cond); |
|
300 |
||
301 |
hw_sched_lock(ctx, job_cond); |
|
302 |
hw_sched_broadcast(ctx, job); |
|
303 |
hw_sched_unlock(ctx, job_cond); |
|
304 |
#else /* HW_USE_THREADS */ |
|
305 |
n_threads = ctx->n_threads; |
|
306 |
||
307 |
for (i = 0; i < n_threads; i++) { |
|
308 |
thrctx = ctx->thread[i]; |
|
309 |
thrctx->err = 0; |
|
310 |
}
|
|
311 |
||
312 |
i = 0; |
|
313 |
thrctx = ctx->thread[i]; |
|
314 |
chunk_id = hw_sched_get_chunk(ctx, thrctx->thread_id); |
|
315 |
||
316 |
while (chunk_id >= 0) { |
|
317 |
run = hw_run_entry(thrctx->runs, entry); |
|
318 |
err = run(thrctx, thrctx->hwctx, chunk_id, appctx); |
|
319 |
if (err) { |
|
320 |
thrctx->err = err; |
|
321 |
break; |
|
322 |
}
|
|
323 |
||
324 |
if ((++i) == n_threads) i = 0; |
|
325 |
thrctx = ctx->thread[i]; |
|
326 |
chunk_id = hw_sched_get_chunk(ctx, thrctx->thread_id); |
|
327 |
}
|
|
328 |
#endif /* HW_USE_THREADS */ |
|
329 |
||
330 |
return 0; |
|
331 |
}
|
|
332 |
||
333 |
int hw_sched_wait_task(HWSched ctx) { |
|
334 |
int err = 0; |
|
335 |
int i = 0, n_threads = ctx->n_threads; |
|
336 |
||
337 |
#ifdef HW_USE_THREADS
|
|
338 |
while (i < ctx->n_threads) { |
|
339 |
for (; i < ctx->n_threads; i++) { |
|
340 |
if (ctx->thread[i]->status == HW_THREAD_STATUS_DONE) { |
|
341 |
ctx->thread[i]->status = HW_THREAD_STATUS_IDLE; |
|
342 |
} else { |
|
343 |
hw_sched_wait(ctx, compl); |
|
344 |
break; |
|
345 |
}
|
|
346 |
}
|
|
347 |
||
348 |
}
|
|
349 |
||
350 |
hw_sched_unlock(ctx, compl_cond); |
|
351 |
#endif /* HW_USE_THREADS */ |
|
352 |
||
353 |
for (i = 0; i < n_threads; i++) { |
|
354 |
HWThread thrctx = ctx->thread[i]; |
|
355 |
if (thrctx->err) return err = thrctx->err; |
|
356 |
||
357 |
#ifndef HW_USE_THREADS
|
|
358 |
thrctx->status = HW_THREAD_STATUS_IDLE; |
|
359 |
#endif /* HW_USE_THREADS */ |
|
360 |
}
|
|
361 |
||
362 |
return err; |
|
363 |
}
|
|
364 |
||
365 |
int hw_sched_execute_task(HWSched ctx, void *appctx, HWEntry entry) { |
|
366 |
int err; |
|
367 |
||
368 |
err = hw_sched_schedule_task(ctx, appctx, entry); |
|
369 |
if (err) return err; |
|
370 |
||
371 |
return hw_sched_wait_task(ctx); |
|
372 |
}
|
|
373 |
||
374 |
int hw_sched_schedule_thread_task(HWSched ctx, void *appctx, HWEntry entry) { |
|
375 |
int err; |
|
376 |
||
377 |
ctx->saved_mode = ctx->mode; |
|
378 |
ctx->mode = HW_SCHED_MODE_PREALLOCATED; |
|
379 |
err = hw_sched_schedule_task(ctx, appctx, entry); |
|
380 |
||
381 |
return err; |
|
382 |
}
|
|
383 |
||
384 |
||
385 |
int hw_sched_wait_thread_task(HWSched ctx) { |
|
386 |
int err; |
|
387 |
||
388 |
err = hw_sched_wait_task(ctx); |
|
389 |
ctx->mode = ctx->saved_mode; |
|
151
by Suren A. Chilingaryan
Multislice mode: preload into the GPU memory complete slices |
390 |
|
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
391 |
return err; |
392 |
}
|
|
393 |
||
394 |
int hw_sched_execute_thread_task(HWSched ctx, void *appctx, HWEntry entry) { |
|
395 |
int err; |
|
396 |
int saved_mode = ctx->mode; |
|
397 |
||
398 |
ctx->mode = HW_SCHED_MODE_PREALLOCATED; |
|
399 |
err = hw_sched_execute_task(ctx, appctx, entry); |
|
400 |
ctx->mode = saved_mode; |
|
401 |
||
402 |
return err; |
|
403 |
}
|