/tomo/pyhst

To get this branch, use:
bzr branch http://darksoft.org/webbzr/tomo/pyhst

/*
 * The PyHST program is Copyright (C) 2002-2011 of the
 * European Synchrotron Radiation Facility (ESRF) and
 * Karlsruhe Institute of Technology (KIT).
 *
 * PyHST is free software: you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * PyHST is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef HW_HAVE_SCHED_HEADERS
# include <sys/types.h>
# include <unistd.h>
# include <sched.h>
#endif /* HW_HAVE_SCHED_HEADERS */

#include "debug.h"
#include "hw_sched.h"
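
/*
 * Synchronization helpers: MUTEX_INIT/COND_INIT create the GLib mutexes and
 * condition variables stored in the scheduler context and set err on failure;
 * MUTEX_FREE/COND_FREE release them. In non-threaded builds (HW_USE_THREADS
 * undefined) all four macros expand to nothing.
 */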
#ifdef HW_USE_THREADS
# define MUTEX_INIT(ctx, name) \
    if (!err) { \
        ctx->name##_mutex = g_mutex_new(); \
        if (!ctx->name##_mutex) err = 1; \
    }

# define MUTEX_FREE(ctx, name) \
    if (ctx->name##_mutex) g_mutex_free(ctx->name##_mutex);

# define COND_INIT(ctx, name) \
    MUTEX_INIT(ctx, name##_cond) \
    if (!err) { \
        ctx->name##_cond = g_cond_new(); \
        if (!ctx->name##_cond) { \
            err = 1; \
            MUTEX_FREE(ctx, name##_cond) \
        } \
    }

# define COND_FREE(ctx, name) \
    if (ctx->name##_cond) g_cond_free(ctx->name##_cond); \
    MUTEX_FREE(ctx, name##_cond)
#else /* HW_USE_THREADS */
# define MUTEX_INIT(ctx, name)
# define MUTEX_FREE(ctx, name)
# define COND_INIT(ctx, name)
# define COND_FREE(ctx, name)
#endif /* HW_USE_THREADS */
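
/*
 * Default run-function table handed to worker threads at creation; it is
 * empty (a single NULL terminator).
 */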
HWRunFunction ppu_run[] = {
    (HWRunFunction)NULL
};

static int hw_sched_initialized = 0;
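
/*
 * One-time global initialization. In threaded builds this brings up the GLib
 * thread system; subsequent calls are no-ops.
 */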
int hw_sched_init(void) {
    if (!hw_sched_initialized) {
#ifdef HW_USE_THREADS
        g_thread_init(NULL);
#endif /* HW_USE_THREADS */
        hw_sched_initialized = 1;
    }

    return 0;
}
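
/*
 * Returns the number of CPUs available to this process. With the scheduling
 * headers present the count is taken from the process affinity mask (falling
 * back to counting consecutive CPUs from 0 when CPU_COUNT is not available);
 * otherwise, or on error, 1 is returned.
 */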
int hw_sched_get_cpu_count(void) {
#ifdef HW_HAVE_SCHED_HEADERS
    int err;

    int cpu_count;
    cpu_set_t mask;

    err = sched_getaffinity(getpid(), sizeof(mask), &mask);
    if (err) return 1;

# ifdef CPU_COUNT
    cpu_count = CPU_COUNT(&mask);
# else
    for (cpu_count = 0; cpu_count < CPU_SETSIZE; cpu_count++) {
        if (!CPU_ISSET(cpu_count, &mask)) break;
    }
# endif

    if (!cpu_count) cpu_count = 1;
    return cpu_count;
#else /* HW_HAVE_SCHED_HEADERS */
    return 1;
#endif /* HW_HAVE_SCHED_HEADERS */
}
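
/*
 * Creates a scheduler with the requested number of worker threads (0 selects
 * the detected CPU count, capped at HW_MAX_THREADS), allocates the context,
 * and initializes its mutexes and condition variables. Returns NULL on
 * failure.
 */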
HWSched hw_sched_create(int cpu_count) {
    int i;
    int err = 0;

    HWSched ctx;

    //hw_sched_init();

    ctx = (HWSched)malloc(sizeof(HWSchedS));
    if (!ctx) return NULL;

    memset(ctx, 0, sizeof(HWSchedS));

    ctx->status = 1;
    MUTEX_INIT(ctx, sync);
    MUTEX_INIT(ctx, data);
    COND_INIT(ctx, compl);
    COND_INIT(ctx, job);

    if (err) {
        pyhst_error("Error initializing conditions and mutexes");
        hw_sched_destroy(ctx);
        return NULL;
    }

    if (!cpu_count) cpu_count = hw_sched_get_cpu_count();
    if (cpu_count > HW_MAX_THREADS) {
        pyhst_warning("Amount of requested threads %i is above limit, using %i", cpu_count, HW_MAX_THREADS);
        cpu_count = HW_MAX_THREADS;
    }

    ctx->n_threads = 0;
    for (i = 0; i < cpu_count; i++) {
        ctx->thread[ctx->n_threads] = hw_thread_create(ctx, ctx->n_threads, NULL, ppu_run, NULL);
        if (ctx->thread[ctx->n_threads]) {
#ifndef HW_USE_THREADS
            ctx->thread[ctx->n_threads]->status = HW_THREAD_STATUS_STARTING;
#endif /* HW_USE_THREADS */
            ++ctx->n_threads;
        }
    }

    if (!ctx->n_threads) {
        hw_sched_destroy(ctx);
        return NULL;
    }

    return ctx;
}
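
/*
 * Blocks until every worker thread has left the INIT state, i.e. until all
 * threads created by hw_sched_create() are up and waiting for work. In
 * non-threaded builds this only marks the scheduler as started.
 */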
static int hw_sched_wait_threads(HWSched ctx) {
#ifdef HW_USE_THREADS
    int i = 0;

    hw_sched_lock(ctx, compl_cond);
    while (i < ctx->n_threads) {
        for (; i < ctx->n_threads; i++) {
            if (ctx->thread[i]->status == HW_THREAD_STATUS_INIT) {
                hw_sched_wait(ctx, compl);
                break;
            }
        }
    }
    hw_sched_unlock(ctx, compl_cond);
#endif /* HW_USE_THREADS */

    ctx->started = 1;

    return 0;
}
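
/*
 * Shuts the scheduler down: clears the status flag, wakes all worker threads
 * so they exit, destroys them, releases the mutexes and condition variables,
 * and frees the context.
 */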
void hw_sched_destroy(HWSched ctx) {
    int i;

    if (ctx->n_threads > 0) {
        if (!ctx->started) {
            hw_sched_wait_threads(ctx);
        }

        ctx->status = 0;
        hw_sched_lock(ctx, job_cond);
        hw_sched_broadcast(ctx, job);
        hw_sched_unlock(ctx, job_cond);

        for (i = 0; i < ctx->n_threads; i++) {
            hw_thread_destroy(ctx->thread[i]);
        }
    }

    COND_FREE(ctx, job);
    COND_FREE(ctx, compl);
    MUTEX_FREE(ctx, data);
    MUTEX_FREE(ctx, sync);

    free(ctx);
}
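
/*
 * Switches the scheduler to sequential mode: chunks are taken one by one from
 * a shared block counter (*cur_block) until *n_blocks is reached. The flags
 * control whether extra INIT/FREE/TERMINATOR chunks are handed out around the
 * regular blocks (see hw_sched_get_chunk()).
 */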
int hw_sched_set_sequential_mode(HWSched ctx, int *n_blocks, int *cur_block, HWSchedFlags flags) {
    ctx->mode = HW_SCHED_MODE_SEQUENTIAL;
    ctx->n_blocks = n_blocks;
    ctx->cur_block = cur_block;
    ctx->flags = flags;

    return 0;
}
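
/*
 * Returns the next chunk of work for the given thread. In PREALLOCATED mode
 * each thread processes exactly one chunk equal to its own id. In SEQUENTIAL
 * mode the shared block counter is advanced under the data mutex; the flags
 * may first yield a HW_SCHED_CHUNK_INIT chunk per thread and, once the blocks
 * are exhausted, a HW_SCHED_CHUNK_FREE chunk per thread plus a single
 * HW_SCHED_CHUNK_TERMINATOR chunk for the last thread still active.
 * HW_SCHED_CHUNK_INVALID signals that no work is left.
 */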
int hw_sched_get_chunk(HWSched ctx, int thread_id) {
    int block;

    switch (ctx->mode) {
        case HW_SCHED_MODE_PREALLOCATED:
            if (ctx->thread[thread_id]->status == HW_THREAD_STATUS_STARTING) {
#ifndef HW_USE_THREADS
                ctx->thread[thread_id]->status = HW_THREAD_STATUS_DONE;
#endif /* HW_USE_THREADS */
                return thread_id;
            } else {
                return HW_SCHED_CHUNK_INVALID;
            }
        case HW_SCHED_MODE_SEQUENTIAL:
            if ((ctx->flags & HW_SCHED_FLAG_INIT_CALL) && (ctx->thread[thread_id]->status == HW_THREAD_STATUS_STARTING)) {
                return HW_SCHED_CHUNK_INIT;
            }
            hw_sched_lock(ctx, data);
            block = *ctx->cur_block;
            if (block < *ctx->n_blocks) {
                *ctx->cur_block = *ctx->cur_block + 1;
            } else {
                block = HW_SCHED_CHUNK_INVALID;
            }
            hw_sched_unlock(ctx, data);
            if (block == HW_SCHED_CHUNK_INVALID) {
                if ((ctx->flags & HW_SCHED_FLAG_FREE_CALL) && (ctx->thread[thread_id]->status == HW_THREAD_STATUS_RUNNING)) {
                    ctx->thread[thread_id]->status = HW_THREAD_STATUS_FINISHING;
                    return HW_SCHED_CHUNK_FREE;
                }
                if ((ctx->flags & HW_SCHED_FLAG_TERMINATOR_CALL) && ((ctx->thread[thread_id]->status == HW_THREAD_STATUS_RUNNING) || (ctx->thread[thread_id]->status == HW_THREAD_STATUS_FINISHING))) {
                    int i;
                    hw_sched_lock(ctx, data);
                    for (i = 0; i < ctx->n_threads; i++) {
                        if (thread_id == i) continue;
                        if ((ctx->thread[i]->status != HW_THREAD_STATUS_DONE) && (ctx->thread[i]->status != HW_THREAD_STATUS_FINISHING2) && (ctx->thread[i]->status != HW_THREAD_STATUS_IDLE)) {
                            break;
                        }
                    }
                    ctx->thread[thread_id]->status = HW_THREAD_STATUS_FINISHING2;
                    hw_sched_unlock(ctx, data);
                    if (i == ctx->n_threads) {
                        return HW_SCHED_CHUNK_TERMINATOR;
                    }
                }
            }
            return block;
        default:
            return HW_SCHED_CHUNK_INVALID;
    }

    return -1;
}
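
/*
 * Starts the execution of an entry over all chunks. In threaded builds this
 * wakes the worker threads by broadcasting on the job condition (the
 * completion mutex is taken here and released by hw_sched_wait_task()). In
 * non-threaded builds the chunks are executed inline, round-robin over the
 * thread contexts, until the chunk source is exhausted or a run reports an
 * error.
 */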
int hw_sched_schedule_task(HWSched ctx, void *appctx, HWEntry entry) {
#ifdef HW_USE_THREADS
    if (!ctx->started) {
        hw_sched_wait_threads(ctx);
    }
#else /* HW_USE_THREADS */
    int err;
    int i, chunk_id, n_threads;
    HWRunFunction run;
    HWThread thrctx;
#endif /* HW_USE_THREADS */

    ctx->ctx = appctx;
    ctx->entry = entry;

    switch (ctx->mode) {
        case HW_SCHED_MODE_SEQUENTIAL:
            *ctx->cur_block = 0;
            break;
        default:
            ;
    }

#ifdef HW_USE_THREADS
    hw_sched_lock(ctx, compl_cond);

    hw_sched_lock(ctx, job_cond);
    hw_sched_broadcast(ctx, job);
    hw_sched_unlock(ctx, job_cond);
#else /* HW_USE_THREADS */
    n_threads = ctx->n_threads;

    for (i = 0; i < n_threads; i++) {
        thrctx = ctx->thread[i];
        thrctx->err = 0;
    }

    i = 0;
    thrctx = ctx->thread[i];
    chunk_id = hw_sched_get_chunk(ctx, thrctx->thread_id);

    while (chunk_id >= 0) {
        run = hw_run_entry(thrctx->runs, entry);
        err = run(thrctx, thrctx->hwctx, chunk_id, appctx);
        if (err) {
            thrctx->err = err;
            break;
        }

        if ((++i) == n_threads) i = 0;
        thrctx = ctx->thread[i];
        chunk_id = hw_sched_get_chunk(ctx, thrctx->thread_id);
    }
#endif /* HW_USE_THREADS */

    return 0;
}
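
/*
 * Waits for the task started by hw_sched_schedule_task() to finish. In
 * threaded builds it sleeps on the completion condition until every thread
 * has reached the DONE state, then releases the completion mutex. The first
 * non-zero per-thread error code, if any, is returned.
 */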
int hw_sched_wait_task(HWSched ctx) {
    int err = 0;
    int i = 0, n_threads = ctx->n_threads;

#ifdef HW_USE_THREADS
    while (i < ctx->n_threads) {
        for (; i < ctx->n_threads; i++) {
            if (ctx->thread[i]->status == HW_THREAD_STATUS_DONE) {
                ctx->thread[i]->status = HW_THREAD_STATUS_IDLE;
            } else {
                hw_sched_wait(ctx, compl);
                break;
            }
        }
    }

    hw_sched_unlock(ctx, compl_cond);
#endif /* HW_USE_THREADS */

    for (i = 0; i < n_threads; i++) {
        HWThread thrctx = ctx->thread[i];
        if (thrctx->err) return err = thrctx->err;

#ifndef HW_USE_THREADS
        thrctx->status = HW_THREAD_STATUS_IDLE;
#endif /* HW_USE_THREADS */
    }

    return err;
}
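
/*
 * Convenience wrapper: schedules an entry and waits for its completion.
 */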
int hw_sched_execute_task(HWSched ctx, void *appctx, HWEntry entry) {
    int err;

    err = hw_sched_schedule_task(ctx, appctx, entry);
    if (err) return err;

    return hw_sched_wait_task(ctx);
}
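
/*
 * The *_thread_task variants temporarily force HW_SCHED_MODE_PREALLOCATED, so
 * the entry is executed exactly once per worker thread (each thread receives
 * its own id as the chunk) instead of iterating over the configured blocks.
 * The previous mode is restored when the task completes.
 */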
int hw_sched_schedule_thread_task(HWSched ctx, void *appctx, HWEntry entry) {
    int err;

    ctx->saved_mode = ctx->mode;
    ctx->mode = HW_SCHED_MODE_PREALLOCATED;
    err = hw_sched_schedule_task(ctx, appctx, entry);

    return err;
}

int hw_sched_wait_thread_task(HWSched ctx) {
    int err;

    err = hw_sched_wait_task(ctx);
    ctx->mode = ctx->saved_mode;

    return err;
}

int hw_sched_execute_thread_task(HWSched ctx, void *appctx, HWEntry entry) {
    int err;
    int saved_mode = ctx->mode;

    ctx->mode = HW_SCHED_MODE_PREALLOCATED;
    err = hw_sched_execute_task(ctx, appctx, entry);
    ctx->mode = saved_mode;

    return err;
}
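
/*
 * A minimal usage sketch of this API. It is illustrative only: "app_ctx",
 * "process_slice_entry", and passing 0 as the flags argument are assumptions,
 * not definitions from this file.
 *
 *     int n_blocks = 64, cur_block = 0;
 *
 *     hw_sched_init();
 *     HWSched sched = hw_sched_create(0);      // 0: use the detected CPU count
 *     hw_sched_set_sequential_mode(sched, &n_blocks, &cur_block, 0);
 *     hw_sched_execute_task(sched, app_ctx, process_slice_entry);
 *     hw_sched_destroy(sched);
 *
 * Chunks are handed out through hw_sched_get_chunk() until it reports
 * HW_SCHED_CHUNK_INVALID; each chunk index selects one block of the task.
 */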