bzr branch
http://darksoft.org/webbzr/tomo/pyhst
78
by Suren A. Chilingaryan
Add COPYING and fix license statements |
1 |
/*
|
2 |
* The PyHST program is Copyright (C) 2002-2011 of the
|
|
3 |
* European Synchrotron Radiation Facility (ESRF) and
|
|
4 |
* Karlsruhe Institute of Technology (KIT).
|
|
5 |
*
|
|
6 |
* PyHST is free software: you can redistribute it and/or modify it
|
|
7 |
* under the terms of the GNU General Public License as published by the
|
|
8 |
* Free Software Foundation, either version 3 of the License, or
|
|
9 |
* (at your option) any later version.
|
|
10 |
*
|
|
11 |
 * PyHST is distributed in the hope that it will be useful, but
|
|
12 |
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
14 |
* See the GNU General Public License for more details.
|
|
15 |
*
|
|
16 |
* You should have received a copy of the GNU General Public License along
|
|
17 |
* with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
18 |
*/
|
|
19 |
||
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
20 |
#define _GNU_SOURCE
|
21 |
#include <stdio.h> |
|
22 |
#include <stdlib.h> |
|
23 |
#include <string.h> |
|
24 |
||
25 |
#ifdef HW_HAVE_SCHED_HEADERS
|
|
26 |
# include <sys/types.h>
|
|
27 |
# include <unistd.h>
|
|
28 |
# include <sched.h>
|
|
29 |
#endif /* HW_HAVE_SCHED_HEADERS */ |
|
30 |
||
31 |
#include "debug.h" |
|
32 |
#include "hw_sched.h" |
|
33 |
||
34 |
||
35 |
||
36 |
||
37 |
#ifdef HW_USE_THREADS
/*
 * Helpers for creating/destroying the GLib synchronization primitives stored
 * in the scheduler context.  All of them expect:
 *   - `ctx` to have members named `<name>_mutex` / `<name>_cond`;
 *   - an `int err` variable in the calling scope (set to 1 on failure,
 *     left untouched on success; INIT is skipped if err is already set).
 * Wrapped in do { } while (0) so they behave as single statements and are
 * safe inside unbraced if/else bodies.
 */
# define MUTEX_INIT(ctx, name) \
    do { \
        if (!err) { \
            ctx->name##_mutex = g_mutex_new(); \
            if (!ctx->name##_mutex) err = 1; \
        } \
    } while (0)

# define MUTEX_FREE(ctx, name) \
    do { \
        if (ctx->name##_mutex) g_mutex_free(ctx->name##_mutex); \
    } while (0)

/* A condition variable travels with its companion mutex (<name>_cond_mutex). */
# define COND_INIT(ctx, name) \
    do { \
        MUTEX_INIT(ctx, name##_cond); \
        if (!err) { \
            ctx->name##_cond = g_cond_new(); \
            if (!ctx->name##_cond) { \
                err = 1; \
                MUTEX_FREE(ctx, name##_cond); \
            } \
        } \
    } while (0)

# define COND_FREE(ctx, name) \
    do { \
        if (ctx->name##_cond) g_cond_free(ctx->name##_cond); \
        MUTEX_FREE(ctx, name##_cond); \
    } while (0)
#else /* HW_USE_THREADS */
/* Single-threaded build: no synchronization primitives are needed. */
# define MUTEX_INIT(ctx, name) do { } while (0)
# define MUTEX_FREE(ctx, name) do { } while (0)
# define COND_INIT(ctx, name)  do { } while (0)
# define COND_FREE(ctx, name)  do { } while (0)
#endif /* HW_USE_THREADS */
HWRunFunction ppu_run[] = { |
|
69 |
(HWRunFunction)NULL |
|
70 |
};
|
|
71 |
||
72 |
static int hw_sched_initialized = 0; |
|
73 |
||
49
by root
Merge /home/matthias/dev/pyHST |
74 |
int hw_sched_init(void) { |
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
75 |
if (!hw_sched_initialized) { |
76 |
#ifdef HW_USE_THREADS
|
|
77 |
g_thread_init(NULL); |
|
78 |
#endif /* HW_USE_THREADS */ |
|
79 |
hw_sched_initialized = 1; |
|
80 |
}
|
|
81 |
||
82 |
return 0; |
|
83 |
}
|
|
84 |
||
85 |
||
/**
 * Detect how many CPUs are available to this process.
 *
 * Uses the process affinity mask when the scheduling headers are available;
 * otherwise (or on any failure) falls back to 1 so callers always get a
 * usable thread count.
 *
 * Fix: the pre-CPU_COUNT fallback used to stop at the first CPU *absent*
 * from the mask, so a sparse affinity mask (e.g. CPUs {1,2,3}) was counted
 * as 0/1.  Now every bit of the mask is examined.
 *
 * @return number of usable CPUs, always >= 1
 */
int hw_sched_get_cpu_count(void) {
#ifdef HW_HAVE_SCHED_HEADERS
    int err;

    int cpu_count;
    cpu_set_t mask;

    err = sched_getaffinity(getpid(), sizeof(mask), &mask);
    if (err) return 1;          /* affinity unknown: assume a single CPU */

# ifdef CPU_COUNT
    cpu_count = CPU_COUNT(&mask);
# else
    {
        int cpu;
        cpu_count = 0;
        for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
            if (CPU_ISSET(cpu, &mask)) cpu_count++;
        }
    }
# endif

    if (!cpu_count) cpu_count = 1;
    return cpu_count;
#else /* HW_HAVE_SCHED_HEADERS */
    return 1;
#endif /* HW_HAVE_SCHED_HEADERS */
}
112 |
HWSched hw_sched_create(int cpu_count) { |
|
113 |
int i; |
|
114 |
int err = 0; |
|
115 |
||
116 |
HWSched ctx; |
|
117 |
||
41
by csa
Bug fixes |
118 |
//hw_sched_init();
|
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
119 |
|
120 |
ctx = (HWSched)malloc(sizeof(HWSchedS)); |
|
121 |
if (!ctx) return NULL; |
|
122 |
||
123 |
memset(ctx, 0, sizeof(HWSchedS)); |
|
124 |
||
125 |
ctx->status = 1; |
|
126 |
||
32
by csa
Fix crash in FFTW3 initialization and cleanup in multi-threaded case |
127 |
MUTEX_INIT(ctx, sync); |
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
128 |
MUTEX_INIT(ctx, data); |
129 |
COND_INIT(ctx, compl); |
|
130 |
COND_INIT(ctx, job); |
|
131 |
||
132 |
if (err) { |
|
133 |
pyhst_error("Error initializing conditions and mutexes"); |
|
134 |
hw_sched_destroy(ctx); |
|
135 |
return NULL; |
|
136 |
}
|
|
137 |
||
138 |
if (!cpu_count) cpu_count = hw_sched_get_cpu_count(); |
|
139 |
if (cpu_count > HW_MAX_THREADS) { |
|
140 |
pyhst_warning("Amount of requested threads %i is above limit, using %i", cpu_count, HW_MAX_THREADS); |
|
141 |
cpu_count = HW_MAX_THREADS; |
|
142 |
}
|
|
143 |
||
144 |
ctx->n_threads = 0; |
|
145 |
for (i = 0; i < cpu_count; i++) { |
|
146 |
ctx->thread[ctx->n_threads] = hw_thread_create(ctx, ctx->n_threads, NULL, ppu_run, NULL); |
|
147 |
if (ctx->thread[ctx->n_threads]) { |
|
148 |
#ifndef HW_USE_THREADS
|
|
173
by Suren A. Chilingaryan
Fix few bugs in scheduller causing crashes in non-threaded mode (still inoperational) |
149 |
ctx->thread[ctx->n_threads]->status = HW_THREAD_STATUS_STARTING; |
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
150 |
#endif /* HW_USE_THREADS */ |
151 |
++ctx->n_threads; |
|
152 |
}
|
|
153 |
}
|
|
154 |
||
155 |
if (!ctx->n_threads) { |
|
156 |
hw_sched_destroy(ctx); |
|
157 |
return NULL; |
|
158 |
}
|
|
159 |
||
160 |
return ctx; |
|
161 |
}
|
|
162 |
||
163 |
static int hw_sched_wait_threads(HWSched ctx) { |
|
164 |
#ifdef HW_USE_THREADS
|
|
165 |
int i = 0; |
|
166 |
||
167 |
hw_sched_lock(ctx, compl_cond); |
|
168 |
while (i < ctx->n_threads) { |
|
169 |
for (; i < ctx->n_threads; i++) { |
|
170 |
if (ctx->thread[i]->status == HW_THREAD_STATUS_INIT) { |
|
171 |
hw_sched_wait(ctx, compl); |
|
172 |
break; |
|
173 |
}
|
|
174 |
}
|
|
175 |
||
176 |
}
|
|
177 |
hw_sched_unlock(ctx, compl_cond); |
|
178 |
#endif /* HW_USE_THREADS */ |
|
179 |
||
180 |
ctx->started = 1; |
|
181 |
||
182 |
return 0; |
|
183 |
}
|
|
184 |
||
185 |
void hw_sched_destroy(HWSched ctx) { |
|
186 |
int i; |
|
187 |
||
188 |
if (ctx->n_threads > 0) { |
|
189 |
if (!ctx->started) { |
|
190 |
hw_sched_wait_threads(ctx); |
|
191 |
}
|
|
192 |
||
193 |
ctx->status = 0; |
|
194 |
hw_sched_lock(ctx, job_cond); |
|
195 |
hw_sched_broadcast(ctx, job); |
|
196 |
hw_sched_unlock(ctx, job_cond); |
|
197 |
||
198 |
for (i = 0; i < ctx->n_threads; i++) { |
|
199 |
hw_thread_destroy(ctx->thread[i]); |
|
200 |
}
|
|
201 |
}
|
|
202 |
||
203 |
COND_FREE(ctx, job); |
|
204 |
COND_FREE(ctx, compl); |
|
205 |
MUTEX_FREE(ctx, data); |
|
32
by csa
Fix crash in FFTW3 initialization and cleanup in multi-threaded case |
206 |
MUTEX_FREE(ctx, sync); |
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
207 |
|
208 |
free(ctx); |
|
209 |
}
|
|
210 |
||
151
by Suren A. Chilingaryan
Multislice mode: preload into the GPU memory complete slices |
211 |
int hw_sched_set_sequential_mode(HWSched ctx, int *n_blocks, int *cur_block, HWSchedFlags flags) { |
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
212 |
ctx->mode = HW_SCHED_MODE_SEQUENTIAL; |
213 |
ctx->n_blocks = n_blocks; |
|
214 |
ctx->cur_block = cur_block; |
|
151
by Suren A. Chilingaryan
Multislice mode: preload into the GPU memory complete slices |
215 |
ctx->flags = flags; |
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
216 |
|
217 |
return 0; |
|
218 |
}
|
|
219 |
||
220 |
int hw_sched_get_chunk(HWSched ctx, int thread_id) { |
|
221 |
int block; |
|
222 |
||
223 |
switch (ctx->mode) { |
|
224 |
case HW_SCHED_MODE_PREALLOCATED: |
|
156
by Suren A. Chilingaryan
Really-really return terminator chunk in all cases |
225 |
if (ctx->thread[thread_id]->status == HW_THREAD_STATUS_STARTING) { |
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
226 |
#ifndef HW_USE_THREADS
|
227 |
ctx->thread[thread_id]->status = HW_THREAD_STATUS_DONE; |
|
228 |
#endif /* HW_USE_THREADS */ |
|
173
by Suren A. Chilingaryan
Fix few bugs in scheduller causing crashes in non-threaded mode (still inoperational) |
229 |
return thread_id; |
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
230 |
} else { |
151
by Suren A. Chilingaryan
Multislice mode: preload into the GPU memory complete slices |
231 |
return HW_SCHED_CHUNK_INVALID; |
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
232 |
}
|
233 |
case HW_SCHED_MODE_SEQUENTIAL: |
|
156
by Suren A. Chilingaryan
Really-really return terminator chunk in all cases |
234 |
if ((ctx->flags&HW_SCHED_FLAG_INIT_CALL)&&(ctx->thread[thread_id]->status == HW_THREAD_STATUS_STARTING)) { |
151
by Suren A. Chilingaryan
Multislice mode: preload into the GPU memory complete slices |
235 |
return HW_SCHED_CHUNK_INIT; |
236 |
}
|
|
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
237 |
hw_sched_lock(ctx, data); |
238 |
block = *ctx->cur_block; |
|
239 |
if (block < *ctx->n_blocks) { |
|
240 |
*ctx->cur_block = *ctx->cur_block + 1; |
|
241 |
} else { |
|
151
by Suren A. Chilingaryan
Multislice mode: preload into the GPU memory complete slices |
242 |
block = HW_SCHED_CHUNK_INVALID; |
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
243 |
}
|
244 |
hw_sched_unlock(ctx, data); |
|
151
by Suren A. Chilingaryan
Multislice mode: preload into the GPU memory complete slices |
245 |
if (block == HW_SCHED_CHUNK_INVALID) { |
246 |
if (((ctx->flags&HW_SCHED_FLAG_FREE_CALL)&&(ctx->thread[thread_id]->status == HW_THREAD_STATUS_RUNNING))) { |
|
247 |
ctx->thread[thread_id]->status = HW_THREAD_STATUS_FINISHING; |
|
248 |
return HW_SCHED_CHUNK_FREE; |
|
249 |
}
|
|
250 |
if ((ctx->flags&HW_SCHED_FLAG_TERMINATOR_CALL)&&((ctx->thread[thread_id]->status == HW_THREAD_STATUS_RUNNING)||(ctx->thread[thread_id]->status == HW_THREAD_STATUS_FINISHING))) { |
|
251 |
int i; |
|
252 |
hw_sched_lock(ctx, data); |
|
253 |
for (i = 0; i < ctx->n_threads; i++) { |
|
254 |
if (thread_id == i) continue; |
|
156
by Suren A. Chilingaryan
Really-really return terminator chunk in all cases |
255 |
if ((ctx->thread[i]->status != HW_THREAD_STATUS_DONE)&&(ctx->thread[i]->status != HW_THREAD_STATUS_FINISHING2)&&(ctx->thread[i]->status != HW_THREAD_STATUS_IDLE)) { |
154
by Suren A. Chilingaryan
Really return terminator chunk from scheduler |
256 |
break; |
257 |
}
|
|
151
by Suren A. Chilingaryan
Multislice mode: preload into the GPU memory complete slices |
258 |
}
|
259 |
ctx->thread[thread_id]->status = HW_THREAD_STATUS_FINISHING2; |
|
260 |
hw_sched_unlock(ctx, data); |
|
261 |
if (i == ctx->n_threads) { |
|
262 |
return HW_SCHED_CHUNK_TERMINATOR; |
|
156
by Suren A. Chilingaryan
Really-really return terminator chunk in all cases |
263 |
}
|
151
by Suren A. Chilingaryan
Multislice mode: preload into the GPU memory complete slices |
264 |
}
|
265 |
}
|
|
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
266 |
return block; |
267 |
default: |
|
151
by Suren A. Chilingaryan
Multislice mode: preload into the GPU memory complete slices |
268 |
return HW_SCHED_CHUNK_INVALID; |
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
269 |
}
|
270 |
||
271 |
return -1; |
|
272 |
}
|
|
273 |
||
274 |
||
275 |
int hw_sched_schedule_task(HWSched ctx, void *appctx, HWEntry entry) { |
|
276 |
#ifdef HW_USE_THREADS
|
|
277 |
if (!ctx->started) { |
|
278 |
hw_sched_wait_threads(ctx); |
|
279 |
}
|
|
280 |
#else /* HW_USE_THREADS */ |
|
281 |
int err; |
|
282 |
int i, chunk_id, n_threads; |
|
283 |
HWRunFunction run; |
|
284 |
HWThread thrctx; |
|
285 |
#endif /* HW_USE_THREADS */ |
|
286 |
||
287 |
ctx->ctx = appctx; |
|
288 |
ctx->entry = entry; |
|
289 |
||
290 |
switch (ctx->mode) { |
|
291 |
case HW_SCHED_MODE_SEQUENTIAL: |
|
292 |
*ctx->cur_block = 0; |
|
293 |
break; |
|
294 |
default: |
|
295 |
;
|
|
296 |
}
|
|
297 |
||
298 |
#ifdef HW_USE_THREADS
|
|
299 |
hw_sched_lock(ctx, compl_cond); |
|
300 |
||
301 |
hw_sched_lock(ctx, job_cond); |
|
302 |
hw_sched_broadcast(ctx, job); |
|
303 |
hw_sched_unlock(ctx, job_cond); |
|
304 |
#else /* HW_USE_THREADS */ |
|
305 |
n_threads = ctx->n_threads; |
|
306 |
||
307 |
for (i = 0; i < n_threads; i++) { |
|
308 |
thrctx = ctx->thread[i]; |
|
309 |
thrctx->err = 0; |
|
310 |
}
|
|
311 |
||
312 |
i = 0; |
|
313 |
thrctx = ctx->thread[i]; |
|
314 |
chunk_id = hw_sched_get_chunk(ctx, thrctx->thread_id); |
|
315 |
||
316 |
while (chunk_id >= 0) { |
|
317 |
run = hw_run_entry(thrctx->runs, entry); |
|
318 |
err = run(thrctx, thrctx->hwctx, chunk_id, appctx); |
|
319 |
if (err) { |
|
320 |
thrctx->err = err; |
|
321 |
break; |
|
322 |
}
|
|
323 |
||
324 |
if ((++i) == n_threads) i = 0; |
|
325 |
thrctx = ctx->thread[i]; |
|
326 |
chunk_id = hw_sched_get_chunk(ctx, thrctx->thread_id); |
|
327 |
}
|
|
328 |
#endif /* HW_USE_THREADS */ |
|
329 |
||
330 |
return 0; |
|
331 |
}
|
|
332 |
||
333 |
int hw_sched_wait_task(HWSched ctx) { |
|
334 |
int err = 0; |
|
335 |
int i = 0, n_threads = ctx->n_threads; |
|
336 |
||
337 |
#ifdef HW_USE_THREADS
|
|
338 |
while (i < ctx->n_threads) { |
|
339 |
for (; i < ctx->n_threads; i++) { |
|
340 |
if (ctx->thread[i]->status == HW_THREAD_STATUS_DONE) { |
|
341 |
ctx->thread[i]->status = HW_THREAD_STATUS_IDLE; |
|
342 |
} else { |
|
343 |
hw_sched_wait(ctx, compl); |
|
344 |
break; |
|
345 |
}
|
|
346 |
}
|
|
347 |
||
348 |
}
|
|
349 |
||
350 |
hw_sched_unlock(ctx, compl_cond); |
|
351 |
#endif /* HW_USE_THREADS */ |
|
352 |
||
353 |
for (i = 0; i < n_threads; i++) { |
|
354 |
HWThread thrctx = ctx->thread[i]; |
|
355 |
if (thrctx->err) return err = thrctx->err; |
|
356 |
||
357 |
#ifndef HW_USE_THREADS
|
|
358 |
thrctx->status = HW_THREAD_STATUS_IDLE; |
|
359 |
#endif /* HW_USE_THREADS */ |
|
360 |
}
|
|
361 |
||
362 |
return err; |
|
363 |
}
|
|
364 |
||
365 |
int hw_sched_execute_task(HWSched ctx, void *appctx, HWEntry entry) { |
|
366 |
int err; |
|
367 |
||
368 |
err = hw_sched_schedule_task(ctx, appctx, entry); |
|
369 |
if (err) return err; |
|
370 |
||
371 |
return hw_sched_wait_task(ctx); |
|
372 |
}
|
|
373 |
||
374 |
int hw_sched_schedule_thread_task(HWSched ctx, void *appctx, HWEntry entry) { |
|
375 |
int err; |
|
376 |
||
377 |
ctx->saved_mode = ctx->mode; |
|
378 |
ctx->mode = HW_SCHED_MODE_PREALLOCATED; |
|
379 |
err = hw_sched_schedule_task(ctx, appctx, entry); |
|
380 |
||
381 |
return err; |
|
382 |
}
|
|
383 |
||
384 |
||
385 |
int hw_sched_wait_thread_task(HWSched ctx) { |
|
386 |
int err; |
|
387 |
||
388 |
err = hw_sched_wait_task(ctx); |
|
389 |
ctx->mode = ctx->saved_mode; |
|
151
by Suren A. Chilingaryan
Multislice mode: preload into the GPU memory complete slices |
390 |
|
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
391 |
return err; |
392 |
}
|
|
393 |
||
394 |
int hw_sched_execute_thread_task(HWSched ctx, void *appctx, HWEntry entry) { |
|
395 |
int err; |
|
396 |
int saved_mode = ctx->mode; |
|
397 |
||
398 |
ctx->mode = HW_SCHED_MODE_PREALLOCATED; |
|
399 |
err = hw_sched_execute_task(ctx, appctx, entry); |
|
400 |
ctx->mode = saved_mode; |
|
401 |
||
402 |
return err; |
|
403 |
}
|