bzr branch
http://darksoft.org/webbzr/tomo/pyhst
28
by csa
Use pinned result buffer to perform device2host memory transfer parallel with computations, add ESRF copyright information in files appeared after redesign |
1 |
/*
|
78
by Suren A. Chilingaryan
Add COPYING and fix license statements |
2 |
* The PyHST program is Copyright (C) 2002-2011 of the
|
3 |
* European Synchrotron Radiation Facility (ESRF) and
|
|
4 |
* Karlsruhe Institute of Technology (KIT).
|
|
28
by csa
Use pinned result buffer to perform device2host memory transfer parallel with computations, add ESRF copyright information in files appeared after redesign |
5 |
*
|
6 |
* PyHST is free software: you can redistribute it and/or modify it
|
|
7 |
* under the terms of the GNU General Public License as published by the
|
|
8 |
* Free Software Foundation, either version 3 of the License, or
|
|
9 |
* (at your option) any later version.
|
|
10 |
*
|
|
11 |
* hst is distributed in the hope that it will be useful, but
|
|
12 |
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
14 |
* See the GNU General Public License for more details.
|
|
15 |
*
|
|
16 |
* You should have received a copy of the GNU General Public License along
|
|
17 |
* with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
18 |
*/
|
|
19 |
||
18
by csa
Big redesign (early commit) |
20 |
#include <stdio.h> |
21 |
#include <stdlib.h> |
|
22 |
#include <string.h> |
|
23 |
#include <assert.h> |
|
28
by csa
Use pinned result buffer to perform device2host memory transfer parallel with computations, add ESRF copyright information in files appeared after redesign |
24 |
#include <errno.h> |
18
by csa
Big redesign (early commit) |
25 |
#include <math.h> |
26 |
||
28
by csa
Use pinned result buffer to perform device2host memory transfer parallel with computations, add ESRF copyright information in files appeared after redesign |
27 |
#include <sys/mman.h> |
28 |
||
168
by Suren A. Chilingaryan
Placeholders for DFI implementation |
29 |
|
30 |
#undef HW_USE_SIMD
|
|
31 |
||
132
by Suren A. Chilingaryan
Pipelined preprocessing (normalize & transpose) |
32 |
#ifdef HW_USE_SIMD
|
33 |
# include <xmmintrin.h>
|
|
34 |
#endif /* HW_USE_SIMD */ |
|
35 |
||
18
by csa
Big redesign (early commit) |
36 |
#include "Vhst_calculate_limits.h" |
37 |
||
38 |
#include "debug.h" |
|
39 |
#include "hst.h" |
|
40 |
#include "hst_reconstructor.h" |
|
41 |
||
58
by Suren A. Chilingaryan
Re-engineering of OpenCL detection |
42 |
#ifdef HW_USE_CUDA
|
21
by csa
Make gputomo library to follow reconstruction modules naming conventions (hst_cuda now) |
43 |
# include "hst_cuda/hst_cuda.h"
|
175
by Suren A. Chilingaryan
Make DFI optional |
44 |
# ifdef PYHST_ENABLE_DFI
|
45 |
# include "dfi_cuda/dfi_cuda.h"
|
|
46 |
# endif /* PYHST_ENABLE_DFI */ |
|
58
by Suren A. Chilingaryan
Re-engineering of OpenCL detection |
47 |
#endif /* HW_USE_CUDA */ |
18
by csa
Big redesign (early commit) |
48 |
|
58
by Suren A. Chilingaryan
Re-engineering of OpenCL detection |
49 |
#ifdef HW_USE_OPENCL
|
57
by Suren A. Chilingaryan
Separate CPU code into the hst_cpu |
50 |
# include "hst_opencl/hst_opencl.h"
|
58
by Suren A. Chilingaryan
Re-engineering of OpenCL detection |
51 |
#endif /* HW_USE_OPENCL */ |
52 |
||
53 |
#ifdef HW_USE_CPU
|
|
54 |
# include "hst_cpu/hst_cpu.h"
|
|
55 |
#endif /* HW_USE_CPU */ |
|
49
by root
Merge /home/matthias/dev/pyHST |
56 |
|
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
57 |
#include "hw_sched.h" |
58 |
||
18
by csa
Big redesign (early commit) |
59 |
#define PRE_RADIUS 1    //!< When non-zero, hst_configure_limits precomputes a per-row radius table (RRadius) instead of recomputing it per projection

static int hst_copies = 0;      // NOTE(review): not referenced in this chunk — presumably counts live HST context instances; confirm against the rest of the file

#ifdef HW_USE_CUDA
static HSTReconstructor *cuda_recon = NULL;             //!< NOTE(review): never assigned in this chunk — presumably the currently selected CUDA reconstructor; verify
static HSTReconstructor *cuda_fbp_recon = NULL;         //!< CUDA filtered back-projection reconstructor (set in hst_init)
# ifdef PYHST_ENABLE_DFI
static HSTReconstructor *cuda_dfi_recon = NULL;         //!< CUDA direct Fourier inversion reconstructor (set in hst_init)
# endif /* PYHST_ENABLE_DFI */
#endif /* HW_USE_CUDA */

#ifdef HW_USE_OPENCL
static HSTReconstructor *opencl_recon = NULL;           //!< OpenCL reconstructor (set in hst_init)
#endif /* HW_USE_OPENCL */

#ifdef HW_USE_CPU
static HSTReconstructor *cpu_recon = NULL;              //!< CPU reconstructor (set in hst_init)
#endif /* HW_USE_CPU */
49
by root
Merge /home/matthias/dev/pyHST |
78 |
|
79 |
/**
 * Initializes the HST subsystem: starts the hardware scheduler and
 * instantiates one global reconstructor per compiled-in backend
 * (CUDA FBP, optionally CUDA DFI, OpenCL, CPU).
 *
 * @return always 0; failures of the backend initializers are not
 *   propagated here (the module-level pointers may stay NULL)
 */
int hst_init(void) {
    hw_sched_init();

#ifdef HW_USE_CUDA
    cuda_fbp_recon = hst_cuda_init(HW_USE_CUDA);
# ifdef PYHST_ENABLE_DFI
    cuda_dfi_recon = dfi_cuda_init(HW_USE_CUDA);
# endif /* PYHST_ENABLE_DFI */
#endif /* HW_USE_CUDA */

#ifdef HW_USE_OPENCL
    opencl_recon = hst_opencl_init(HW_USE_OPENCL);
#endif /* HW_USE_OPENCL */

#ifdef HW_USE_CPU
    cpu_recon = hst_cpu_init(HST_RECONSTRUCTOR_USE_CPU);
#endif /* HW_USE_CPU */

    return 0;
}
|
|
99 |
||
100 |
/**
 * Releases the global per-backend reconstructors created by hst_init().
 * CPU is torn down first, then CUDA (DFI before FBP), then OpenCL.
 * NOTE(review): the module pointers are not reset to NULL here — confirm
 * callers never use them after hst_free().
 */
void hst_free() {
#ifdef HW_USE_CPU
    hst_cpu_free();
#endif /* HW_USE_CPU */

#ifdef HW_USE_CUDA
# ifdef PYHST_ENABLE_DFI
    dfi_cuda_free();
# endif /* PYHST_ENABLE_DFI */
    hst_cuda_free();
#endif /* HW_USE_CUDA */

#ifdef HW_USE_OPENCL
    hst_opencl_free();
#endif /* HW_USE_OPENCL */
}
|
116 |
||
142
by Suren A. Chilingaryan
Adjust number of slices per block to number of reconstructors |
117 |
/**
 * Computes the number of parallel reconstruction units available: the sum
 * of CUDA and OpenCL device counts plus CPU cores (one core is reserved
 * for data preloading). Pure CPU cores are only counted in hybrid mode
 * (HW_USE_CPU != 1) when no accelerator is present. Without threading
 * support the answer is always 1.
 *
 * Fixes: the '# endif' trailer comments for the CUDA and OpenCL sections
 * were swapped; the definition now uses a proper (void) prototype.
 *
 * @return number of reconstruction units to balance slices across
 */
size_t hst_get_balanced_number_of_reconstructors(void) {
    size_t count = 0;

#ifdef HW_USE_THREADS
# ifdef HW_USE_CUDA
    count += cuda_fbp_recon->devices;
# endif /* HW_USE_CUDA */

# ifdef HW_USE_OPENCL
    count += opencl_recon->devices;
# endif /* HW_USE_OPENCL */

# if HW_USE_CPU == 1
    count += cpu_recon->devices - 1; // one core reserved for data preloading
# elif defined(HW_USE_CPU)
    if (!count) count += cpu_recon->devices - 1;
# endif /* HW_USE_CPU */
#else /* HW_USE_THREADS */
    count = 1;
#endif /* HW_USE_THREADS */

    // we should try to estimate relative performance here and compute LCM
    // this should allow PyHST to allocate such number of slices, that all
    // reconstructors terminate simultaneously
    return count;
}
|
|
143 |
||
22
by csa
Timing reporting, few bug fixes |
144 |
/**
 * HST Context: all state of one reconstruction session (configuration,
 * scheduler handles, reconstructor pool, I/O targets, and bookkeeping
 * for the slice/projection pipeline).
 */
struct HSTContextT {
    HSTSetup setup;                     //!< HST parameters

    int num_recon;                      //!< Number of initialized reconstructors
    HWSched sched;                      //!< Scheduler
    HWSched cpu_sched;                  //!< Preprocessing scheduler
    HSTReconstructorContext **recon;    //!< Pool of reconstructors (NULL-terminated)

    // some cached values set in hst_set_projections
    const float *custom_angles;         //!< angles of projection axis (per projection)
    const float *axis_corrections;      //!< rotation axis corrections (per projection)

    float *result_tmpbuf;               //!< Temporary buffer for reconstructed slices
    float *result_reqbuf;               //!< Result buffer supplied in reconstruction request
    const float *sinograms;             //!< Sinograms for processing
    HWMutex output_mutex;               //!< Output serializer
    FILE *output;                       //!< Output file (open and close is done in PyHST_c)
#ifdef HST_USE_FASTWRITER
    fastwriter_t *fw;                   //!< Output with fastwriter
#else /* HST_USE_FASTWRITER */
    void *fw;                           //!< Placeholder so the member exists (and stays NULL) without fastwriter support
#endif /* HST_USE_FASTWRITER */

    int limits_configured;              //!< Flag indicating if limits are already configured
    int filter_configured;              //!< Flag indicating if filter is already configured

    // NOTE(review): the five members below look like preprocessing-pipeline
    // state (see "Pipelined preprocessing" code elsewhere) — confirm exact
    // semantics against the code that sets them
    int num_projections;                //!< Projections handled by the preprocessing pipeline
    int current_projection;             //!< Next projection to preprocess
    int preproc_slices;                 //!< Slices per preprocessing batch
    float *preproc_sinograms;           //!< Destination buffer for preprocessed sinograms
    float **projection_data;            //!< Per-projection source data pointers

    int num_slices;                     //!< Slices in the current reconstruction request
    int current_slice;                  //!< Next slice to schedule

    int slice_offset;                   //!< Number of slices already processed (for navigation in output file)
    int processing;                     //!< Flag indicating the working threads are running and processing slices

    void *mmap;                         //!< NOTE(review): presumably a memory-mapped region (sys/mman.h is included) — verify where it is set

#ifdef PYHST_MEASURE_TIMINGS
    double comp_timer;                  //!< Accumulated computation time
    double io_timer;                    //!< Accumulated I/O time
#endif /* PYHST_MEASURE_TIMINGS */
};
typedef struct HSTContextT HSTContext;  //!< a short name
18
by csa
Big redesign (early commit) |
194 |
|
195 |
||
20
by csa
do more correct initialization |
196 |
/**
 * Prepares per-projection trigonometry tables and the filter buffer.
 *
 * Caches the angle/axis-correction arrays in the context, derives the FFT
 * size from the oversampling factor and bin count, allocates the filter
 * buffer, and fills cos/sin and corrected-axis-position tables for every
 * projection.
 *
 * @param ctx               reconstruction context (setup must be filled in)
 * @param custom_angles     optional per-projection angles (radians); when NULL,
 *                          angles follow angle_offset + i * angle_increment
 * @param axis_corrections  optional per-projection axis corrections; when NULL,
 *                          the uncorrected axis position is used
 * @return 0 on success (allocation failures abort via check_alloc)
 */
static int hst_configure_data_structures(HSTContext *ctx, const float *custom_angles, const float *axis_corrections) {
    int projection; // projection counter
    int num_projections; // Number of projections

    float angle; // projections angle
    float angle_offset; // Offset rotation angle in radians, could be overriden with custom_angles
    float angle_increment; // Angle in radians between each, could be overriden with custom_angles
    float *cos_s, *sin_s;

    float axis_position; // Position of rotation axis
    float *axis_position_corr_s;

    ctx->custom_angles = custom_angles;
    ctx->axis_corrections = axis_corrections;

    num_projections = ctx->setup.num_projections;

    angle_offset = ctx->setup.angle_offset;
    angle_increment = ctx->setup.angle_increment;
    axis_position = ctx->setup.axis_position;

    // Computing FFT dimensions, hard to understand why the -1 is there but it works nicely
    // DS: This is actually two times more than the closest power of 2. Do we need that?
    ctx->setup.dim_fft = 1<<((int)(log((1. * ctx->setup.fft_oversampling * ctx->setup.num_bins - 1)) / log(2.0) + 0.9999));
    printf("FFT convolution: %u\n", ctx->setup.dim_fft);
    // 2 * dim_fft floats: the filter is stored as interleaved complex coefficients
    ctx->setup.filter = malloc((2 * ctx->setup.dim_fft) * sizeof(float));
    check_alloc(ctx->setup.filter, " FILTER");

    axis_position_corr_s = (float *) malloc(num_projections * sizeof(float));
    check_alloc(axis_position_corr_s, "axis_position_corr_s");
    ctx->setup.axis_position_corr_s = axis_position_corr_s;

    cos_s = (float *) malloc(num_projections * sizeof(float));
    check_alloc(cos_s, "cos_s");
    ctx->setup.cos_s = cos_s;

    sin_s = (float *) malloc(num_projections * sizeof(float));
    check_alloc(sin_s, "sin_s");
    ctx->setup.sin_s = sin_s;

    for (projection = 0; projection < num_projections; projection++) {
        if (custom_angles) {
            angle = custom_angles[projection];
        } else {
            angle = angle_offset + projection * angle_increment ;
        }

        cos_s[projection] = cos(angle);
        sin_s[projection] = sin(angle);

        if (axis_corrections) axis_position_corr_s[projection] = axis_position + axis_corrections[projection];
        else axis_position_corr_s[projection] = axis_position;
    }

    return 0;
}
|
|
252 |
||
253 |
/**
 * Computes the reconstructable region of each slice.
 *
 * Depending on the scan geometry (dimensions, axis position, angular
 * coverage) only part of the slice can be back-projected from all
 * sinograms. This routine determines per-row x-limits, optionally builds
 * the min/max masks used in zerooffmask mode, records the axis-centering
 * offsets, and flags the limits as configured.
 *
 * @param ctx  reconstruction context (setup and trig tables must be filled)
 * @return 0 on success (allocation failures abort via check_alloc)
 */
static int hst_configure_limits(HSTContext *ctx) {
    int i;

    int projection; // projection counter

    // Some variables from configuration
    int num_projections; // Number of projections
    int num_bins; // Number of bins in sinograms

    int start_x; // First X-pixel in region required by user
    int start_y; // First Y-pixel in region required by user
    int num_x; // Number of X-pixels in reconstruction region
    int num_y; // Number of Y-pixels in reconstruction region

    float axis_position; // Position of rotation axis
    float angle_increment; // Angle in radians between each, could be overriden with custom_angles

    int axis_to_the_center;

    int zerooffmask;

    int *minX = NULL, *maxX = NULL;
    float *cos_s, *sin_s;
    float *axis_position_corr_s;

    // The block of variables for reconstruction limits
    int status;
    int *X_STARTS, *X_ENDS;
    int y_start;
    int y_end;
    int startxdum, startydum, finxdum, finydum;

    // The next block are variables needed for zerooffmask-mode computations
#if PRE_RADIUS
    float * RRadius = NULL;
#endif /* PRE_RADIUS */
    float yy, rradius;
    int yoristart, yoriend;
    int yorigin ;
    int x_start, x_end ;
    int y;
    float EXT1, EXT2;
    int XH;


    /* The block of variables describing the position of axes (and used to move
       the axis to the center of the reconstructed zone) as well as projection
       angles */
    float MOVE_X;
    float MOVE_Y;

    int fai360;
    float axis_position_corr = 0;
    float cos_angle , sin_angle;
    float corre; // axis correction

    assert(ctx);

    num_bins = ctx->setup.num_bins;
    num_projections = ctx->setup.num_projections;

    start_x = ctx->setup.start_x;
    num_x = ctx->setup.num_x;
    start_y = ctx->setup.start_y;
    num_y = ctx->setup.num_y;

    axis_position_corr_s = ctx->setup.axis_position_corr_s;
    cos_s = ctx->setup.cos_s;
    sin_s = ctx->setup.sin_s;

    axis_position = ctx->setup.axis_position;
    angle_increment = ctx->setup.angle_increment;

    axis_to_the_center = ctx->setup.center_axis;
    zerooffmask = ctx->setup.zerooffmask;


    /*************************************************************
     * Depending on geometry ( dimensions, axis positions .... )*
     * only a restraint part of the slice can be back projected *
     * from all the sinograms.                                  *
     * X_STARTS tells the limits for each y ( idem for X_ENDS)  *
     *************************************************************/
    X_STARTS = malloc(num_bins * sizeof(int));
    check_alloc(X_STARTS," X_STARTS allocation in c_hst_recon_overslices");
    X_ENDS = malloc(num_bins * sizeof(int));
    check_alloc(X_ENDS," X_ENDS allocation in c_hst_recon_overslices");

    /************************************************************
     * calculate limits of slices to reconstruct                *
     ************************************************************/
    /* Restrict the area if projections are covering more than 360 grads */
    startxdum=1;
    startydum=1;
    finxdum = num_bins ;
    finydum = num_bins ;

    /* DSBug: case of custom angles is not handled here */
    if (fabs((num_projections)*(angle_increment) - 2*M_PI)>0.001 ) {
        hst_calculate_limits__(&num_bins, &startxdum, &startydum, &finxdum, &finydum, &axis_position, &y_start, &y_end, X_STARTS, X_ENDS, &status);
        y_start+=1;
        y_end -=1;
    } else {
        y_start=-1;
        y_end =-1;
    }

    // fai360: full 360-degree angular coverage (no restricted y-range)
    fai360 = (y_start == -1 && y_end == -1);
    ctx->setup.fai360 = fai360;


    if (axis_to_the_center) {
        MOVE_X = (((start_x - 1) + num_x / 2.0) - axis_position);
        MOVE_Y = (((start_y - 1) + num_y / 2.0) - axis_position);
    } else {
        MOVE_X = 0;
        MOVE_Y = 0;
    }

    ctx->setup.offset_x = start_x - MOVE_X ;
    ctx->setup.offset_y = start_y - MOVE_Y ;

    if (zerooffmask) {
        /* these should be allocated once and for all out of the routine */
        if (ctx->setup.minX) minX = ctx->setup.minX;
        else {
            minX = (int*) malloc(num_y * sizeof(int));
            check_alloc(minX, "minX");
            ctx->setup.minX = minX;
        }

        if (ctx->setup.maxX) maxX = ctx->setup.maxX;
        else {
            maxX = (int*) malloc(num_y * sizeof(int));
            check_alloc(maxX, "maxX");
            ctx->setup.maxX = maxX;
        }

        // Start with an empty mask (max below min) and widen it per projection below
        for (i = 0; i < num_y; i++) {
            maxX[i] = 0;
            minX[i] = 1000000;
        }

#if PRE_RADIUS
        /* The difference with !PRE_RADIUS is that we do not use axis_corrections,
           i.e. axis_position is used in place of axis_position_corr */
        RRadius = (float *) malloc(num_y * sizeof(float));
        check_alloc(RRadius, "RRadius");

        for (y = 0; y < num_y; y++) {
            rradius = MAX(axis_position , num_bins - axis_position) ;
            yy = ((y + start_y - MOVE_Y - 1) + 0.5 - axis_position);
            if (fabs(yy) > rradius) {
                RRadius[y] = 0.0;
            } else {
                RRadius[y] = sqrt(rradius * rradius - yy * yy);
            }
        }
#endif /* PRE_RADIUS */
    }

    for (projection = 0; projection < num_projections; projection++) {
        cos_angle = cos_s[projection];
        sin_angle = sin_s[projection];
        axis_position_corr = axis_position_corr_s[projection];

        //corre = axis_position_corr - axis_position;
        corre = ctx->axis_corrections?ctx->axis_corrections[projection]:0;

        if (zerooffmask) {
            if (fai360) {
                yoristart = 0;
                yoriend = num_y ;
            } else {
                yoristart = y_start - 1 ;
                yoriend = y_end ;
            }

            for (yorigin = yoristart ; yorigin < yoriend ; yorigin++) {
                if (fai360) {
                    y = yorigin;
                } else {
                    /* y_start and y_end are computed for a maximal mask */
                    /* y is relative to the user's window */
                    y = (yorigin + MOVE_Y + corre - start_y) + 100000; /* so that -0.1 is not truncated to 0 like 0.9 would be */
                    y = y - 100000;
                }

                /* one could possibly remove the 2 if everything works: ps 2 -> 1 */
                if (y < 0 || y >= num_y - 0) continue;

                if (fai360) {
#if PRE_RADIUS
                    rradius = RRadius[y];
#else /*PRE_RADIUS */
                    rradius = MAX(axis_position_corr , num_bins - axis_position_corr) ;
                    yy = ((y + start_y - MOVE_Y - 1) + 0.5 - axis_position);
                    if (fabs(yy) > rradius) {
                        rradius = 0.0;
                    } else {
                        rradius = sqrt(rradius * rradius - yy * yy);
                    }
#endif /* PRE_RADIUS */


                    if (fabs(cos_angle) > 1.0e-6) { /* one could keep only the y part here */
                        EXT1 = -((float)(((axis_position_corr +
                                ((start_x - MOVE_X - 1) + 0.5 - axis_position) * cos_angle -
                                ((y + start_y - MOVE_Y - 1) + 0.5 - axis_position) * sin_angle - 0.5)))) / cos_angle ;
                        EXT2 = (num_bins - (float)(((axis_position_corr +
                                ((start_x - MOVE_X - 1) + 0.5 - axis_position) * cos_angle -
                                ((y + start_y - MOVE_Y - 1) + 0.5 - axis_position) * sin_angle - 0.5)))) / cos_angle ;
                    } else {
                        XH = ((float)(((axis_position_corr -
                                ((y + start_y - MOVE_Y - 1) + 0.5 - axis_position) * sin_angle - 0.5)))
                            );
                        if (XH <= 0 || XH >= num_bins) continue;
                        EXT1 = 0;
                        EXT2 = num_x;
                    }
                    x_start = MIN(EXT1, EXT2) + 1;
                    x_end = MAX(EXT1, EXT2) - 1;

                    /********************************************************************************************/
                    x_start = MAX(x_start , axis_position - rradius - start_x); /* SUSPICIOUS */
                    x_end = MIN(x_end , axis_position + rradius - start_x);
                    /********************************************************************************************/

                } else { // fai360 if
                    x_start = (X_STARTS[yorigin] - 1) - start_x + MOVE_X + corre ;
                    /* if( x_start< + DX0) x_start= DX0; */
                    x_end = (X_ENDS[yorigin] - 1) - start_x + MOVE_X + corre ;
                } // end of fai360 if

                {
                    if (x_start < 0) x_start = 0;
                    if (x_end >= num_x - 0) x_end = num_x - 0;
                    /* here too, if everything works, one could try to be wider:
                       x_start+=2;
                       x_end-=2; */
                }

                if (zerooffmask) {
                    minX[y] = MIN(minX[y], x_start);
                    maxX[y] = MAX(maxX[y], x_end);
                }
            } // end of yorigin loop
        } // zerooffmask if
    } // end of projections loop

#if PRE_RADIUS
    if (zerooffmask) {
        free(RRadius);
    }
#endif /* PRE_RADIUS */

    free(X_ENDS);
    free(X_STARTS);

    ctx->limits_configured = 1;
    ctx->setup.what_changed |= HST_LIMITS_CHANGED;

    return 0;
}
|
|
517 |
||
228
by Suren A. Chilingaryan
Quality fixes (tex-based) |
518 |
#include "astra_fourier.h" |
18
by csa
Big redesign (early commit) |
519 |
/**
 * Builds the convolution filter (interleaved complex coefficients,
 * 2 * dim_fft floats) used for filtered back-projection.
 *
 * With PYHST_ASTRA_SCALING the filter is constructed in the spatial
 * domain (ramp filter), transformed with cdft(), and scaled to match
 * ASTRA conventions. Otherwise a user callback or the default ramp
 * formula fills the non-redundant coefficients, which are then expanded
 * to the full cuFFT/FFTW layout.
 *
 * @param ctx         reconstruction context (dim_fft and filter buffer must be set)
 * @param func        optional callback filling the filter; NULL selects the default ramp
 * @param func_param  opaque argument forwarded to func
 * @return 0 on success
 */
int hst_set_filter(HSTContext *ctx, HSTFilterFunction func, void *func_param) {
    int j;
    int dim_fft;
    float *FILTER;

    assert(ctx);

    dim_fft = ctx->setup.dim_fft;
    FILTER = ctx->setup.filter;

#ifdef PYHST_ASTRA_SCALING
    // dim_fft is next power of two of (2 * num_proj)
    // num_bins is dim_fft/2 + 1
    int num_bins = dim_fft / 2 + 1; // number of non-symmetric coefficients

    memset(FILTER, 0, 2 * dim_fft * sizeof(float));
    FILTER[0] = 0.25;
    FILTER[1] = 0;
    for (j = 1; j < dim_fft; j++) {
        if (j&1) {
            double coef;
            if (2 * j > dim_fft)
                coef = M_PI * (dim_fft - j);
            else
                coef = M_PI * j;
            FILTER[2 * j] = -1. / (coef * coef);
        } else {
            FILTER[2 * j] = 0;
        }
        FILTER[2 * j + 1] = 0;
    }

    // work areas required by the cdft() FFT routine (ip[0] = 0 forces table init)
    int *ip = (int*)malloc((2 + sqrt(dim_fft) + 1)*sizeof(int)); ip[0] = 0;
    float *w = (float*)malloc(dim_fft * sizeof(float) / 2);
    cdft(2 * dim_fft, -1, FILTER, ip, w);
    free(w);
    free(ip);

    for (j = 0; j < num_bins; j++) {
        float _fD = 1.0f;
        float fRelIndex = (float)j / (float)dim_fft;
        float pfW = 2. * M_PI * fRelIndex;

        // On last iteration it is actually equal. So, rounding error works differently with ASTRA and here.... Therefore, we require a bit on top.
        if (pfW > (M_PI * _fD + 1E-6)) {
            FILTER[2 * j] = 0;
            FILTER[2 * j + 1] = 0;
        } else {
            FILTER[2 * j] = 2.0 * FILTER[2 * j];
            FILTER[2 * j + 1] = FILTER[2 * j];
        }
    }

    for (j = num_bins; j < dim_fft; j++) {
        FILTER[2 * j] = 0;
        FILTER[2 * j + 1] = 0;
    }

    /*
    for (j = 0; j < num_bins; j++) {
        printf("%8.4f %8.4f ", FILTER[2 * j], FILTER[2 * j + 1]);
    }
    printf("\n");
    */
#else /* PYHST_ASTRA_SCALING */
    if (func) {
        func(func_param, dim_fft, FILTER);
    } else {
        FILTER[0]=0.0;
        FILTER[1]= 1.0/dim_fft ; // This is the real part of the highest frequency

        for (j=1;j<dim_fft / 2; ++j) {
            FILTER[2 * j] = j * 2.0 / dim_fft / dim_fft;
            FILTER[2 * j + 1] = FILTER[2 * j];
        }
    }

    if (ctx->setup.sum_rule == 2) {
        FILTER[0]=FILTER[2]/4.0;
    }

    if (ctx->setup.no_filtering) {
        FILTER[1] = -9999.9999; // sentinel value; NOTE(review): presumably recognized downstream as "filtering disabled" — verify
    }

    /* This is due to different organization of data representation used after by
       real to complex FFT transfer. In both cases only non-redundant coefficients
       are stored however, the current CPU code stores High Frequency Term (HFT) in the
       res[1], while cuFFT (and FFTW as well) have always res[1] = 0 and stores
       the HFT in res[dim_fft] */
    FILTER[dim_fft] = FILTER[1]; // or should it be 0?

    /* CUDA code now includes optimization allowing to compute two convolutions
       in parallel using a single Complex-Complex Fourier transform. For that we need
       to fill the complete FILTER matrix including redundant coefficients. Therefore,
       the filter is filled with coefficients according to the cuFFT/FFTW approach.
       The case of embedded FFT code (when compressed FFT representation is needed)
       is handled separately in hst.c/hst_cpu code. */
    FILTER[dim_fft + 1] = FILTER[1];
    FILTER[1] = FILTER[0];
#endif /* PYHST_ASTRA_SCALING */

    // Mirror the non-redundant half into the redundant upper half (conjugate-symmetric layout)
    for (j = dim_fft + 2; j < 2 * dim_fft; j+=2) {
        FILTER[j] = FILTER[2 * dim_fft - j];
        FILTER[j + 1] = FILTER[2 * dim_fft - j + 1];
    }


    ctx->filter_configured = 1;
    ctx->setup.what_changed |= HST_FILTER_CHANGED;

    return 0;
}
|
|
633 |
||
634 |
/**
 * Selects a stdio FILE as the reconstruction output target, displacing
 * any previously configured fastwriter. Marks the output as changed only
 * when the target actually differs.
 *
 * @param ctx     reconstruction context
 * @param output  destination stream (opened/closed by PyHST_c)
 * @return always 0
 */
int hst_set_output_file(HSTContext *ctx, FILE *output) {
    int unchanged = (!ctx->fw) && (output == ctx->output);
    if (!unchanged) {
        ctx->fw = NULL;
        ctx->output = output;
        ctx->setup.what_changed |= HST_OUTPUT_CHANGED;
    }
    return 0;
}
|
|
643 |
||
152
by Suren A. Chilingaryan
Support fast writter |
644 |
#ifdef HST_USE_FASTWRITER
|
645 |
/**
 * Selects a fastwriter as the reconstruction output target, displacing
 * any previously configured stdio stream. Only supported in multislice
 * mode.
 *
 * Fix: the function previously fell off the end without a return
 * statement when HST_MULTISLICE_MODE is defined (undefined behavior if
 * the caller reads the result); it now returns 0 on success.
 *
 * @param ctx     reconstruction context
 * @param output  fastwriter handle
 * @return 0 on success, -1 if fastwriter output is not supported
 */
int hst_set_fastwriter(HSTContext *ctx, fastwriter_t *output) {
# ifndef HST_MULTISLICE_MODE
    pyhst_error("Fastwriter is not supported in non-multislice mode");
    return -1;
# endif /* HST_MULTISLICE_MODE */

    if ((ctx->output)||(ctx->fw != output)) {
        ctx->output = NULL;
        ctx->fw = output;
        ctx->setup.what_changed |= HST_OUTPUT_CHANGED;
    }

    return 0;
}
|
|
657 |
#endif /* HST_USE_FASTWRITER */ |
|
658 |
||
659 |
||
18
by csa
Big redesign (early commit) |
660 |
/**
 * Sets the zero-padding mode, flagging the change for reconfiguration
 * only when the value actually differs from the current setting.
 *
 * @param ctx           reconstruction context
 * @param zero_padding  new zero-padding flag
 * @return always 0
 */
int hst_set_padding(HSTContext *ctx, int zero_padding) {
    if (zero_padding == ctx->setup.zero_padding) return 0; // no change
    ctx->setup.zero_padding = zero_padding;
    ctx->setup.what_changed |= HST_PADDING_CHANGED;
    return 0;
}
|
|
667 |
||
668 |
/**
 * Choose whether the rotation axis is moved to the center of the slice.
 *
 * Unlike the other setters, this one re-derives the projection limits
 * immediately (via hst_configure_limits) because they depend on the axis
 * placement; no what_changed flag is raised here.
 *
 * @param ctx                HST context (must be non-NULL)
 * @param axis_to_the_center non-zero to center the rotation axis
 * @return 0 (always succeeds)
 */
int hst_set_axis_mode(HSTContext *ctx, int axis_to_the_center) {
    if (ctx->setup.center_axis == axis_to_the_center) return 0;

    ctx->setup.center_axis = axis_to_the_center;

    /* The projections configuration is dependent on this parameter. */
    hst_configure_limits(ctx);

    return 0;
}
|
|
676 |
||
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
677 |
/**
 * Per-thread configuration callback executed by the scheduler on every
 * device: forwards the pending what_changed bitmask to the device's
 * reconstructor backend.
 *
 * @param thr       scheduler thread handle (unused here)
 * @param hwctx     scheduler hardware context (unused here)
 * @param device_id index into ctx->recon
 * @param data      HSTContext pointer passed through the scheduler
 * @return 0 on success or the backend's configure() error code
 */
static int hst_threads_configure(HWThread thr, void *hwctx, int device_id, void *data) {
    HSTContext *hst = (HSTContext*)data;
    HSTReconstructorContextPtr rctx = hst->recon[device_id];

    /* The configure hook is optional; backends without one need no work. */
    if (!rctx->recon.configure) return 0;

    return rctx->recon.configure(rctx, hst->setup.what_changed);
}
|
|
691 |
||
20
by csa
do more correct initialization |
692 |
/*
 * Apply all pending setup changes (accumulated in ctx->setup.what_changed)
 * before reconstruction starts.
 *
 * Steps, in order:
 *   1. Install the default filter if none was configured explicitly.
 *   2. Derive projection/slice limits if not yet done.
 *   3. Broadcast the what_changed bitmask to every device reconstructor
 *      via the scheduler (hst_threads_configure runs once per device).
 *   4. Reset the running output slice offset when the output target changed.
 *   5. Clear what_changed so subsequent calls become no-ops.
 *
 * check_code() is a project macro that bails out of this function on a
 * non-zero error code, so each step only runs if the previous succeeded.
 *
 * Returns 0 on success (errors exit through check_code).
 */
static int hst_configure(HSTContext *ctx) {
    int err;

    assert(ctx);

    /* Fall back to the default filter when the caller never set one. */
    if (!ctx->filter_configured) {
        err = hst_set_filter(ctx, NULL, NULL);
        check_code(err, "hst_set_filter");
    }

    if (!ctx->limits_configured) {
        err = hst_configure_limits(ctx);
        check_code(err, "hst_configure_limits");
    }

    /* Run the per-device configure hook on every scheduler thread. */
    err = hw_sched_execute_thread_task(ctx->sched, ctx, hst_threads_configure);
    check_code(err, "hst_configure");

    /*
    int i;
    for (i = 0; i < ctx->num_recon; ++i) {
        rctx = ctx->recon[i];
        if (rctx->recon.configure) {
            err = rctx->recon.configure(rctx, ctx->setup.what_changed);
            check_code(err, "hst_configure");
        }
    }
    */

    /* A new output target starts writing from slice 0 again. */
    if (ctx->setup.what_changed&HST_OUTPUT_CHANGED) {
        ctx->slice_offset = 0;
    }

    ctx->setup.what_changed = 0;

    return 0;
}
|
|
18
by csa
Big redesign (early commit) |
729 |
|
730 |
/*
 * Reconstruct one slice block on the given device reconstructor.
 *
 * In multislice mode, backends exposing a send() hook get a one-stage
 * pipeline: the freshly supplied sinogram is uploaded immediately, while the
 * slice actually processed in this call is the one saved on the PREVIOUS
 * invocation (saved_slice/saved_sino are swapped with the arguments). A call
 * with slice_result == NULL (after the swap) just primes the pipeline and
 * returns; callers flush the last pending slice by passing NULLs.
 *
 * The per-slice flow is: optional preprocess hook, mandatory reconstruct
 * hook, optional postprocess hook. Any hook error aborts and is returned.
 * On success the device's processed-slice counter advances by SLICE_BLOCK
 * (slices per block).
 *
 * Returns 0 on success, a backend error code otherwise.
 */
static int hst_reconstruct_slice(HSTContext *ctx, HSTReconstructorContextPtr rctx, float *slice_result, const float *sinograms ) {
    int err;

    assert(ctx);
    assert(rctx);
    assert(rctx->recon.reconstruct);

#ifdef HST_MULTISLICE_MODE
    if (rctx->recon.send) {
        const float *last_sino;
        float *last_slice;

        /* Remember what was queued on the previous call ... */
        last_slice = rctx->saved_slice;
        last_sino = rctx->saved_sino;

        /* ... start the upload of the new sinogram right away ... */
        if (sinograms)
            rctx->recon.send(rctx, sinograms);

        /* ... queue the new request and process the previous one below. */
        rctx->saved_slice = slice_result;
        rctx->saved_sino = sinograms;

        slice_result = last_slice;
        sinograms = last_sino;
    }

    /* Nothing pending from a previous call: pipeline is only being primed. */
    if (!slice_result) return 0;
#endif /* HST_MULTISLICE_MODE */
    //

    if (rctx->recon.preprocess) {
        err = rctx->recon.preprocess(rctx, slice_result, sinograms);
        if (err) return err;
    }

    err = rctx->recon.reconstruct(rctx, slice_result, sinograms);
    if (err) return err;

    if (rctx->recon.postprocess) {
        err = rctx->recon.postprocess(rctx, slice_result, sinograms);
        if (err) return err;
    }

    /* One block of SLICE_BLOCK slices finished on this device. */
    rctx->processed_slices += SLICE_BLOCK;

    return 0;
}
|
|
776 |
||
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
777 |
/*
 * Scheduler callback: reconstruct slice block `slice_id` on the calling
 * thread's device and, when no caller-supplied result buffer exists
 * (ctx->result_reqbuf == NULL), write the result to the configured output
 * under the output mutex.
 *
 * Special slice_id values (multislice scheduling):
 *   HW_SCHED_CHUNK_FREE       - no new work; flush the device's pipelined
 *                               slice (NULL/NULL call) and wait for it.
 *   HW_SCHED_CHUNK_TERMINATOR - all slices done; in multislice mode dump
 *                               the whole accumulated result buffer.
 *
 * Buffer selection:
 *   multislice mode      - results are indexed by slice_id within one large
 *                          buffer (reqbuf if provided, else tmpbuf).
 *   non-multislice mode  - reqbuf is indexed by slice_id; the shared tmpbuf
 *                          holds one slice block per device (indexed by
 *                          device_id) and is flushed to file immediately,
 *                          seeking to the slice's absolute file position.
 *
 * PYHST_IO_BENCHMARK skips computation; PYHST_RECON_BENCHMARK skips output.
 * I/O time is accumulated in ctx->io_timer while the mutex is held.
 *
 * Returns 0 on success, non-zero on reconstruction or output failure.
 */
static int hst_threads_reconstruct_slice(HWThread thr, void *hwctx, int slice_id, void *data) {
    int err = 0;
    int device_id = thr->thread_id;
    HSTContext *ctx = (HSTContext*)data;
    int cnt, count = ctx->setup.num_x * ctx->setup.num_y;    // floats per slice

#ifdef PYHST_MEASURE_TIMINGS
    GTimer *timer;
#endif /* PYHST_MEASURE_TIMINGS */

#ifndef PYHST_IO_BENCHMARK
    if (slice_id == HW_SCHED_CHUNK_FREE) {
        /* No new slice: flush the device pipeline and synchronize. */
        err = hst_reconstruct_slice(ctx, ctx->recon[device_id], NULL, NULL);
        if (ctx->recon[device_id]->recon.wait)
            ctx->recon[device_id]->recon.wait(ctx->recon[device_id]);
    } else if (slice_id >= 0) {
        err = hst_reconstruct_slice(ctx, ctx->recon[device_id],
# ifdef HST_MULTISLICE_MODE
            /* All results live in one big buffer, indexed by slice. */
            (ctx->result_reqbuf?ctx->result_reqbuf:ctx->result_tmpbuf) + SLICE_BLOCK * slice_id * count,
# else /* HST_MULTISLICE_MODE */
            /* tmpbuf holds one block per device; reqbuf is per slice. */
            ctx->result_reqbuf?(ctx->result_reqbuf + SLICE_BLOCK * slice_id * count):(ctx->result_tmpbuf + SLICE_BLOCK * device_id * count),
# endif /* HST_MULTISLICE_MODE */
            ctx->sinograms + SLICE_BLOCK * slice_id * ctx->setup.num_bins * ctx->setup.num_projections
        );
    }

    if (err) return err;
#endif /* ! PYHST_IO_BENCHMARK */


#ifndef PYHST_RECON_BENCHMARK
    /* No caller buffer: we own the output, serialize writers. */
    if (!ctx->result_reqbuf) {
        hw_sched_lock_mutex(ctx->output_mutex);

# ifdef PYHST_MEASURE_TIMINGS
        timer = g_timer_new();
# endif /* PYHST_MEASURE_TIMINGS */

# ifdef HST_MULTISLICE_MODE
        /* In multislice mode everything is written once, at termination. */
        if (slice_id == HW_SCHED_CHUNK_TERMINATOR) {
# ifdef HST_USE_FASTWRITER
            if (ctx->fw) {
retry:
                err = fastwriter_push_data(ctx->fw, SLICE_BLOCK * ctx->num_slices * count * sizeof(float), ctx->result_tmpbuf);
                if (err) {
                    /* Writer queue full: spin until it accepts the push. */
                    if (err == EWOULDBLOCK) goto retry;
# ifdef PYHST_MEASURE_TIMINGS
                    if (timer) g_timer_destroy(timer);
# endif
                    hw_sched_unlock_mutex(ctx->output_mutex);
                    pyhst_error("faswriter failed wrting %u MB of data, error %i", SLICE_BLOCK * ctx->num_slices * count * sizeof(float)/1024/1024, err);
                    return 1;
                }
            } else {
# endif /* HST_USE_FASTWRITER */
                cnt = fwrite(ctx->result_tmpbuf, sizeof(float), SLICE_BLOCK * ctx->num_slices * count, ctx->output);
                if (cnt != SLICE_BLOCK * ctx->num_slices * count) {
# ifdef PYHST_MEASURE_TIMINGS
                    if (timer) g_timer_destroy(timer);
# endif
                    hw_sched_unlock_mutex(ctx->output_mutex);
                    pyhst_error("fwrite failed, send pushed characters %i, but written %i, errno %i", SLICE_BLOCK * ctx->num_slices * count, cnt, errno);
                    return 1;
                }
# ifdef HST_USE_FASTWRITER
            }
# endif /* HST_USE_FASTWRITER */
        }
# else /* HST_MULTISLICE_MODE */
        /* Seek to this slice's absolute position: output order is arbitrary. */
        err = fseek(ctx->output, ((long)(ctx->slice_offset + slice_id * SLICE_BLOCK)) * count * sizeof(float), SEEK_SET);
        if (err) {
# ifdef PYHST_MEASURE_TIMINGS
            if (timer) g_timer_destroy(timer);
# endif
            hw_sched_unlock_mutex(ctx->output_mutex);
            pyhst_error("fseek failed, errno: %i", errno);
            return err;
        }

        cnt = fwrite(ctx->result_tmpbuf + device_id * count, sizeof(float), SLICE_BLOCK * count, ctx->output);
        if (cnt != SLICE_BLOCK * count) {
# ifdef PYHST_MEASURE_TIMINGS
            if (timer) g_timer_destroy(timer);
# endif
            hw_sched_unlock_mutex(ctx->output_mutex);
            pyhst_error("fwrite failed, send pushed characters %i, but written %i, errno %i", SLICE_BLOCK * count, cnt, errno);
            return 1;
        }
# endif /* HST_MULTISLICE_MODE */
        //fsync(fileno(ctx->output));


# ifdef PYHST_MEASURE_TIMINGS
        if (timer) {
            // We should be inside the lock, otherwise multiple accesses.
            g_timer_stop(timer);
            ctx->io_timer += g_timer_elapsed(timer, NULL);
            //ctx->comp_timer -= g_timer_elapsed(timer, NULL);
            g_timer_destroy(timer);
        }
# endif /* PYHST_MEASURE_TIMINGS */

        hw_sched_unlock_mutex(ctx->output_mutex);
    }

    /*
    FILE *f;
    char tmp[64];
    sprintf(tmp, "/tmp/test/%i-%i",slice_id + ctx->slice_offset, device_id);
    f=fopen(tmp, "w");
    fwrite(ctx->result_tmpbuf + device_id * count, sizeof(float), count, f);
    fclose(f);
    */
#endif /* ! PYHST_RECON_BENCHMARK */

    return 0;
}
|
|
892 |
||
18
by csa
Big redesign (early commit) |
893 |
|
22
by csa
Timing reporting, few bug fixes |
894 |
/*
 * Synchronously reconstruct `num_slices` slices from `sinograms`.
 *
 * If `result` is non-NULL the reconstructed slices are stored there;
 * otherwise they are written to the configured output (file or fastwriter)
 * by the worker threads. Applies any pending configuration first, then
 * schedules hst_threads_reconstruct_slice across all devices and blocks
 * until every slice is done. Wall time is accumulated in ctx->comp_timer.
 *
 * @param ctx        HST context (must be non-NULL)
 * @param num_slices number of slices to reconstruct (the scheduler works in
 *                   blocks of SLICE_BLOCK, hence the division below)
 * @param result     optional caller-owned result buffer
 * @param sinograms  input sinograms, num_bins * num_projections floats per slice
 * @return 0 on success, scheduler/reconstruction error code otherwise
 */
int hst_reconstruct_slices(HSTContextPtr ctx, int num_slices, float *result, const float *sinograms) {
    int err;
#ifdef PYHST_MEASURE_TIMINGS
    GTimer *timer;
#endif /* PYHST_MEASURE_TIMINGS */

    assert(ctx);

    if (ctx->setup.what_changed) {
        hst_configure(ctx);
    }

#ifdef PYHST_MEASURE_TIMINGS
    timer = g_timer_new();
#endif /* PYHST_MEASURE_TIMINGS */

    /* The scheduler iterates over slice BLOCKS, not individual slices. */
    ctx->num_slices = num_slices / SLICE_BLOCK;
    ctx->result_reqbuf = result;
    ctx->sinograms = sinograms;

    pyhst_info("num_projection: %d, angle_increment: %e, fai360: %s", ctx->setup.num_projections, ctx->setup.angle_increment, ctx->setup.fai360?"on":"off" );
    hw_sched_schedule_task(ctx->sched, ctx, hst_threads_reconstruct_slice);
    err = hw_sched_wait_task(ctx->sched);
    // NOTE(review): hst_reconstruct_wait advances the offset by
    // SLICE_BLOCK * ctx->num_slices (== num_slices rounded down), while this
    // uses the raw parameter, i.e. SLICE_BLOCK times more when SLICE_BLOCK > 1.
    // One of the two looks wrong -- verify against callers before changing.
    ctx->slice_offset += SLICE_BLOCK * num_slices;

#ifdef PYHST_MEASURE_TIMINGS
    if (timer) {
        ctx->comp_timer += g_timer_elapsed(timer, NULL);
        g_timer_destroy(timer);
    }
#endif /* PYHST_MEASURE_TIMINGS */

    /*
    int i;
    for (i=0; i<num_slices; i++) {
        err = hst_reconstruct_slice(ctx,
            ctx->recon[0],
            result?result:ctx->result_tmpbuf,
            sinograms + i * ctx->setup.num_bins * ctx->setup.num_projections
        );
        if (err) return err;

        if (!result) {
            fwrite(ctx->result_tmpbuf, ctx->setup.num_x * sizeof(float), ctx->setup.num_y, ctx->output);
        }
    }
    */

    return err;
}
|
945 |
||
38
by csa
Preload data from EDF images while computing slices |
946 |
/*
 * Asynchronously start reconstructing `num_slices` slices; results go to
 * the configured output (result_reqbuf is forced to NULL). Pair with
 * hst_reconstruct_wait() to block for completion.
 *
 * Without HW_USE_PARALLEL_IO the wait happens right here, making the call
 * effectively synchronous. Scheduling time is added to ctx->comp_timer.
 *
 * @param ctx        HST context (must be non-NULL)
 * @param num_slices number of slices to reconstruct (SLICE_BLOCK granular)
 * @param sinograms  input sinograms
 * @return 0 (errors surface from hst_reconstruct_wait)
 */
int hst_reconstruct_start(HSTContextPtr ctx, int num_slices, const float *sinograms) {
#ifdef PYHST_MEASURE_TIMINGS
    GTimer *timer;
#endif /* PYHST_MEASURE_TIMINGS */

    assert(ctx);

    if (ctx->setup.what_changed) {
        hst_configure(ctx);
    }

#ifdef PYHST_MEASURE_TIMINGS
    timer = g_timer_new();
#endif /* PYHST_MEASURE_TIMINGS */

    /* The scheduler iterates over slice BLOCKS, not individual slices. */
    ctx->num_slices = num_slices / SLICE_BLOCK;
    ctx->result_reqbuf = NULL;
    ctx->sinograms = sinograms;

    pyhst_info("num_projection: %d, angle_increment: %e, fai360: %s", ctx->setup.num_projections, ctx->setup.angle_increment, ctx->setup.fai360?"on":"off" );
    hw_sched_schedule_task(ctx->sched, ctx, hst_threads_reconstruct_slice);
    /* Mark an outstanding task so hst_reconstruct_wait knows to join it. */
    ctx->processing = 1;

#ifdef PYHST_MEASURE_TIMINGS
    if (timer) {
        ctx->comp_timer += g_timer_elapsed(timer, NULL);
        g_timer_destroy(timer);
    }
#endif /* PYHST_MEASURE_TIMINGS */

#ifndef HW_USE_PARALLEL_IO
    /* No I/O parallelism available: degrade to a synchronous call. */
    hst_reconstruct_wait(ctx);
#endif /* ! HW_USE_PARALLEL_IO */

    return 0;
}
|
|
982 |
||
983 |
/*
 * Block until the reconstruction started by hst_reconstruct_start() has
 * finished, then advance the output slice offset past the processed slices.
 * Safe to call when nothing is in flight (ctx->processing == 0): returns 0.
 * Wait time is accumulated in ctx->comp_timer.
 *
 * @param ctx HST context
 * @return 0 on success, the scheduler's error code otherwise
 */
int hst_reconstruct_wait(HSTContextPtr ctx) {
    int err = 0;

#ifdef PYHST_MEASURE_TIMINGS
    GTimer *timer;
    timer = g_timer_new();
#endif /* PYHST_MEASURE_TIMINGS */

    if (ctx->processing) {
        err = hw_sched_wait_task(ctx->sched);
        /* num_slices counts blocks, so this advances by the raw slice count. */
        ctx->slice_offset += SLICE_BLOCK * ctx->num_slices;
        ctx->processing = 0;
    }

#ifdef PYHST_MEASURE_TIMINGS
    if (timer) {
        ctx->comp_timer += g_timer_elapsed(timer, NULL);
        g_timer_destroy(timer);
    }
#endif /* PYHST_MEASURE_TIMINGS */

    return err;
}
|
|
1006 |
||
1007 |
||
171
by Suren A. Chilingaryan
Disable broken SIMD |
1008 |
#undef HW_USE_SIMD
|
132
by Suren A. Chilingaryan
Pipelined preprocessing (normalize & transpose) |
1009 |
/*
 * Scheduler callback: normalize projection `proj` and transpose it into the
 * sinogram layout (slice-major: each slice's bins for this projection land
 * at `i * num_bins * num_proj + proj * num_bins`).
 *
 * Busy-waits (100 us usleep polls) until the projection pointer has been
 * published by hst_preprocess_projection() -- the I/O thread may still be
 * loading it. There is no explicit memory barrier here; the polling loop is
 * the only synchronization (works in practice on the targeted platforms).
 *
 * When HW_USE_SIMD is enabled (currently #undef'd above as broken), the bulk
 * of each row is scaled with SSE in groups of 4 and a scalar loop handles
 * the remainder; otherwise the scalar loop does the whole row.
 *
 * NOTE(review): ctx->preproc_sinograms is written through here although it
 * was assigned from a `const float *` parameter in hst_preprocess_start --
 * presumably the buffer is actually writable; confirm against callers.
 */
static int hst_threads_preprocess_projection(HWThread thr, void *hwctx, int proj, void *data) {
    HSTContext *ctx = (HSTContext*)data;

    int i,j;
    int num_slices = ctx->preproc_slices;
    int num_bins = ctx->setup.num_bins;
    int num_proj = ctx->setup.num_projections;
    float norm = ctx->setup.norm;        // flat-field normalization factor

    int num_bins4 = (num_bins/4);        // whole SSE vectors per row (SIMD path only)

#ifdef HW_USE_SIMD
    __m128 norm4 = _mm_set_ps1(norm);
#endif /* HW_USE_SIMD */

    /* Spin until the loader publishes this projection's data. */
    while (!ctx->projection_data[proj])
        usleep(100);

    // printf("[%p] %i, %i of %i, %i %f\n", thr, num_slices, proj, num_proj, num_bins, norm);
    for (i = 0; i < num_slices; i++) {
        int src_offset = i * num_bins;
        int dst_offset = i * num_bins * num_proj + proj * num_bins;

#ifdef HW_USE_SIMD
        // Looks pretty inoptimal... Enforce padding? Hand-written assembler?
        __m128* src = (__m128*)(ctx->projection_data[proj] + src_offset);
        __m128* dst = (__m128*)(ctx->preproc_sinograms + dst_offset);

        for (j = 0; j < num_bins4; j++) {
            dst[j] = _mm_mul_ps(src[j], norm4);
        }

        /* Scalar tail for the last num_bins % 4 bins. */
        for (j = num_bins4 * 4; j < num_bins; j++) {
#else /* HW_USE_SIMD */
        for (j = 0; j < num_bins; j++) {
#endif /* HW_USE_SIMD */
            ctx->preproc_sinograms[dst_offset + j] = norm * ctx->projection_data[proj][src_offset + j];
        }
    }

    return 0;
}
|
|
1051 |
||
1052 |
/**
 * Begin the (optionally pipelined) normalize-and-transpose preprocessing of
 * raw projections into `sinograms`.
 *
 * Records the target buffer and slice count, then -- when parallel I/O is
 * enabled -- clears the per-projection pointer table (workers poll it until
 * the loader publishes each projection) and schedules the per-projection
 * workers on the CPU scheduler. Without HW_USE_PARALLEL_IO the work is done
 * synchronously inside hst_preprocess_projection() instead.
 *
 * @param ctx        HST context
 * @param num_slices number of slices per projection
 * @param sinograms  destination sinogram buffer
 * @return 0 on success, a scheduler error code otherwise
 */
int hst_preprocess_start(HSTContextPtr ctx, int num_slices, const float *sinograms) {
    ctx->num_projections = ctx->setup.num_projections;
    ctx->preproc_slices = num_slices;
    ctx->preproc_sinograms = sinograms;

#ifdef HW_USE_PARALLEL_IO
    memset(ctx->projection_data, 0, ctx->setup.num_projections * sizeof(float*));
    return hw_sched_schedule_task(ctx->cpu_sched, ctx, hst_threads_preprocess_projection);
#else /* HW_USE_PARALLEL_IO */
    return 0;
#endif /* HW_USE_PARALLEL_IO */
}
|
|
1066 |
||
1067 |
/**
 * Wait for the preprocessing workers scheduled by hst_preprocess_start().
 * A no-op (returning 0) when parallel I/O is compiled out, since the work
 * already happened synchronously.
 *
 * @param ctx HST context
 * @return 0 on success, the CPU scheduler's error code otherwise
 */
int hst_preprocess_wait(HSTContextPtr ctx) {
#ifdef HW_USE_PARALLEL_IO
    return hw_sched_wait_task(ctx->cpu_sched);
#else /* HW_USE_PARALLEL_IO */
    return 0;
#endif /* HW_USE_PARALLEL_IO */
}
|
|
1074 |
||
1075 |
/**
 * Publish projection `proj`'s raw data for preprocessing.
 *
 * With HW_USE_PARALLEL_IO, storing the pointer is enough: the worker thread
 * scheduled by hst_preprocess_start() polls projection_data[proj] and picks
 * the projection up as soon as it becomes non-NULL. Without parallel I/O the
 * normalize-and-transpose step runs inline, right here.
 *
 * @param ctx  HST context
 * @param proj projection index
 * @param data caller-owned projection buffer (num_slices * num_bins floats);
 *             must stay valid until preprocessing completes
 * @return 0 (always succeeds)
 */
int hst_preprocess_projection(HSTContextPtr ctx, int proj, float *data) {
    ctx->projection_data[proj] = data;

#ifndef HW_USE_PARALLEL_IO
    /* Synchronous fallback: process immediately on the calling thread. */
    hst_threads_preprocess_projection(NULL, NULL, proj, ctx);
#endif /* ! HW_USE_PARALLEL_IO */

    return 0;
}
|
|
1099 |
||
1100 |
||
22
by csa
Timing reporting, few bug fixes |
1101 |
/**
 * Expose the context's per-device reconstructors as a read-only,
 * NULL-terminated array (the array is allocated with num_recon + 1 entries
 * and the last one set to NULL in hst_init_context).
 *
 * @param ctx HST context (must be non-NULL)
 * @return borrowed pointer to the reconstructor array; do not free
 */
HSTReconstructorConstContextPtr *hst_get_configured_reconstructors(HSTContextPtr ctx) {
    assert(ctx);
    return (HSTReconstructorConstContextPtr*)ctx->recon;
}
|
|
1106 |
||
43
by csa
Option to disable preloading of image files, additional timers |
1107 |
#ifdef PYHST_MEASURE_TIMINGS
/** Seconds spent writing results to the output (accumulated under the output mutex). */
double hst_get_io_timer(HSTContextPtr ctx) {
    assert(ctx);
    return ctx->io_timer;
}

/** Seconds spent scheduling/waiting for reconstruction work. */
double hst_get_recon_timer(HSTContextPtr ctx) {
    assert(ctx);
    return ctx->comp_timer;
}
#endif /* PYHST_MEASURE_TIMINGS */
|
1118 |
||
1119 |
||
49
by root
Merge /home/matthias/dev/pyHST |
1120 |
HSTContext *hst_create_context(void) { |
18
by csa
Big redesign (early commit) |
1121 |
if (hst_copies) pyhst_fail("Only a single HST context is supported by the current version"); |
1122 |
||
1123 |
HSTContext *ctx = (HSTContext*)malloc(sizeof(HSTContext)); |
|
1124 |
if (ctx) { |
|
1125 |
memset(ctx, 0, sizeof(HSTContext)); |
|
1126 |
++hst_copies; |
|
1127 |
}
|
|
1128 |
return ctx; |
|
1129 |
}
|
|
1130 |
||
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
1131 |
/**
 * Per-thread initialization callback: binds the scheduler thread to its
 * device's reconstructor context and initializes that backend.
 *
 * @param thr       scheduler thread handle, forwarded to the backend
 * @param hwctx     scheduler hardware context (unused)
 * @param device_id index into ctx->recon
 * @param data      HSTContext pointer passed through the scheduler
 * @return 0 (initialization errors exit through check_code)
 */
static int hst_threads_init(HWThread thr, void *hwctx, int device_id, void *data) {
    HSTContext *hst = (HSTContext*)data;

    int status = hst_reconstructor_init(hst->recon[device_id], thr);
    check_code(status, "hst_reconstructor_init");

    return 0;
}
|
|
1142 |
||
133
by Suren A. Chilingaryan
Pinned OpenCL memory |
1143 |
/**
 * Allocate host memory suitable for fast device transfers.
 *
 * Delegates to the active GPU runtime's pinned-allocation call (OpenCL takes
 * precedence over CUDA when both are compiled in). In the CPU-only build it
 * degrades to a plain malloc: page-locking via mmap/mlock would need root or
 * special system configuration, so pinning is left to the GPU runtimes.
 * Release with hst_pinned_free(), never plain free().
 *
 * @param size bytes to allocate
 * @param a    intended access pattern hint for the GPU runtime
 * @return allocated buffer, or NULL on failure
 */
void *hst_pinned_malloc(size_t size, HSTPinnedAccess a) {
#ifdef HW_USE_OPENCL
    return hst_opencl_host_malloc(size, a);
#else /* OPENCL */
# ifdef HW_USE_CUDA
    return hst_cuda_host_malloc(size, a);
# else /* USEGPU */
    return malloc(size);
# endif /* USEGPU */
#endif /* OPENCL */
}
|
1162 |
||
1163 |
/**
 * Release a buffer obtained from hst_pinned_malloc(), routing through the
 * same runtime (OpenCL, then CUDA, then plain free) that allocated it.
 *
 * @param ptr buffer to release (runtime-specific whether NULL is accepted)
 */
void hst_pinned_free(void *ptr) {
#ifdef HW_USE_OPENCL
    hst_opencl_host_free(ptr);
#else /* OPENCL */
# ifdef HW_USE_CUDA
    hst_cuda_host_free(ptr);
# else /* USEGPU */
    free(ptr);
# endif /* USEGPU */
#endif /* OPENCL */
}
|
1174 |
||
18
by csa
Big redesign (early commit) |
1175 |
int hst_init_context(HSTContext *ctx, HSTSetup *setup, const float *custom_angles, const float *axis_corrections) { |
1176 |
int err; |
|
58
by Suren A. Chilingaryan
Re-engineering of OpenCL detection |
1177 |
int i, limit, threads; |
18
by csa
Big redesign (early commit) |
1178 |
size_t size; |
1179 |
||
1180 |
assert(ctx); |
|
1181 |
assert(setup); |
|
1182 |
||
1183 |
hst_setup_init(&ctx->setup); |
|
20
by csa
do more correct initialization |
1184 |
|
18
by csa
Big redesign (early commit) |
1185 |
size = ((void*)&setup->END_OF_GLOBAL_PART) - ((void*)setup); |
1186 |
memcpy(&ctx->setup, setup, size); |
|
1187 |
||
20
by csa
do more correct initialization |
1188 |
ctx->setup.what_changed = HST_ALL_CHANGED; |
1189 |
hst_configure_data_structures(ctx, custom_angles, axis_corrections); |
|
18
by csa
Big redesign (early commit) |
1190 |
|
58
by Suren A. Chilingaryan
Re-engineering of OpenCL detection |
1191 |
|
1192 |
# ifdef HW_USE_CUDA
|
|
168
by Suren A. Chilingaryan
Placeholders for DFI implementation |
1193 |
switch (setup->method) { |
1194 |
case HST_METHOD_FBP: |
|
1195 |
cuda_recon = cuda_fbp_recon; |
|
1196 |
break; |
|
175
by Suren A. Chilingaryan
Make DFI optional |
1197 |
# ifdef PYHST_ENABLE_DFI
|
168
by Suren A. Chilingaryan
Placeholders for DFI implementation |
1198 |
case HST_METHOD_DFI: |
1199 |
cuda_recon = cuda_dfi_recon; |
|
1200 |
break; |
|
175
by Suren A. Chilingaryan
Make DFI optional |
1201 |
# endif /* PYHST_ENABLE_DFI */ |
182
by Suren A. Chilingaryan
Fix DFI reconstruction |
1202 |
default: |
1203 |
pyhst_error("Unknown reconstruction mode specififed"); |
|
1204 |
exit(-1); |
|
168
by Suren A. Chilingaryan
Placeholders for DFI implementation |
1205 |
}
|
173
by Suren A. Chilingaryan
Fix few bugs in scheduller causing crashes in non-threaded mode (still inoperational) |
1206 |
#endif /* HW_USE_CUDA */ |
1207 |
||
1208 |
#ifdef HW_USE_THREADS
|
|
1209 |
threads = 0; |
|
1210 |
||
1211 |
# ifdef HW_USE_CUDA
|
|
58
by Suren A. Chilingaryan
Re-engineering of OpenCL detection |
1212 |
threads += cuda_recon->devices; |
1213 |
# endif /* HW_USE_CUDA */ |
|
1214 |
||
1215 |
# ifdef HW_USE_OPENCL
|
|
1216 |
threads += opencl_recon->devices; |
|
1217 |
# endif /* HW_USE_OPENCL */ |
|
1218 |
||
1219 |
# if HW_USE_CPU == 1
|
|
1220 |
threads += cpu_recon->devices - 1; // one core reserved for data preloading |
|
1221 |
# elif defined(HW_USE_CPU)
|
|
1222 |
if (!threads) threads += cpu_recon->devices - 1; |
|
1223 |
# endif /* HW_USE_CPU */ |
|
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
1224 |
#else /* HW_USE_THREADS */ |
1225 |
threads = 1; |
|
1226 |
#endif /* HW_USE_THREADS */ |
|
1227 |
||
1228 |
ctx->num_recon = threads; |
|
22
by csa
Timing reporting, few bug fixes |
1229 |
ctx->recon = (HSTReconstructorContext**)malloc((ctx->num_recon + 1) * sizeof(HSTReconstructorContext*)); |
18
by csa
Big redesign (early commit) |
1230 |
check_alloc(ctx->recon, "pool of HSTReconstructors"); |
1231 |
||
22
by csa
Timing reporting, few bug fixes |
1232 |
ctx->recon[ctx->num_recon] = NULL; |
1233 |
||
58
by Suren A. Chilingaryan
Re-engineering of OpenCL detection |
1234 |
i = 0; limit = 0; |
1235 |
#ifdef HW_USE_CUDA
|
|
1236 |
limit += cuda_recon->devices; |
|
1237 |
for (; (i < limit)&&(i < threads); i++) { |
|
1238 |
ctx->recon[i] = hst_reconstructor_create(cuda_recon, &ctx->setup, i); |
|
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
1239 |
check_alloc(ctx->recon[i], "GPUContext"); |
1240 |
}
|
|
58
by Suren A. Chilingaryan
Re-engineering of OpenCL detection |
1241 |
#endif /* HW_USE_CUDA */ |
49
by root
Merge /home/matthias/dev/pyHST |
1242 |
|
58
by Suren A. Chilingaryan
Re-engineering of OpenCL detection |
1243 |
#ifdef HW_USE_OPENCL
|
1244 |
limit += opencl_recon->devices; |
|
1245 |
for (; (i < limit) && (i < threads); i++) { |
|
49
by root
Merge /home/matthias/dev/pyHST |
1246 |
ctx->recon[i] = hst_reconstructor_create(opencl_recon, &ctx->setup, i); |
1247 |
check_alloc(ctx->recon[i], "OpenCLContext"); |
|
1248 |
}
|
|
58
by Suren A. Chilingaryan
Re-engineering of OpenCL detection |
1249 |
#endif /* HW_USE_OPENCL */ |
49
by root
Merge /home/matthias/dev/pyHST |
1250 |
|
58
by Suren A. Chilingaryan
Re-engineering of OpenCL detection |
1251 |
#ifdef HW_USE_CPU
|
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
1252 |
for (; i < threads; i++) { |
1253 |
ctx->recon[i] = hst_reconstructor_create(cpu_recon, &ctx->setup, threads - i - 1); |
|
1254 |
check_alloc(ctx->recon[i], "CPUContext"); |
|
1255 |
}
|
|
58
by Suren A. Chilingaryan
Re-engineering of OpenCL detection |
1256 |
#endif /* HW_USE_CPU */ |
49
by root
Merge /home/matthias/dev/pyHST |
1257 |
|
58
by Suren A. Chilingaryan
Re-engineering of OpenCL detection |
1258 |
if (!i) pyhst_fail("no reconstructors found"); |
1259 |
||
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
1260 |
pyhst_warning("Running %i reconstructors\n", threads); |
1261 |
||
1262 |
ctx->sched = hw_sched_create(threads); |
|
1263 |
check_alloc(ctx->sched, "scheduller"); |
|
132
by Suren A. Chilingaryan
Pipelined preprocessing (normalize & transpose) |
1264 |
|
1265 |
ctx->cpu_sched = hw_sched_create((cpu_recon->devices>1)?2:1); |
|
1266 |
// ctx->cpu_sched = hw_sched_create((cpu_recon->devices>1)?(cpu_recon->devices - 1):1);
|
|
1267 |
check_alloc(ctx->cpu_sched, "cpu scheduller"); |
|
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
1268 |
|
1269 |
ctx->output_mutex = hw_sched_create_mutex(); |
|
1270 |
//Can return NULL if threads are disabled
|
|
1271 |
//check_alloc(ctx->output_mutex, "hw_sched_create_mutex");
|
|
1272 |
||
151
by Suren A. Chilingaryan
Multislice mode: preload into the GPU memory complete slices |
1273 |
#ifdef HST_MULTISLICE_MODE
|
1274 |
hw_sched_set_sequential_mode(ctx->sched, &ctx->num_slices, &ctx->current_slice, HW_SCHED_FLAG_FREE_CALL|HW_SCHED_FLAG_TERMINATOR_CALL); |
|
1275 |
#else /* HST_MULTISLICE_MODE */ |
|
1276 |
hw_sched_set_sequential_mode(ctx->sched, &ctx->num_slices, &ctx->current_slice, 0); |
|
1277 |
#endif /* HST_MULTISLICE_MODE */ |
|
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
1278 |
|
1279 |
err = hw_sched_schedule_thread_task(ctx->sched, ctx, hst_threads_init); |
|
1280 |
check_code(err, "hw_sched_schedule_task"); |
|
1281 |
||
151
by Suren A. Chilingaryan
Multislice mode: preload into the GPU memory complete slices |
1282 |
hw_sched_set_sequential_mode(ctx->cpu_sched, &ctx->num_projections, &ctx->current_projection, 0); |
132
by Suren A. Chilingaryan
Pipelined preprocessing (normalize & transpose) |
1283 |
|
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
1284 |
/*
|
1285 |
for (i = 0; i < threads; i++) {
|
|
1286 |
err = hst_reconstructor_init(ctx->recon[i]);
|
|
1287 |
check_code(err, "hst_cpu_init_context");
|
|
1288 |
}
|
|
1289 |
*/
|
|
132
by Suren A. Chilingaryan
Pipelined preprocessing (normalize & transpose) |
1290 |
ctx->projection_data = (float**)malloc(ctx->setup.num_projections * sizeof(float*)); |
1291 |
check_alloc(ctx->projection_data, "projection_data"); |
|
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
1292 |
|
151
by Suren A. Chilingaryan
Multislice mode: preload into the GPU memory complete slices |
1293 |
#ifdef HST_MULTISLICE_MODE
|
1294 |
ctx->result_tmpbuf = hst_pinned_malloc(ctx->setup.max_slices * ctx->setup.num_x * ctx->setup.num_y * sizeof(float), HST_PINNED_W); |
|
1295 |
#else /* HST_MULTISLICE_MODE */ |
|
133
by Suren A. Chilingaryan
Pinned OpenCL memory |
1296 |
ctx->result_tmpbuf = hst_pinned_malloc(threads * ctx->setup.num_x * ctx->setup.num_y * sizeof(float), HST_PINNED_W); |
151
by Suren A. Chilingaryan
Multislice mode: preload into the GPU memory complete slices |
1297 |
#endif /* HST_MULTISLICE_MODE */ |
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
1298 |
check_alloc(ctx->result_tmpbuf, "temporary buffer for reconstructed slices"); |
1299 |
||
1300 |
err = hw_sched_wait_thread_task(ctx->sched); |
|
1301 |
check_code(err, "hw_sched_wait_task"); |
|
132
by Suren A. Chilingaryan
Pipelined preprocessing (normalize & transpose) |
1302 |
|
30
by csa
Multi-GPU, Multi-CPU, and Hybrid modes support |
1303 |
return 0; |
1304 |
}
|
|
1305 |
||
1306 |
/**
 * Scheduler callback: per-thread cleanup executed once on each worker.
 *
 * Releases the reconstructor instance bound to this worker's slot in the
 * context. Matches the hst_threads_init counterpart scheduled during setup.
 *
 * @param thr        scheduler thread handle (unused here)
 * @param hwctx      scheduler-private context (unused here)
 * @param device_id  index of this worker's reconstructor in ctx->recon
 * @param data       opaque pointer to the owning HSTContext
 * @return 0 on completion (callback convention; no failure path here)
 */
static int hst_threads_free(HWThread thr, void *hwctx, int device_id, void *data) {
    HSTContext *hst = data;     /* void* converts implicitly in C; no cast needed */
    hst_reconstructor_free(hst->recon[device_id]);
    return 0;
}
|
|
1312 |
||
1313 |
/**
 * Release every resource owned by the context, leaving the structure
 * zeroed but not freed (see hst_destroy_context for heap-allocated
 * contexts).
 *
 * Teardown order: pinned result buffer, projection pointer table,
 * per-thread reconstructors (freed on their own scheduler threads via
 * hst_threads_free, then destroyed), output mutex, both schedulers,
 * and finally the setup structure.
 *
 * @param ctx  initialized context; must not be NULL (asserted)
 */
void hst_free_context(HSTContext *ctx) {
    int i;

    assert(ctx);

    hst_pinned_free(ctx->result_tmpbuf);

    /* free(NULL) is a defined no-op, so the former NULL guard was redundant */
    free(ctx->projection_data);

    if (ctx->recon) {
            /* Per-device cleanup must run on the thread that owns the device
               (relevant for CUDA/OpenCL contexts), hence the scheduler task. */
        if (ctx->sched) {
            hw_sched_execute_thread_task(ctx->sched, ctx, hst_threads_free);
        }

        for (i = 0; i < ctx->num_recon; i++) {
            if (ctx->recon[i]) hst_reconstructor_destroy(ctx->recon[i]);
        }

        free(ctx->recon);
    }

        /* Mutex may legitimately be NULL when threading is disabled */
    if (ctx->output_mutex) {
        hw_sched_destroy_mutex(ctx->output_mutex);
    }

    if (ctx->cpu_sched) {
        hw_sched_destroy(ctx->cpu_sched);
    }

    if (ctx->sched) {
        hw_sched_destroy(ctx->sched);
    }

    hst_setup_free(&ctx->setup);

        /* Zero the struct so stale pointers cannot be reused after free */
    memset(ctx, 0, sizeof(HSTContext));
}
|
|
1352 |
||
1353 |
/**
 * Tear down a heap-allocated context: release all owned resources,
 * free the structure itself, and decrement the global instance counter.
 *
 * @param ctx  context previously created on the heap; must not be NULL
 *             (hst_free_context asserts this)
 */
void hst_destroy_context(HSTContext *ctx) {
    hst_free_context(ctx);
    free(ctx);
    --hst_copies;   /* global bookkeeping of live context instances */
}
|