% /articles/toma
%
% To get this branch, use:
% bzr branch http://darksoft.org/webbzr/articles/toma
\begin{table}[htb]
\caption{\label{table:alg_prms} List of parameters used in code snippets}
\begin{tabularx}{\columnwidth} { llX }
\hline
Var          	    & Type     & Description \\
\hline
$n_p$        	    & int      & Number of projections \\
$n_v$               & int      & Number of slices reconstructed in parallel \\
$n_q$               & int      & Number of pixels assigned per GPU thread \\
%$n_o$               & int      & Oversampling factor \\
$n_s$               & int      & The side of a pixel square reconstructed by a thread block \\
$\vdata{n}_t$	    & int2     & Dimensions of thread block \\
\hline
$s_p$               & int      & Size of the larger projection block, indicates the size of caches holding projection constants and $h_m$ values \\
$s_d$               & int      & Size of data cache, specifies how many projection lines are cached \\
$s_t$               & int      & Number of threads assigned to cache a projection row, see \sectionname~\ref{section:alurec_shmem} and \tablename~\ref{tbl:shmemconf} \\
$s_i$               & int      & Iterations required to completely cache a projection row (determined based on $\vdata{n}_t$, $s_t$, and the used caching optimizations as explained in \sectionname~\ref{section:alurec_shmem}) \\
%$s_u$               & int      & Unrolling hint of inner projection loop \\
\hline
$\vdata{v}_a$       & float2   & The position of the rotation axis \\
\hline
$c_c$        	    & float[]  & Constant array storing cosine values of the projection angles \\
$c_s$        	    & float[]  & Constant array storing sine values of the projection angles \\
$\vdata{c}_{cs}$    & float2[] & Constant array storing (cosine, sine) pairs for each projection angle \\
$c_a$               & float[]  & Constant array storing the coordinate of the rotation axis with a per-projection correction applied to compensate for possible mechanical displacements \\
$c_m$               & float[]  & Constant array storing coefficients required to quickly compute $h_m$ \\
\hline
\end{tabularx}
\end{table}

\begin{table}[htb]
\caption{\label{table:alg_idxs} List of indexes used in code snippets}
\begin{tabularx}{\columnwidth} { llX }
\hline
Var          	    & Type     & Description \\
\hline
$\vdata{m}_b$       & int2     & The index of a thread block within the computation grid. Referred to as \emph{blockIdx} in CUDA or \emph{get\_group\_id()} in OpenCL \\
$\vdata{m}_t$       & int2     & The index of a thread within the thread block. Referred to as \emph{threadIdx} in CUDA or \emph{get\_local\_id()} in OpenCL \\
$\vdata{m}_g$   	& int2     & The index of a thread within the computation grid, i.e. $\vdata{m}_b \vmul \vdata{n}_t + \vdata{m}_t$ \\

$\vdata{m}_*'$  	& int2     & The re-mapped index, the number is specified in superscript if multiple mappings are used \\ 
\hline
$\vdata{f}_g^*$	    & float2   & The absolute coordinates of the reconstructed pixel according to the selected mapping, usually: $\vdata{f}'_g = \vdata{m}'_g - \vdata{v}_a$ \\
$\vdata{f}_b$	    & float2   & The absolute coordinates of a pixel block (i.e. coordinates of the pixel processed by the first thread of the block) \\
\hline
$m_p$        	    & int      & For algorithms processing multiple projections in parallel, it defines a projection index in a group \\
$m_d$               & int      & For algorithms caching the sinogram in shared memory, this is a mapping selecting offset in the cache \\
$m_l$        	    & int      & Linear addressing of threads in the thread block ($\vy{m_t} \mul \vx{n_t} + \vx{m_t}$). It is another mapping used for caching constants. \\
\hline
\end{tabularx}
\end{table}

\begin{table}[htb]
\caption{\label{table:alg_vars} List of variables used in code snippets}
\begin{tabularx}{\columnwidth} { S{l}lX }
\hline
Var          	    & Type     & Description \\
\hline
$h$          	    & float    & The required projection bin (including offset from the center) \\
%$h'$                & float    & Partially pre-computed projection bin \\
$h_i$               & int      & The position of the required projection bin in the cache \\
$h_f$               & float    & The floating-point representation of $h_i$ \\
$h_l$               & float    & The offset from the center of the bin (i.e.\ the coefficient for linear interpolation) \\
$h_b$               & float    & The bin required by the first thread of the block \\
$h_m$        	    & float[]  & The smallest bin required by a thread block in the selected projection row \\
$h_x$               & float[][]& The cache storing the value of $c_a + \vx{f_g} \mul c_c - h_m$ for each column of pixels processed by a thread block (and for each of $s_d$ cached projections) \\
\hline
$p_*$          	    & int      & Projection number ($p$) and projection iterators ($p_b$, $p_i$) \\
$q_*$               & int      & Pixel block iterators \\
\hline
$\vfloat{d}$        & float[][]& The cache storing a subset of sinogram required to process $s_d$ projections for the current thread block \\
$\vfloat{s}$ 	    & float[]  & Variable accumulating the impact of the projections. Defined as array if the thread is responsible for multiple pixels. \\
$\vfloat{r}$ 	    & float[][]& The reconstructed slice \\
\hline
\end{tabularx}
\end{table}


\begin{table}[htb]
\caption{\label{table:alg_ss} Memory Domains}
\begin{tabularx}{\columnwidth} { S{l}X }
\hline
Superscript   & Domain \\
\hline
$\gmem{\cdot}$     & Variable in global GPU memory \\
$\cmem{\cdot}$     & Variable in constant memory \\
$\shmem{\cdot}$    & Variable in shared memory \\
\hline
\end{tabularx}
\end{table}