1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
|
\begin{algorithm}[htb]
% Pseudo-code for the texture-based back-projection kernel (label alg:newtex4).
% The notation macros used below (\ver, \vdata, \vfloat, \vx, \vy, \shmem, \cmem,
% \gmem, \eq, \aeq, \seq, \deq, \KwTex, \KwSync, \KwFence, \ForToBy, \CFor, ...)
% are defined elsewhere in the project; the comments here describe only what the
% listing itself states and flag anything that depends on those definitions.
\DontPrintSemicolon
\caption{\label{alg:newtex4} Optimized implementation of the back-projection kernel relying on the texture engine to perform interpolation}
%\begin{algorithmic}
% Inputs: the standard input list (\algsimpleinput) plus the remapped thread
% index and the projection offset m_p produced by the remapping algorithm
% referenced as alg:newtex4remap.
\KwIn {\algsimpleinput. Mappings $\ver{2}{\vdata{m}_t}$ and $m_p$ are computed as explained in \algorithmname~\ref{alg:newtex4remap}. }
% Shared-memory buffers: a 64x4 array of partial sums and a 16x16 staging tile
% for the results.
\KwShMem {$\shmem{\vfloat{s}}[64][4]$, $\shmem{\vfloat{r}}[16][16]$}
\KwOut {Reconstructed slice $\gmem{\vfloat{r}}$}
\Begin {
  % Two earlier formulations, kept disabled for reference.
  %$\vdata{m}_g'' \eq \vdata{m}_b \vmul \vdata{n}_t + \vdata{m}_t''$ \;
  %$m_l \eq \vy{m_t} \cdot \vx{n_t} + \vx{m_t}$ \;
  \tcc{Computing pixel coordinates using the new mapping}
  % Global pixel index = block index * block size + remapped thread index;
  % f'_g is that index taken relative to v_a.
  $\ver{2}{\vdata{m}_g} \eq \vdata{m}_b \vmul \vdata{n}_t + \ver{2}{\vdata{m}_t}$ \;
  $\vdata{f}'_g \eq \ver{2}{\vdata{m}_g} - \vdata{v}_a$ \;
  \tcc{Computing partial sums}
  % Four per-thread accumulators, one per value of q in the inner loop.
  $\vfloat{s}[4] \eq \{0\}$ \;
  % p starts at m_p and advances in steps of 4 up to n_p.
  \ForToBy{p}{m_p}{n_p}{4}{
    $c_s \eq \vy{\cmem{c_s}[p]}$ \;
    % Texture coordinate h for projection p, built from the constant-memory
    % tables c_a, c_c and c_s; the 0.5 offset is applied to both coordinates.
    $h \eq \cmem{c_a}[p] + \vx{f'_g} \mul \cmem{c_c}[p] - \vy{f'_g} \mul \cmem{c_s}[p] + \fconst{0.5}$ \;
    \ForTo{q}{0}{4}{
      % Fetch at (h, p + 0.5) and fold the value into accumulator q.
      $\vfloat{s}[q] \aeq $ \KwTex{$h$, $p + \fconst{0.5}$} \;
      % Step h by 4 * c_s for the next q (presumably \seq is subtract-assign,
      % matching the minus sign on the c_s term above -- confirm against the macro).
      $h \seq 4 \mul c_s$ \;
    }
  }
  \tcc{Reduction}
  % Second remapping of the thread index: the x component (m_t.x mod 4) selects
  % one of the 4 columns of the shared array s, the y component
  % (4 * m_t.y + m_t.x div 4) selects its row.
  $\ver{3}{\vdata{m}_t} \eq \vlist{\vx{m_t} \imod 4, 4 \mul \vy{m_t} + \vx{m_t} \idiv 4}$ \;
  \ForTo{q}{0}{4}{
    \tcc{Moving partial sums to shared memory}
    % Row = linearised remapped thread index, column = m_p.
    $\shmem{\vfloat{s}}[\vx{n_t} \mul \vy{\ver{2}{m_t}} + \vx{\ver{2}{m_t}}][m_p] \eq \vfloat{s}[q]$ \;
    \KwSync \;
    \tcc{Performing reduction}
    % Each pass adds column x + i into column x for x < i, so the row total
    % ends up in column 0 (assuming \deq halves i, giving i = 2 then i = 1).
    \CFor{i \eq 2}{i \geq 1}{i \deq 2}{
      \If{$\vx{\ver{3}{m_t}} < i$}{
        $\shmem{\vfloat{s}}[\vy{\ver{3}{m_t}}][\vx{\ver{3}{m_t}}] \aeq \shmem{\vfloat{s}}[\vy{\ver{3}{m_t}}][\vx{\ver{3}{m_t}}+i]$ \;
      }
      \KwFence \;
    }
    \tcc{To coalesce global memory writes, results are grouped in shared memory}
    % Only the threads holding column 0 copy their row total into the tile r.
    \If{$\vx{\ver{3}{m_t}} \ifeq 0$} {
      $\shmem{\vfloat{r}}[4 \mul q + \vy{\ver{3}{m_t}} \idiv 16][\vy{\ver{3}{m_t}} \imod 16] \eq \shmem{\vfloat{s}}[\vy{\ver{3}{m_t}}][0]$ \;
    }
    \KwSync \;
  }
  % Final store: each thread writes the tile element at its own (m_t.y, m_t.x)
  % to the output slice at (m_g.y, m_g.x).
  $\gmem{\vfloat{r}}[\vy{m_g}][\vx{m_g}] \eq \shmem{\vfloat{r}}[\vy{m_t}][\vx{m_t}]$ \;
}
\end{algorithm}
|