/articles/toma

To get this branch, use:
bzr branch http://darksoft.org/webbzr/articles/toma
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
% Pseudo-code listing (label alg:newtex4) of the back-projection kernel that
% uses texture fetches (\KwTex) for interpolation.
% The notation macros used below (\vx, \vy, \ver, \vdata, \vfloat, \shmem,
% \gmem, \cmem, \eq, \aeq, \seq, \deq, \imod, \idiv, \mul, \vmul, \fconst, ...)
% and the keywords \KwShMem, \KwTex, \KwSync, \KwFence, \ForTo, \ForToBy and
% \CFor are not defined in this chunk; presumably they live in the preamble.
\begin{algorithm}[htb]
\DontPrintSemicolon
\caption{\label{alg:newtex4} Optimized implementation of the back-projection kernel relying on the texture engine to perform interpolation}
%\begin{algorithmic}
\KwIn {\algsimpleinput. Mappings $\ver{2}{\vdata{m}_t}$ and $m_p$ are computed as explained in \algorithmname~\ref{alg:newtex4remap}. }
% Two shared-memory buffers: s[64][4] receives the per-thread partial sums,
% r[16][16] stages the reduced results before the final write to \gmem{r}.
\KwShMem {$\shmem{\vfloat{s}}[64][4]$, $\shmem{\vfloat{r}}[16][16]$}
\KwOut {Reconstructed slice $\gmem{\vfloat{r}}$}
\Begin {
% Disabled lines, kept as found:
%$\vdata{m}_g'' \eq \vdata{m}_b \vmul \vdata{n}_t + \vdata{m}_t''$ \;
%$m_l \eq \vy{m_t} \cdot \vx{n_t} + \vx{m_t}$ \;

% Pixel position: m_b combined with n_t plus the remapped index from the input
% mapping (presumably block index times block size plus thread offset -- TODO
% confirm), then shifted by v_a.
\tcc{Computing pixel coordinates using the new mapping}
$\ver{2}{\vdata{m}_g} \eq \vdata{m}_b \vmul \vdata{n}_t + \ver{2}{\vdata{m}_t}$ \;
$\vdata{f}'_g \eq \ver{2}{\vdata{m}_g} - \vdata{v}_a$ \;

% Every thread keeps four accumulators s[0..3]. The outer loop walks p from
% m_p towards n_p in steps of 4; the inner loop performs four texture fetches
% per p, updating h by 4 * c_s (via \seq) after each fetch.
\tcc{Computing partial sums}
$\vfloat{s}[4] \eq \{0\}$ \;
\ForToBy{p}{m_p}{n_p}{4}{
    % NOTE(review): the local $c_s$ reuses the name of the constant array
    % \cmem{c_s}; the next line takes its \vy component while the $h$ formula
    % indexes \cmem{c_s}[p] without a component selector -- confirm which
    % form is intended.
    $c_s \eq \vy{\cmem{c_s}[p]}$ \;
    $h \eq \cmem{c_a}[p] + \vx{f'_g} \mul \cmem{c_c}[p] - \vy{f'_g} \mul \cmem{c_s}[p] + \fconst{0.5}$ \;

    \ForTo{q}{0}{4}{
       $\vfloat{s}[q] \aeq $ \KwTex{$h$, $p + \fconst{0.5}$} \;
       $h \seq 4 \mul c_s$ \;
    }
}

% Reduction index: x = m_t.x mod 4 selects one of the 4 columns of
% \shmem{s}, y = 4 * m_t.y + m_t.x div 4 selects its row.
\tcc{Reduction}
$\ver{3}{\vdata{m}_t} \eq  \vlist{\vx{m_t} \imod 4, 4 \mul \vy{m_t} + \vx{m_t} \idiv 4}$ \;
\ForTo{q}{0}{4}{
  % One pass per accumulator q: publish s[q] in column m_p of the row derived
  % from the remapped index, then synchronize before reading other entries.
  \tcc{Moving partial sums to shared memory}
  $\shmem{\vfloat{s}}[\vx{n_t} \mul \vy{\ver{2}{m_t}} + \vx{\ver{2}{m_t}}][m_p] \eq \vfloat{s}[q]$ \;
  \KwSync \;

  % Steps i = 2, then i = 1 (assuming \deq halves i): entries with x < i
  % fold in the entry i columns to the right, leaving the row total in
  % column 0.
  \tcc{Performing reduction}
  \CFor{i \eq 2}{i \geq 1}{i \deq 2}{
    \If{$\vx{\ver{3}{m_t}} < i$}{
        $\shmem{\vfloat{s}}[\vy{\ver{3}{m_t}}][\vx{\ver{3}{m_t}}] \aeq \shmem{\vfloat{s}}[\vy{\ver{3}{m_t}}][\vx{\ver{3}{m_t}}+i]$ \;
    }
    \KwFence \;
  }

  % Only the x = 0 entries copy their row total into the staging buffer:
  % row 4 * q + y div 16, column y mod 16.
  \tcc{To coalesce global memory writes, results are grouped in shared memory}
  \If{$\vx{\ver{3}{m_t}} \ifeq 0$} {
     $\shmem{\vfloat{r}}[4 \mul q + \vy{\ver{3}{m_t}} \idiv 16][\vy{\ver{3}{m_t}} \imod 16] \eq \shmem{\vfloat{s}}[\vy{\ver{3}{m_t}}][0]$ \;
  }
  \KwSync \;
}
% Final write: each thread copies the staged value at its m_t position to
% position m_g of the output slice.
$\gmem{\vfloat{r}}[\vy{m_g}][\vx{m_g}] \eq \shmem{\vfloat{r}}[\vy{m_t}][\vx{m_t}]$  \;
}
\end{algorithm}