/articles/toma

To get this branch, use:
bzr branch http://darksoft.org/webbzr/articles/toma
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
% Variant of the main loop of the algorithm labelled alg:newtex4: the
% per-projection constants are staged from global memory into shared memory
% in chunks of s_p projections, and the partial sums are then computed from
% the shared copies.
\begin{algorithm}[htb]
\DontPrintSemicolon
\caption{\label{alg:newtex4_cc} The main loop of \algorithmname~\ref{alg:newtex4} modified to cache geometrical constants in the shared memory}
\KwIn{Similar to \algorithmname~\ref{alg:newtex4}, but projection constants $\gmem{c}_*$ are provided in global GPU memory}
\KwShMem{$\shmem{\vdata{c}_{cs}}[s_p]$, $\shmem{c_a}[s_p]$}

% Outer loop: p_b is the index of the first projection of the current chunk.
\ForToBy{p_b}{0}{n_p}{s_p}{
  \tcc{Caching projection constants in shared memory}
  % m_l is the linear index built from the two components of m_t; each
  % thread copies the constants of exactly one projection of the chunk.
  % NOTE(review): this presumably requires s_p to equal the number of
  % threads in the block -- confirm against alg:newtex4.
  $m_l \eq \vy{m_t} \mul \vx{n_t} + \vx{m_t}$ \;
  % NOTE(review): p_b + m_l is not checked against n_p here, although the
  % bound of the loop below allows a partial last chunk; presumably the
  % global arrays are padded to a multiple of s_p -- confirm.
  $\shmem{\vdata{c}_{cs}}[m_l] \eq \vlist{\gmem{c_c}[p_b + m_l], \gmem{c_s}[p_b + m_l]}$ \;
  $\shmem{c_a}[m_l] \eq \gmem{c_a}[p_b + m_l]$ \;

  \KwSync \;

  \tcc{Computing partial sums}
  % The upper bound is clamped so a partial last chunk stops at n_p.
  \ForToBy{p}{m_p}{\min(s_p, n_p - p_b)}{4}{
    % c_s keeps the y component of the cached pair, so the inner loop does
    % not have to index the shared array again on every step.
    $c_s \eq \vy{\shmem{\vdata{c}_{cs}}[p]}$ \;
    $h \eq \shmem{c_a}[p] + \vx{f'_g} \mul \vx{\shmem{\vdata{c}_{cs}}[p]} - \vy{f'_g} \mul \vy{\shmem{\vdata{c}_{cs}}[p]} + \fconst{0.5}$ \;

    \ForTo{q}{0}{4}{
       $\vfloat{s}[q] \aeq $ \KwTex{$h$, $p_b + p + \fconst{0.5}$} \;
       $h \seq 4 \mul c_s$ \;
    }
  }

  % Second barrier precedes the next outer iteration, which writes the
  % shared arrays again.
  \KwSync \;
}
\end{algorithm}