/articles/toma

To get this branch, use:
bzr branch http://darksoft.org/webbzr/articles/toma
47 by Suren A. Chilingaryan
The conceptual part of transaction paper
1
\begin{algorithm}[htb]
2
\DontPrintSemicolon
3
\caption{\label{alg:texrec_pascal} Optimized implementation of the back-projection kernel relying on the texture engine to perform interpolation}
4
%\begin{algorithmic}
5
\KwIn {Texture, projection constants~($\cmem{c}_*$),  dimensions~($n_*$), cache sizes~($s_*$), and other parameters~($v_*$)}
6
\KwShMem {$\shmem{\vfloat{s}}[64][4]$, $\shmem{\vfloat{r}}[16][16]$}
7
\KwOut {Reconstructed slice $\gmem{\vfloat{r}}$}
8
\Begin {
53 by Suren A. Chilingaryan
transact: partially apply proofs by Evelina
9
\tcc{Computing sequential numbers of $4 \times 4$ square, quadrant, and pixel within quadrant}
47 by Suren A. Chilingaryan
The conceptual part of transaction paper
10
$square \eq \vy{m_t} \imod 4$ \;
11
$quadrant \eq \vx{m_t} \idiv 4$ \;
12
$pixel \eq \vx{m_t} \imod 4$ \;
13
53 by Suren A. Chilingaryan
transact: partially apply proofs by Evelina
14
\tcc{Computing projection and pixel offsets}
47 by Suren A. Chilingaryan
The conceptual part of transaction paper
15
$m_p \eq \vy{m_t} \idiv 4$ \;
16
$\vx{m_t'} \eq 4 \mul square + 2 \mul (quadrant \imod 2) +  (pixel \imod 2) $ \;
17
$\vy{m_t'} \eq 2 \mul (quadrant \idiv 2) +  (pixel \idiv 2) $ \;
18
53 by Suren A. Chilingaryan
transact: partially apply proofs by Evelina
19
\tcc{Computing pixel coordinates}
47 by Suren A. Chilingaryan
The conceptual part of transaction paper
20
$\vdata{m}'_g \eq \vdata{m}_b \vmul \vdata{n}_t + \vdata{m}'_t$ \;
21
$\vdata{f}'_g \eq \vdata{m}'_g - \vdata{v}_a$ \;
22
53 by Suren A. Chilingaryan
transact: partially apply proofs by Evelina
23
\tcc{Computing partial sums}
47 by Suren A. Chilingaryan
The conceptual part of transaction paper
24
$\vfloat{s}[4] \eq \{0\}$ \;
25
\ForToBy{p}{m_p}{n_p}{4}{
26
    $c_s \eq \vy{\cmem{c_s}[p]}$ \;
27
    $h \eq \cmem{c_a}[p] + \vx{f'_g} \mul \cmem{c_c}[p] - \vy{f'_g} \mul \cmem{c_s}[p] + \fconstf{0.5}$ \;
28
    
29
	\ForTo{q}{0}{4}{
30
       $\vfloat{s}[q] \aeq $ \KwTex{$h - 4 \mul q \mul c_s$, $p + \fconstf{0.5}$} \;
31
    }
32
}
33
34
\tcc{Reduction}
35
$\vdata{m}_t'' \eq \vlist{\vx{m_t} \imod 4, 4 \mul \vy{m_t} + \vx{m_t} \idiv 4}$ \;
36
37
\ForTo{q}{0}{4}{
38
    \tcc{Moving partial sums to shared memory}
39
    $\shmem{\vfloat{s}}[\vx{n_t} \mul \vy{m_t'} + \vx{m_t'}][m_p] \eq \vfloat{s}[q]$ \;
40
    \KwSyncThreads \;
41
  
42
    \tcc{Performing reduction}
43
    $\vfloat{r} \eq \shmem{\vfloat{s}}[\vy{m_t''}][\vx{m_t''}]$ \;
44
    \CFor{i \eq 2}{i \geq 1}{i \deq 2}{
45
        $\vfloat{r} \aeq$ \KwShflXor{$\vfloat{r}$, $i$, $4$} \;   
46
    }
47
  
53 by Suren A. Chilingaryan
transact: partially apply proofs by Evelina
48
    \tcc{Grouping results in shared memory to coalesce global memory writes}
47 by Suren A. Chilingaryan
The conceptual part of transaction paper
49
    \If{$\vx{m_t''} \ifeq 0$} {
50
        $\shmem{\vfloat{r}}[4 \mul q + \vy{m_t''} \idiv 16][\vy{m_t''} \imod 16] \eq\vfloat{r}$ \;
51
    }
52
    \KwSyncThreads \;
53
}
54
55
$\gmem{\vfloat{r}}[\vy{m_g}][\vx{m_g}] \eq \shmem{\vfloat{r}}[\vy{m_t}][\vx{m_t}]$  \;
56
}
57
\end{algorithm}