1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
|
% Algorithm float (label alg:split): variant of the algorithm labelled
% alg:alurec in which every fetched 4-component value d is kept as two float2
% halves in two separate arrays (subscripts 1 and 2) and is reassembled into a
% float4 when it is read back.
% Relies on project macros defined outside this block: \ForTo, \KwTex, \KwDots,
% \shmem, \vdata, \vfloat, \vlist, \vx, \vy, \vz, \vw, \eq, \mul, \fconst.
% Cast type names are wrapped in \mathit so that they are typeset as single
% words instead of as products of one-letter math variables.
\begin{algorithm}[htb]
  % algorithm2e switch: end-of-line \; markers do not print a semicolon.
  \DontPrintSemicolon
  \caption{\label{alg:split} Modification of \algorithmname~\ref{alg:alurec} to split the 4-slice cache as required on Fermi and AMD architectures}
  \Begin{
    % Fill phase; \ForTo is a project macro, presumably "for i from 0 to s_i"
    % -- TODO confirm against its definition.
    \ForTo{i}{0}{s_i}{
      % Index h for iteration i, built from i, s_t and m_d.
      $h \eq i \mul s_t + m_d$ \;
      % Fetch d through \KwTex at (h_m + h + 0.5, p + 0.5); presumably a
      % texture read with half-texel offsets -- TODO confirm.
      $\vfloat{d} \eq $ \KwTex{$h_m + h + \fconst{0.5}$, $p + \fconst{0.5}$} \;
      % Split d: the (x, y) pair goes to array 1, the (z, w) pair to array 2,
      % both at position [m_p][h].
      $\shmem{\vdata{d}}_1[m_p][h] \eq (\mathit{float2})\vlist{\vx{d}, \vy{d}}$ \;
      $\shmem{\vdata{d}}_2[m_p][h] \eq (\mathit{float2})\vlist{\vz{d}, \vw{d}}$ \;
    }
    % \KwDots is a project macro; presumably it marks steps carried over
    % unchanged from the base algorithm -- TODO confirm.
    \KwDots {
      % Reassemble two float4 values from both arrays, at h_i and at h_i + 1.
      $\vfloat{d}_1 \eq (\mathit{float4})\vlist{\shmem{\vdata{d}}_1[p_i][h_i], \shmem{\vdata{d}}_2[p_i][h_i]}$ \;
      $\vfloat{d}_2 \eq (\mathit{float4})\vlist{\shmem{\vdata{d}}_1[p_i][h_i + 1], \shmem{\vdata{d}}_2[p_i][h_i + 1]}$ \;
      % Replace d_2 by its difference from d_1.
      $\vfloat{d}_2 \eq \vfloat{d}_2 - \vfloat{d}_1$ \;
    }
  }
\end{algorithm}
|