1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
|
% Table tbl:shmemconf: number of threads per projection row and the extra
% optimizations needed to avoid shared memory bank conflicts in the ALU-based
% reconstruction kernel, listed separately for the standard and the advanced
% caching modes.
% Uses the threeparttable (caption + notes sized to the table), tabularx
% (X column) and multirow (row-spanning cells) packages.
\begin{table}[htb] %[htbp]
\begin{threeparttable}
\caption{\label{tbl:shmemconf} The optimal parameters to prevent shared memory bank conflicts in the ALU-based reconstruction kernel}
\centering
\noindent
% Unused alternative: scale a plain tabular to the column width with
% \resizebox{\columnwidth}{!}{\begin{tabular}{...} ... \end{tabular}}
% Columns: area | n_v | platform | threads | optimizations (X takes the remaining width).
\begin{tabularx}{\columnwidth}{ | c | l | l | l | X | }
% ---- Standard caching mode ----
\multicolumn{5}{l}{\textbf{Standard Caching Mode} (see \sectionname~\ref{section:alurec_shmem})} \\
\hline
Area & $n_v$ & Platform & Threads & Optimizations \\
\hline
% Each area spans 6 rows: 3 values of n_v times 2 platforms.
\multirow{6}{*}{$32 \times 32$}
& \multirow{2}{*}{1}
& 32-bit & 16 & -- \\
& & 64-bit & 8 & write64 \\
\cline{2-5}
& \multirow{2}{*}{2}
& 32-bit & 16 & -- \\
& & 64-bit & 16 & -- \\
\cline{2-5}
& \multirow{2}{*}{4}
& AMD \& Fermi & 16 & double-buffer \\
& & Kepler+ & 16 & -- \\
\hline
\multirow{6}{*}{$64 \times 64$}
& \multirow{2}{*}{1}
& 32-bit & 32 & -- \\
& & 64-bit & 16 & write64 \\
\cline{2-5}
& \multirow{2}{*}{2}
& 32-bit & 32 & -- \\
& & 64-bit & 32 & -- \\
\cline{2-5}
& \multirow{2}{*}{4}
& AMD \& Fermi & 32 & double-buffer \\
& & Kepler+ & 32 & -- \\
\hline
% Empty row separating the two sub-tables.
\multicolumn{5}{l}{} \\
% ---- Advanced caching mode ----
\multicolumn{5}{l}{\textbf{Advanced Caching Mode} (see \sectionname~\ref{section:ycache})} \\
\hline
Area & $n_v$ & Platform & Threads & Optimizations \\
\hline
\multirow{1}{*}{$32 \times 32$}
& \multirow{1}{*}{1}
& All & 16 & -- \\
\hline
\multirow{1}{*}{$64 \times 64$}
& \multirow{1}{*}{1}
& All & 32 & -- \\
\hline
\end{tabularx}
\begin{tablenotes}
\item For each considered configuration, the number of threads per projection row and the required optimizations are specified. The \emph{double-buffer} optimization splits the shared memory cache into two parts to prevent bank conflicts on the NVIDIA Fermi and all considered AMD architectures. The \emph{write64} optimization combines two writes to shared memory to use the full bandwidth of Kepler GPUs.
\end{tablenotes}
\end{threeparttable}
\end{table}
|