Commits

Dimitris Leventeas  committed 21dedea

Cuda included

  • Participants
  • Parent commits 584f994

Comments (0)

Files changed (2)

File parallel prefix sum.pdf

Binary file modified.

File parallel prefix sum.tex

 
 \end{frame}
 
+\section{CUDA}
+
+\subsection{Preliminaries}
+
+\begin{frame}
+    \frametitle{Basic concepts}
+
+    \begin{definition}[Basic concepts]
+        \begin{enumerate}
+            \item \emph{thread:} concurrent code and associated state executed on the CUDA device (in parallel with other threads).
+            \item \emph{warp:} a group of threads executed physically in parallel.
+        \end{enumerate}
+
+    \end{definition}
+
+\end{frame}
+
+\begin{frame}
+    \frametitle{Basic concepts (2)}
+
+    \begin{definition}[Data banks and conflicts]
+        \begin{itemize}
+            \item Memory is divided into data banks.
+            \item Successive 32-bit words $\rightarrow$ successive banks.
+            \item G80 (16 banks) $\rightarrow \text{bank} = \text{memory address} \bmod 16$.
+            \item Each bank can serve only $1$ dataset at a time.
+            \item More datasets in the same bank $\rightarrow$ serialization.
+            \item Bank conflict.
+            \item Exception: broadcast (same address to all).
+        \end{itemize}
+
+    \end{definition}
+
+\end{frame}
+
+\begin{frame}
+    \frametitle{Data bank}
+
+    \resizebox{\textwidth}{!}{
+    \begin{tabular}{|l|c c c c | c c c c | c c c c | c}
+      \hline
+      Bank    & \multicolumn{4}{c|}{1} & \multicolumn{4}{c|}{2} & \multicolumn{4}{c|}{3} & \dots \\ \hline
+      Address &  0 & 1  &  2 &  3     &  4 & 5  & 6  & 7      &  8 &  9 & 10 & 11     & \dots \\ 
+      Address & 64 & 65 & 66 & 67     & 68 & 69 & 70 & 71     & 72 & 73 & 74 & 75     & \dots \\
+      Address & \multicolumn{4}{c|}{\dots} & \multicolumn{4}{c|}{\dots} & \multicolumn{4}{c|}{\dots} & \dots \\ \hline
+    \end{tabular}
+    }
+\end{frame}
+
+
+
 \end{document}