% Source: ReproducibleResearchTutorial / Vignette / ReproducibleResearch.Rnw

\documentclass[14pt]{beamer}
\usetheme{Warsaw}
\usepackage{hyperref}
\usepackage{verbatim}
%\usepackage{listings}
% Inline markup macros for R-related terms. Code-like items (functions,
% function arguments, objects, generic code) are set in typewriter type;
% package, class, and software names are set in italics.
% NOTE(review): the extra inner brace group in each body looks deliberate
% (it isolates the font command from whatever token follows the macro call),
% so keep it unless confirmed redundant.
\newcommand{\Rfunction}[1]{{\texttt{#1}}}
\newcommand{\Rfunarg}[1]{{\texttt{#1}}}
\newcommand{\Robject}[1]{{\texttt{#1}}}
\newcommand{\Rpackage}[1]{{\textit{#1}}}
\newcommand{\Rclass}[1]{{\textit{#1}}}
\newcommand{\code}[1]{{\texttt{#1}}}
\newcommand{\software}[1]{{\textit{#1}}}
% Document-wide defaults for the R code chunks below (graphics formats,
% caching, echoing of chunk source). Presumably read by the weave step
% (Sweave/knitr) rather than by LaTeX itself -- confirm against the tool in use.
\SweaveOpts{png=true,format=png,pdf=true,cache=False,echo=True}

\title[Reproducible Research with R]{Reproducible Research with R}
\subtitle{Using knitr, Sweave, version control, and packages to improve reproducibility}
\author{Sean Davis}
\institute[NCI]{National Cancer Institute}

\begin{document}
\SweaveOpts{concordance=TRUE}


%------------------ Title 

\begin{frame}[plain]
  \titlepage
\end{frame}

\begin{frame}
\frametitle{Outline}
  \tableofcontents
\end{frame}

%------------------ Reproducible Research Section

\section{Reproducible Research}

\begin{frame}
\frametitle{What is Reproducible Research?}
\begin{exampleblock}{}
  {\large ``The term \textit{reproducible research} refers to the idea that the ultimate product of research is the paper along with the full computational environment used to produce the results in the paper such as the code, data, etc. necessary for reproduction of the results and building upon the research.''}
  \vskip5mm
  \hspace*\fill{\small--- Wikipedia}
\end{exampleblock}

Details available \href{http://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=4720217}{in this set of review articles}.
\end{frame}

\begin{frame}[plain]
\begin{figure}
\includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{Literate_Programming_book_cover.jpg}
\caption{The first description of \textit{Literate Programming} came from Donald Knuth in the 1970s.}
\end{figure}
\end{frame}

\begin{frame}
\frametitle{What is Literate Programming?}
\begin{exampleblock}{}
{\small ``The \textit{literate programming paradigm}, as conceived by Knuth, represents a move away from writing programs in the manner and order imposed by the computer, and instead enables programmers to develop programs in the order demanded by the logic and flow of their thoughts.  Literate programs are written as an uninterrupted exposition of logic in an ordinary human language, much like the text of an essay, in which macros are included to hide abstractions and traditional source code.''}
  \vskip5mm
  \hspace*\fill{\small--- Wikipedia}
\end{exampleblock}
\end{frame}

\begin{frame}
\frametitle{Tangling and Weaving}
Literate programming tools are used to produce two products:
\begin{itemize}
\pause
\item{The \textit{tangled} code that is meant to be consumed by the computer to produce the results of the analysis.}
\pause
\item{The \textit{woven} document that renders the documentation, code, and results into a human-consumable format.}
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Other Aspects of Reproducible Research}
\begin{itemize}
\item{Versioning of data \textit{and} code}
\item{Data and code availability}
\item{Data and code provenance}
\item{Dependency tracking}
\item{Documentation}
\end{itemize}
\end{frame}

%----------------------- Sweave Section

\subsection{Sweave}

\begin{frame}
\frametitle{What is Sweave?}
\begin{itemize}
\item{A literate programming tool for R.}
\item{Based on noweb markup and \LaTeX.}
\item{A set of tools for working with Rnw (RNoWeb) files.}
\end{itemize}
\end{frame}

\begin{frame}[fragile]
\scriptsize\verbatiminput{ExampleSweaveDocument.Rnw}
\normalsize
\end{frame}

\begin{frame}[fragile]{Running Sweave}
\begin{block}{Run from command line}
{\small\begin{verbatim}
R CMD Sweave ExampleSweaveDocument.Rnw
R CMD pdflatex ExampleSweaveDocument.tex
\end{verbatim}
}
\end{block}
\begin{block}{Run from within R}
{\small\begin{verbatim}
Sweave("ExampleSweaveDocument.Rnw")
system("R CMD pdflatex ExampleSweaveDocument.tex")
\end{verbatim}
}
\end{block}
\end{frame}


%----------------------- knitr Section

\subsection{knitr}

\begin{frame}
\frametitle{knitr Introduction}
\begin{exampleblock}{}
  {\large ``The knitr package was designed to be a transparent engine for dynamic report generation with R, solve some long-standing problems in Sweave, and combine features in other add-on packages into one package''}
  \vskip5mm
  \hspace*\fill{\small--- The knitr website}
\end{exampleblock}

\end{frame}

\begin{frame}[fragile]{knitr on an R script}
\begin{block}{Volcano.R}
{\small\begin{verbatim}
z <- 2 * volcano        # Exaggerate the relief
x <- 10 * (1:nrow(z))   # 10 meter spacing (S to N)
y <- 10 * (1:ncol(z))   # 10 meter spacing (E to W)
par(mar=rep(.5,4))
persp(x, y, z, theta = 120, phi = 15, scale = FALSE, axes = FALSE)
\end{verbatim}
}
\end{block}
\end{frame}

\begin{frame}[fragile]{knitr on an R script}
\begin{block}{spin the R script}
\begin{verbatim}
# install.packages('knitr')
library(knitr)
spin('Volcano.R')
\end{verbatim}
\end{block}
This will produce:
\begin{itemize}
\item{Volcano.Rmd}
\item{Volcano.md}
\item{Volcano.html}
\end{itemize}
\end{frame}

\begin{frame}{knitr for literate programming}
\begin{itemize}
\item{Traditional Rnw files (used by Sweave) to produce latex/pdf.}
\item{Using the \href{http://daringfireball.net/projects/markdown/syntax}{markdown}-based .Rmd files}
\item{``spinning'' an R file to html}
\item{First ``spinning'' an R file to Rmd and then working on the Rmd file}
\end{itemize}
\end{frame}

\section{R Packages}

\begin{frame}{Advantages of R packages}
\begin{itemize}
\item{Standard packaging mechanism}
\item{Versioned}
\item{Maintains provenance}
\item{Documented}
\item{Can contain both code \textit{and} data}
\item{Tracks dependencies}
\item{Simplifies literate programming}
\end{itemize}
\end{frame}

\begin{frame}[fragile]{Create a simple package}
% Chunk: scaffold the pubmedR package in the parent directory from ../pubmed.R,
% overwriting any existing skeleton (force=TRUE).
<<createpackage,size="scriptsize">>=
package.skeleton(
  name       = 'pubmedR',
  code_files = '../pubmed.R',
  path       = '..',
  force      = TRUE
)
@
This produces a directory, \code{pubmedR}, that is an R package.
\end{frame}

\begin{frame}{Next steps}
\begin{itemize}
\item{Edit DESCRIPTION file}
\item{Edit .Rd files (documentation)}
\item{Add data files or further R code}
\item{R CMD check}
\item{R CMD INSTALL}
\item{SHARE!!!}
\end{itemize}
\end{frame}

%------------------- Version Control

\section{Version Control}

\begin{frame}[fragile]{What is Version Control?}
\begin{exampleblock}{}
  {``Revision control, also known as version control and source control (and an aspect of software configuration management), is the management of changes to documents, computer programs, large web sites, and other collections of information.''}
  \vskip5mm
  \hspace*\fill{\small--- Wikipedia}
\end{exampleblock}
\end{frame}

\begin{frame}{Why Version Control?}
Traditional Reasons:
\begin{itemize}
\item{maintain history of resources}
\item{allow tracking of changes with provenance}
\item{provide for parallel tracks of development (branching and merging)}
\end{itemize}
More recent reasons:
\begin{itemize}
\item{Social coding}
\item{Sharing code publicly}
\item{Automation of tasks based on code changes}
\end{itemize}
\end{frame}

\subsection{git}

\begin{frame}[fragile]{Git for version control}
\begin{exampleblock}{}
  {``Git is a free and open source distributed version control system designed to handle everything from small to very large projects with speed and efficiency.  Git is easy to learn and has a tiny footprint with lightning fast performance. It outclasses SCM tools like Subversion, CVS, Perforce, and ClearCase with features like cheap local branching, convenient staging areas, and multiple workflows.''}
  \vskip5mm
  \hspace*\fill{\small--- Git website}
\end{exampleblock}
\end{frame}

\section{Pulling it all together}

\begin{frame}[fragile]{Pulling it all together}
\url{https://bitbucket.org/seandavi/reproducibleresearchtutorial}
\begin{block}{Check out the code}
\begin{verbatim}
git clone https://bitbucket.org/seandavi/reproducibleresearchtutorial.git
\end{verbatim}
\end{block}
\end{frame}



% Closing slide: print the R session details (R version, platform, and the
% packages in use) that were in effect when these slides were built, so the
% build environment is recorded alongside the talk.
\begin{frame}[fragile]
<<sessionInfo,size="scriptsize">>=
sessionInfo()
@ 
\end{frame}

\end{document}