Wiki

Clone wiki

RSparkSeq / Examples

1. To get gene counts:

# Create the Spark context: Mesos master URL, Spark executor tarball on HDFS,
# and the SparkSeq assembly jar.
rcont <- RSparkContext(
  master = "mesos://sparkseq001.cloudapp.net:5050",
  executor = "hdfs:///frameworks/spark/0.9.0/spark-0.9.0-incubating-hadoop_1.2.1-bin.tar.gz",
  sparkJar = "sparkseq-core-assembly-0.1-SNAPSHOT.jar",
  sparkMesosCoarse = TRUE,
  debug = TRUE
)

# Start the analysis from the first BAM file (orbFrontalF1).
# NOTE(review): the meaning of the three positional 1s is not visible here;
# confirm against the RSparkSeqAnalysis signature.
seqAnalysis <- RSparkSeqAnalysis(
  rcont,
  "hdfs://sparkseq002.cloudapp.net:9000/BAM/64MB/derfinder/chrom_Y/M/orbFrontalF1_Y.bam",
  1,
  1,
  1,
  debug = TRUE
)

# Use the gene-level BED file as the region map.
regionHashMap(seqAnalysis) <-
  "hdfs://sparkseq002.cloudapp.net:9000/BAM/64MB/aux/Homo_sapiens.GRCh37.74_genes_chr_merged_swap.bed"

# Register the remaining samples; each call passes c(<BAM path>, <sample ID>).
samplesID <- c(11, 32, 3, 42, 43, 47, 53, 58)
for (i in samplesID) {
  addBAMFile(
    seqAnalysis,
    c(
      paste0(
        "hdfs://sparkseq002.cloudapp.net:9000/BAM/64MB/derfinder/chrom_Y/M/orbFrontalF",
        as.character(i),
        "_Y.bam"
      ),
      i
    )
  )
}

# Compute the gene count table (printed below).
genes <- geneCounts(seqAnalysis)
> genes[1:5,]
          Feature Sample_1 Sample_3 Sample_11 Sample_32 Sample_42 Sample_43 Sample_47 Sample_53 Sample_58
1 ENSG00000012817     8720     9103      8585      7375      5387      5835      7669      8175      7258
2 ENSG00000067048     6321     8647      5325      5598      3468      4140      4376      6827      4335
3 ENSG00000067646     1354     1499      1691       889       977       558       872      1128      1004
4 ENSG00000092377       75       48        31        61        29        10        31        26        42
5 ENSG00000099715      291      418       158       229        42       128       125       145       303

2. To get exon counts, you only need to change the BED file:

# Point the existing analysis object at the exon-level BED file.
# NOTE(review): this path has a double slash after the port ("9000//BAM"),
# unlike the gene BED path used earlier; confirm whether that is intentional.
regionHashMap(seqAnalysis) <-
  "hdfs://sparkseq002.cloudapp.net:9000//BAM/64MB/aux/Homo_sapiens.GRCh37.74_exons_chr_sort_uniq.bed"

# Compute the exon count table (printed below).
exons <- exonCounts(seqAnalysis)
> exons[1:5,]
          Feature Sample_1 Sample_3 Sample_11 Sample_32 Sample_42 Sample_43 Sample_47 Sample_53 Sample_58
1 ENSE00000652498      186      251       135       168       131       140       100       198       149
2 ENSE00000652501      148      211       136       156       130        83       140       230       171
3 ENSE00000652502      207      383       173       198        83       129       188       176       169
4 ENSE00000652503      215      384       182       265        93       136       177       161       135
5 ENSE00000652506      112      196        87       185        73        61       120       101       1

Updated