Wiki
Clone wikisparkseq / Pipelines
Under development
Feature counts with filtering
import pl.elka.pw.sparkseq.seqAnalysis.SparkSeqAnalysis
import pl.elka.pw.sparkseq.conversions.SparkSeqConversions
import pl.elka.pw.sparkseq.util.SparkSeqRegType._
/*initiate a SparkSeqJunctionAnalysis object and attach samples to it*/
val seqAnalysis = new SparkSeqAnalysis(sc,"/BAM/64MB/derfinder/chrom_Y/M/orbFrontalF1_Y.bam",1,1,1)
val caseId = Array(11, 32, 3, 42, 43, 47, 53, 58)
for(i<-caseId)
seqAnalysis.addBAM(sc,"/BAM/64MB/derfinder/chrom_Y/M/orbFrontalF"+i.toString+"_Y.bam",i,1)
/*filter out reads with low quality, i.e. PHRED > 30*/
seqAnalysis.filterMappingQuality(_ > 30)
/*CIGAR regular expression for filtering out alignment gaps > 5000bp */
val cigRegex=
"^([0-9]+[MEQISXDHP]+)+((0*([0-9]{1,3}|[1-4][0-9]{3}))?N?[0-9]+M[0-9]*[EQISXDHP]*){0,3}[0-9]*[MEQISXDHP]*$".r
seqAnalysis.filterCigarString(cigRegex.pattern.matcher(_).matches)
/* get feature counts */
/*exons*/
val genExonsMapB = sc.broadcast(SparkSeqConversions.BEDFileToHashMap(sc,
"/BAM/64MB/aux/Homo_sapiens.GRCh37.74_exons_chr_sort_uniq.bed"))
val exonCounts = seqAnalysis.getCoverageRegion(genExonsMapB, unionMode=true)
/*genes*/
val genesMapB = sc.broadcast(SparkSeqConversions.BEDFileToHashMap(sc,
"/BAM/64MB/aux/Homo_sapiens.GRCh37.74_genes_chr_merged_swap.bed"))
val genesCount = seqAnalysis.getCoverageRegion(genesMapB, unionMode=true)
Junctions counts with filtering
import pl.elka.pw.sparkseq.seqAnalysis.SparkSeqAnalysis
import pl.elka.pw.sparkseq.conversions.SparkSeqConversions
import pl.elka.pw.sparkseq.util.SparkSeqRegType._
import pl.elka.pw.sparkseq.junctions.SparkSeqJunctionAnalysis
/*initiate a SparkSeqJunctionAnalysis object and attach samples to it*/
val seqAnalysis = new SparkSeqAnalysis(sc,"/BAM/64MB/derfinder/chrom_Y/M/orbFrontalF1_Y.bam",1,1,1)
val caseId = Array(11, 32, 3, 42, 43, 47, 53, 58)
for(i<-caseId)
seqAnalysis.addBAM(sc,"/BAM/64MB/derfinder/chrom_Y/M/orbFrontalF"+i.toString+"_Y.bam",i,1)
/*filter out reads with low quality, i.e. PHRED > 30*/
seqAnalysis.filterMappingQuality(_ > 30)
/*CIGAR regular expression for filtering out alignment gaps > 5000bp */
val cigRegex=
"^([0-9]+[MEQISXDHP]+)+((0*([0-9]{1,3}|[1-4][0-9]{3}))?N?[0-9]+M[0-9]*[EQISXDHP]*){0,3}[0-9]*[MEQISXDHP]*$".r
seqAnalysis.filterCigarString(cigRegex.pattern.matcher(_).matches)
/*get junction reads*/
val juncAnalysis = new SparkSeqJunctionAnalysis(seqAnalysis)
val juncReadsCount = juncAnalysis.getJuncReadsCounts().filter(j=>(j._2 > 10))
val juncResults = juncReadsCount
.map(j=>(SparkSeqConversions.splitSampleID(j._1._1),SparkSeqConversions.splitSampleID(j._1._2),j._2) )
.map(j=>((j._1._1,SparkSeqConversions.idToCoordinates(j._1._2),
SparkSeqConversions.idToCoordinates(j._2._2)),j._3 ))
Updated