# rna.sample.conf -- sample configuration for the RNA-seq pipeline

# Custom part
# ======================================================================

[SPECIE]
# Which version of the static data to use for the tools: 'mm9' for mouse
# or 'hg19' for human. For more information, see the [hg19] and [mm9]
# sections in the 'Static data' region below.

VERSION = hg19
[Scripts]
# NOTE(review): presumably the root directory of the pipeline scripts --
# confirm against the code that reads this key.

ROOTDIR = /mnt/Storage/home/sunhf/sandbox/pipeline/
[Data]
# Give each fastq file a short name (name = path). These names are what
# the [comparison] section below refers to.

micro = /mnt/Storage/home/sunhf/sandbox/pipeline/for_test/micro.fastq
micro2 = /mnt/Storage/home/sunhf/sandbox/pipeline/for_test/micro2.fastq
micro3 = /mnt/Storage/home/sunhf/sandbox/pipeline/for_test/micro3.fastq

[comparison]
# Create a comparison for further analysis, such as finding
# differential genes. Each entry has the form "name=groupA:groupB".
# Use ":" between the two groups, and "," between the names inside one
# group. The names are the ones defined in the [Data] section above.
# For example, if there are 3 replicates, write:
# "Sample1vsSample2 = sample1:sample2,sample2.rep1,sample2.rep2"
# DON'T put spaces next to the commas or the colon.
# Always use "=" (not ":") between the comparison name and its value, so
# that the only colon on the line is the group separator.

singlevsdouble=micro,micro2:micro3
selfvsself=micro:micro
self3vsself=micro,micro2,micro3:micro
svs=micro:micro

[Steps]
# Customize which tools are run. For a first run, do not reverse the
# order of the tools in the list.


# The dependencies between the tools look like this
# (step <= the step it depends on):
#
# fastqc_raw <= None
# fastqc_mapped <= tophat
# tophat <= None
# gfold <= tophat
# cufflinks <= tophat
# summary_rpkm <= cufflinks
# summary_mapped <= tophat
# bam2bed <= tophat

# A complete step order may look like the following
steps = fastqc_raw,tophat,fastqc_mapped,summary_mapped,gfold,cufflinks,summary_rpkm,bam2bed



[Output]
# Directory to use for the pipeline output

OUTPUTDIR = /mnt/Storage/home/sunhf/databox/rna/strict_test

# Static data
# ====================================================================

[hg19]
# Bowtie genome sequence index path. If the directory PATH contains the
# files hg19.1.ebwt, hg19.2.ebwt and so on, plus hg19.rev.1.ebwt and
# hg19.rev.2.ebwt, write PATH/hg19
# Used by tophat.

BOWTIE_GENOME_INDEX_PATH = /mnt/Storage/data/Bowtie/hg19

# Gene transfer format (GTF) file, which has the information of every
# gene id and its own transcript id.
# Used by tophat, cufflinks

GTF = /mnt/Storage/data/RefGene/hg19.gtf

# Used by gfold
GPF = /mnt/Storage/data/RefGene/hg19.gpf

# RefFlat gene annotation file
# Used by DEGseq
REFFLAT = /mnt/Storage/data/Annotation/hg19.refflat

[mm9]
# Static data for mouse (mm9). Same keys as the [hg19] section.

# Bowtie genome sequence index path
BOWTIE_GENOME_INDEX_PATH = /mnt/Storage/data/Bowtie/mm9

# Gene transfer format (GTF) file
GTF = /mnt/Storage/data/RefGene/mm9.gtf

# Used by gfold
GPF = /mnt/Storage/data/RefGene/mm9.gpf

# RefFlat gene annotation file
REFFLAT = /mnt/Storage/data/Annotation/mm9.refflat


# Tool config
# ====================================================================

[fastqc]
# Settings for the fastqc quality control tool.
# main: path to the fastqc executable

main = /opt/bin/fastqc

[tophat]
# Settings for tophat alignment.
# main: path to the tophat executable
# extend_option: left empty here.
#   NOTE(review): presumably extra command-line options passed to
#   tophat -- confirm against the code that reads this key.
# bam2bed: path to the bedtools bamToBed executable
#   (presumably used by the bam2bed step -- confirm)

main = /opt/bin/tophat
extend_option =
bam2bed = /opt/bin/bedtools/bamToBed

[cufflinks]
# Settings for cufflinks transcript finding.
# main: path to the cufflinks executable
main = /opt/bin/cufflinks

[gfold]
# Settings for gfold differential gene finding.
# main: path to the gfold executable
# samtools: path to the samtools executable
main = /opt/bin/gfold
samtools = /opt/bin/samtools


# Advanced config
# ====================================================================

[Performance]
# NOTE(review): presumably the number of processes the pipeline may run
# in parallel -- confirm against the code that reads this key.

process_number = 5


# To do in next version
# ======================================================================


[pairdefine]
# Define a pair of paired-end sequencing data, which is used in the
# [comparison] section.
# NOTE(review): mate_inner_distance is presumably the expected inner
# distance between the two mates of a pair -- confirm units and consumer.
mate_inner_distance = 150

[strict]
# When this option is set to True, the program stops immediately when it
# meets a warning about a dependency. Otherwise it only displays the
# warning and continues.
strict = False