Source

utils / makeMake.sh

Full commit
#!/bin/bash

# Look for binaries we need, exit if not found

BWA=`which bwa`
SAMTOOLS=`which samtools`
BOWTIE2=`which bowtie2`
TOPHAT=`which tophat`
NOVO=`which novoalign`
aligner=$1

INDEXPATH=/usr/local/scratch/genomes

if [ $aligner == 'bwa' ]
then
  ext='bwt'
elif [ $aligner == 'bowtie2' ]
then
  ext='bt2'
elif [ $aligner == 'tophat' ]
then
  ext='ebwt'
else
  ext='nix'
fi

[[ -z ${NOVO} ]] && [[ $aligner == "novoalign" ]] &&  exit 1
[[ -z ${BWA} ]] &&  [[ $aligner == "bwa" ]] && exit 1
[[ -z ${BOWTIE2} ]] &&  [[ $aligner == "bowtie2" ]] && exit 1
[[ -z ${TOPHAT} ]] &&  [[ $aligner == "tophat" ]] && exit 1
[[ -z ${SAMTOOLS} ]] && exit 1


# Use the SampleSheet.csv to create the script and align
# SampleSheets has a header we want to skip

nlines=(`wc -l SampleSheet.csv`)
while read line
do
  # There may be reasons to skip this line... i.e. when there's no indexed genome 
  Skip=1

  # Split the SampleSheet line using commas, store values we need (ID, Lane, Genome and Index)
  IFS=","
  lineArray=($line)
  FCID=${lineArray[0]}
  Lane=`printf "%03d" ${lineArray[1]}`
  ID=${lineArray[2]}
  Genome=${lineArray[3]}
  if [ -z ${lineArray[4]} ]
  then 
    # If there's no index, set it to NoIndex, just like Illumina does
    Index="NoIndex"
  else 
    Index=${lineArray[4]}
  fi
  
  # Create the Experiment name, this is also the name Illumina gives to files
  expName=${ID}_${Index}_L${Lane}
  
  # Check how many chunks we have, this will be the number of SGE array tasks

  ntask=`ls ${ID}_${Index}_L${Lane}_R1_*.fastq.gz | wc -l`

  # Also check if this is paired end sequencing. This changes the bwa line
  isPE=0
  if [ -f ${ID}_${Index}_L${Lane}_R2_001.fastq.gz ]
  then
    isPE=1
  fi  
  
  # Ok, what if we don't have the genome?
  [[ -d ${INDEXPATH}/${Genome}/${aligner} ]] && Skip=0
  if [ $Skip -eq 1 ]
  then
    echo "Missing ${Genome}, you should correct the file by hand (align_${expName}) and qsub it"
    GFile="FIXME"
  else
    # If there's the genome, look for the proper index (assuming there's one index per directory ^__^ )
    GFile=`ls ${INDEXPATH}/${Genome}/${aligner}/*.${ext} | head -n1`
    if [ $aligner == 'bowtie2' ]
    then
      GFile=${GFile%%.1.${ext}}
    elif [ $aligner == 'tophat' ]
    then
      GFile=${GFile%%.1.${ext}}
      AFile=`dirname $GFile`
      AFile=${AFile}/../gtf/genes.gtf
    elif [ $aligner == 'bwa' ]
    then
      GFile=${GFile%%.bwt}
    fi
    echo "Using ${GFile}"
  fi  
done < <(tail -n $(( ${nlines[0]} - 1 )) SampleSheet.csv)

# generate Makefile

unset IFS

# set the list of BAM files
bamTargets=''
for x in `seq ${ntask}`
do
  chunk=$(printf "%03d" ${x})
  if [ $aligner == 'tophat' ]
  then
    bamTargets="${bamTargets} ${expName}_${chunk}/accepted_hits.bam"
  else
    bamTargets="${bamTargets} ${expName}_${chunk}.bam"
  fi
done  

cat > Makefile << ENDOFMAKE
SHELL = /bin/bash
BWA = ${BWA}
SAMTOOLS = ${SAMTOOLS}
NOVO = ${NOVO}
BOWTIE2 = ${BOWTIE2}
TOPHAT = ${TOPHAT}
NOVOOPT = -o SAM -F ILM1.8 -r Exhaustive 20 -t 20
BOW_OPT = --local 
BWAOPT_ALN =
TOPHAT_OPT = --bowtie1 --library-type fr-unstranded -G ${AFile} -r -40 
REF = ${GFile}
BWAOPT_PE = -r '@RG\tID:${ID}_${Index}\tPL:illumina\tPU:${FCID}\tLB:${ID}\tSM:${ID}\tCN:CTGB'
BWAOPT_SE =


all: ${ID}_${Index}.bam.bai
	/bin/mv ${ID}_${Index}.bam ${ID}_${Index}.bam.lock
	/bin/rm -f *.bam
	/bin/mv ${ID}_${Index}.bam.lock ${ID}_${Index}.bam
	@echo "Time to give some meaning to these files"

${ID}_${Index}.bam.bai: ${bamTargets}
	\$(SAMTOOLS) merge ${expName}.bam ${bamTargets}

${expName}.bam.bai: ${expName}.bam
	\$(SAMTOOLS) index ${expName}.bam

ENDOFMAKE

for x in `seq ${ntask}`
do
  chunk=`printf "%03d" ${x}`
  if [ $isPE == 1 ]
  then
    if [ $aligner == 'bwa' ]
         then
           alnstr="\$(BWA) sampe \$(BWAOPT_PE) \$(REF) <(\$(BWA) aln \$(BWAOPT_ALN) \$(REF) ${expName}_R1_${chunk}.fastq.gz) <(\$(BWA) aln \$(BWAOPT_ALN) \$(REF) ${expName}_R2_${chunk}.fastq.gz) ${expName}_R1_${chunk}.fastq.gz ${expName}_R2_${chunk}.fastq.gz | \$(SAMTOOLS) view -Su - | \$(SAMTOOLS) sort - ${expName}_${chunk}"
         elif [ $aligner == 'tophat' ]
         then
           alnstr="\$(TOPHAT) -o ${expName}_${chunk}  \$(TOPHAT_OPT)  \$(REF)  ${expName}_R1_${chunk}.fastq.gz ${expName}_R2_${chunk}.fastq.gz"
         elif [ $aligner == 'bowtie2' ]
         then
           alnstr="\$(BOWTIE2) \$(BOW_OPT) -x \$(REF) -1 ${expName}_R1_${chunk}.fastq.gz -2 ${expName}_R2_${chunk}.fastq.gz | \$(SAMTOOLS) view -Su - | \$(SAMTOOLS) sort - ${expName}_${chunk}"
         else
           alnstr="\$(NOVO) -d \$(REF) -f <(zcat ${expName}_R1_${chunk}.fastq.gz) <(zcat ${expName}_R2_${chunk}.fastq.gz) \$(NOVOOPT) | \$(SAMTOOLS) view -Su - | \$(SAMTOOLS) sort - ${expName}_${chunk} "
         fi
  if [ $aligner == 'tophat' ]
  then
    cat >> Makefile << ENDOFMAKE1
${expName}_${chunk}/accepted_hits.bam: ${expName}_R1_${chunk}.fastq.gz ${expName}_R2_${chunk}.fastq.gz
	$alnstr 
        

ENDOFMAKE1
  else
    cat >> Makefile << ENDOFMAKE1
${expName}_${chunk}.bam: ${expName}_R1_${chunk}.fastq.gz ${expName}_R2_${chunk}.fastq.gz
	$alnstr 
	
ENDOFMAKE1
  fi
  

  else
    cat >> Makefile << ENDOFMAKE2
${expName}_${chunk}.bam: ${expName}_R1_${chunk}.fastq.gz
	\$(BWA) samse \$(BWAOPT_SE) \$(REF) <(\$(BWA) aln \$(BWAOPT_ALN) \$(REF) ${expName}_R1_${chunk}.fastq.gz) ${expName}_R1_${chunk}.fastq.gz | \$(SAMTOOLS) view -Su - | \$(SAMTOOLS) sort - ${expName}_${chunk}
	
ENDOFMAKE2

  fi
done