Source

utils / bwa_submit_single_file.sh

#!/bin/bash
# simple script to compute alll bwa alignment in a dir.

pairedEnd=false
run=false


name="run_name"
ID="sample_id"
PL="illumina"
PU="sample_fcid"
LB="sample_library"
SM="sample_name"
CN="CTGB"
reference="hg19"

JOBLIST=
BAMFILES=

BWA=/usr/local/cluster/bin/bwa
SAMTOOLS=/usr/local/cluster/bin/samtools
BWAOPT_ALN=
BWAOPT_PE=
BWAOPT_SE=
PICMERGE=/usr/local/cluster/bin/MergeSamFiles.jar
PICOPTS=
VALIDATION_STRINGENCY=SILENT
CREATE_INDEX=true
MSD=true
ASSUME_SORTED=true

while getopts "n:r1:2:g:I:P:U:L:S:C:" opt
do
  case $opt in
   n)
     name=$OPTARG
     ;;
   1) 
     FR1=$(cd $(dirname $OPTARG); pwd)/$(basename $OPTARG) 
     ;;
   2) 
     FR2=$(cd $(dirname $OPTARG); pwd)/$(basename $OPTARG) 
     pairedEnd=true
     ;;
   g)
     reference=$OPTARG
     ;;
   I)
     ID=$OPTARG
     ;;
   P)
     PL=$OPTARG
     ;;
   U)
     PU=$OPTARG
     ;;
   L)
     LB=$OPTARG
     ;;
   S)
     SM=$OPTARG
     ;;
   C)
     CN=$OPTARG
     ;;
   r)
     run=true
     ;;  
   *)
     echo "Not implemented"
     exit 1
     ;;  
  esac
done
shift $(($OPTIND - 1))


#fcid=`tail -1 SampleSheet.csv | cut -d',' -f1 `
#reference=`tail -1 SampleSheet.csv | cut -d',' -f4`
#name=`tail -1 SampleSheet.csv | cut -d',' -f3`
#rindex=`tail -1 SampleSheet.csv | cut -d',' -f5`
#[[ "$rindex" == "" ]] && rindex=NoIndex

ref_genome=/lustre1/genomes/$reference/bwa/$reference
experiment_name=$name

WDIR=`basename $FR1`
LOCAL_SCRATCH=/lustre2/scratch/${RANDOM}/${experiment_name}
mkdir -p $LOCAL_SCRATCH
cd $LOCAL_SCRATCH

# split FR1
if [ $FR1 == ${FR1%%.gz} ]
then
  split -l 4000000 -d -a 4 $FR1 read1_
else
  gzcat $FR1 | split -l -d -a 4 - read1_
fi

if [[ $pairedEnd ]]
then
  if [ $FR2 == ${FR2%%.gz} ]
  then
    split -l 4000000 -d -a 4 $FR2 read2_
  else
    gzcat $FR2 | split -l -d -a 4 - read2_
  fi
fi    


#



for file in read1_*
do
	R1=$file
	R2=${file/read1_/read2_}
	chunk=${file##read1_}
	R_final=$experiment_name"_"$chunk

        job_nameR1="a1_${chunk}"
        job_nameR2="a2_${chunk}"
        job_nameR="s_"${chunk} 

	# write the first job script
	SCRIPT=job.r1.$chunk
	cat <<__EOF__> $SCRIPT
#PBS -l select=1:ncpus=2:mem=16g:app=java
#PBS -N ${job_nameR1:0:15}
#PBS -M cittaro.davide@hsr.it
#PBS -P ${experiment_name}
#PBS -m a

cd $LOCAL_SCRATCH

/usr/bin/time $BWA aln -t 2 $BWAOPT_ALN $ref_genome $R1 > $LOCAL_SCRATCH/${R1}.sai
__EOF__

	if $run
	then
		job_id1="$(qsub $SCRIPT)"
		test "x$job_id1" = "x" && { echo >&2 "*** error while submitting job $SCRIPT" ; exit 1 ; }
		qstat  $job_id1 || { echo >&2 "*** couldn't check for job $job_id1 (R1)" ; exit 1 ; }
	fi

#second job
if [[ $pairedEnd ]]
then

	# write the second job script

	SCRIPT=job.r2.$chunk
	cat <<__EOF__> $SCRIPT
#PBS -l select=1:ncpus=2:mem=16g:app=java
#PBS -N ${job_nameR2:0:15}
#PBS -M cittaro.davide@hsr.it
#PBS -P ${experiment_name}
#PBS -m a

cd $PWD

/usr/bin/time $BWA aln -t 2 $BWAOPT_ALN $ref_genome $R2 >$LOCAL_SCRATCH/${R2}.sai
__EOF__

	if $run
	then
		job_id2="$(qsub $SCRIPT)"
		test "x$job_id2" = "x" && { echo >&2 "*** error while submitting job $SCRIPT" ; exit 1 ; }
		qstat   $job_id2 || { echo >&2 "*** couldn't check for job $job_id2 (R2)" ; exit 1 ; }
	fi

	#------------------------------------------------------------------------#

	# create the third job: runs only if previous two successful.

	SCRIPT=job.final.$R_final
	cat <<__EOF__> $SCRIPT
#PBS -l select=1:ncpus=2:mem=24g:app=java
#PBS -W depend=afterok:$job_id1:$job_id2
#PBS -N ${job_nameR:0:15}
#PBS -M cittaro.davide@hsr.it
#PBS -P ${experiment_name}
#PBS -m a

cd $PWD

/usr/bin/time $BWA sampe $BWAOPT_PE -r "@RG\tID:${ID}\tPL:${PL}\tPU:${PU}\tLB:${LB}\tSM:${SM}\tCN:${CN}" $ref_genome $LOCAL_SCRATCH/${R1}.sai $LOCAL_SCRATCH/${R2}.sai  $R1 $R2 >$LOCAL_SCRATCH/${R_final}.sam
$SAMTOOLS view -Su $LOCAL_SCRATCH/${R_final}.sam | $SAMTOOLS sort - $R_final
__EOF__

	if $run
	then
		id_final="$(qsub $SCRIPT)"
		test "x$id_final" = "x" && { echo >&2 "*** error while submitting $SCRIPT" ; exit 1 ; }
		qstat   $id_final || { echo >&2 "*** couldn't check for job $id_final (final)" ; exit 1 ; }
	fi

	test "x$id_final" != "x" && JOBLIST=$JOBLIST:$id_final
        BAMFILES=$BAMFILES" "I=$R_final".bam"


else # is not paired

	SCRIPT=job.final.$R_final
	cat <<__EOF__> $SCRIPT
#PBS -l select=1:ncpus=2:mem=24g:app=java
#PBS -W depend=afterok:$job_id1
#PBS -N ${job_nameR:0:15}
#PBS -M cittaro.davide@hsr.it
#PBS -P ${experiment_name}
#PBS -m a

cd $PWD

/usr/bin/time $BWA samse $BWAOPT_SE -r "@RG\tID:${ID}\tPL:${PL}\tPU:${PU}\tLB:${LB}\tSM:${SM}\tCN:${CN}" $ref_genome $LOCAL_SCRATCH/${R1}.sai $R1 >$LOCAL_SCRATCH/${R_final}.sam
$SAMTOOLS view -Su $LOCAL_SCRATCH/${R_final}.sam | $SAMTOOLS sort - $R_final
__EOF__

	if $run
	then
		id_final="$(qsub $SCRIPT)"
		test "x$id_final" = "x" && { echo >&2 "*** error while submitting $SCRIPT" ; exit 1 ; }
		qstat   $id_final || { echo >&2 "*** couldn't check for job $id_final (final)" ; exit 1 ; }
	fi

	test "x$id_final" != "x" && JOBLIST=$JOBLIST:$id_final
        BAMFILES=$BAMFILES" "I=$R_final".bam"

fi #end of paired end test
done # end of files
#------------------------------------------------------------------------#

# 


JOBLIST=${JOBLIST#:}
BAMFILES=${BAMFILES#" "}

job_nameC="combin"$experiment_name 
SCRIPT=job.combine.$R_final

cat <<__EOF__> $SCRIPT
#PBS -l select=1:ncpus=8:mem=48g
#PBS -W depend=afterok:$JOBLIST
#PBS -N ${job_nameC:0:15}
#PBS -M cittaro.davide@hsr.it
#PBS -P ${experiment_name}
#PBS -m a
#PBS -m e

cd $PWD

java -jar $PICMERGE $BAMFILES \
        O=${experiment_name}.bam \
        CREATE_INDEX=true \
        MSD=true \
        VALIDATION_STRINGENCY=SILENT \
        ASSUME_SORTED=true \
        USE_THREADING=true

/usr/local/cluster/bin/samtools flagstat $experiment_name.bam
/bin/rm -f $LOCAL_SCRATCH/${R1}.sai
[[ -f $LOCAL_SCRATCH/${R2}.sai ]] && /bin/rm -f $LOCAL_SCRATCH/${R2}.sai
/bin/rm -f $LOCAL_SCRATCH/${R_final}.sam

/bin/mv $experiment_name.bam $experiment_name.bam.lock
/bin/mv $experiment_name.bai $experiment_name.bai.lock

/bin/rm -f *.bam *.bai

/bin/mv $experiment_name.bam.lock ${WDIR}/$experiment_name.bam 
/bin/mv $experiment_name.bai.lock ${WDIR}/$experiment_name.bai

rm -fr $LOCAL_SCRATCH
rm -fr `dirname ${LOCAL_SCRATCH}`
__EOF__

if $run
then
	id_combine="$(qsub $SCRIPT)"
	test "x$id_combine" = "x" && { echo >&2 "*** error while submitting $SCRIPT" ; exit 1 ; }
	qstat  $id_combine || { echo >&2 "*** couldn't check for job $id_combine (combine)" ; exit 1 ; }
fi

#------------------------------------------------------------------------#

exit