Commits

Davide Cittaro committed 1230aa5

various things

Comments (0)

Files changed (4)

 #PBS -P ${experiment_name}
 #PBS -m a
 
+TMP_SCRATCH=/dev/shm/\${RANDOM}/${experiment_name}
+mkdir -p \$TMP_SCRATCH
 cd $PWD
 
-/usr/bin/time $BWA sampe $BWAOPT_PE -r "@RG\tID:$experiment_name"_"$lane\tPL:illumina\tPU:$fcid\tLB:$experiment_name\tSM:$experiment_name\tCN:CTGB" $ref_genome $LOCAL_SCRATCH/tmp.$R1_final $LOCAL_SCRATCH/tmp.$R2_final  $R1 $R2 >$LOCAL_SCRATCH/tmp.sampe.$R_final
-$SAMTOOLS view -Su $LOCAL_SCRATCH/tmp.sampe.$R_final | $SAMTOOLS sort - $R_final
+/usr/bin/time $BWA sampe $BWAOPT_PE -r "@RG\tID:$experiment_name"_"$lane\tPL:illumina\tPU:$fcid\tLB:$experiment_name\tSM:$experiment_name\tCN:CTGB" $ref_genome $LOCAL_SCRATCH/tmp.$R1_final $LOCAL_SCRATCH/tmp.$R2_final  $R1 $R2 >\$TMP_SCRATCH/tmp.sampe.$R_final
+cd \$TMP_SCRATCH
+$SAMTOOLS view -Su \$TMP_SCRATCH/tmp.sampe.$R_final | $SAMTOOLS sort - $R_final
+mv ${R_final}.bam ${LOCAL_SCRATCH}
+rm -fr \`dirname \${TMP_SCRATCH}\`
 __EOF__
 
+
 	if $run
 	then
 		id_final="$(qsub $SCRIPT)"
 	fi
 
 	test "x$id_final" != "x" && JOBLIST=$JOBLIST:$id_final
-        BAMFILES=$BAMFILES" "I=$R_final".bam"
+        BAMFILES=$BAMFILES" "I=${LOCAL_SCRATCH}/$R_final".bam"
 
 done
 

bwa_submit_single_file.sh

 #!/bin/bash
 # simple script to compute alll bwa alignment in a dir.
 
+function get_help {
+cat << _EOF
+Usage:
+
+bwa_submit_single_file.sh [options] -1 file_R1 [-2 file_R2] [-r]
+	-n name		name of the experiment
+	-1 file		file of read1
+	-2 file		file of read2
+	-g ref		name of the reference genome
+	-I id		Sample ID
+	-P name		Platform [illumina]
+	-U id		Platform unit (flowcell ID)
+	-L id		Library
+	-S name		Sample name
+	-C name		Center [CTGB]
+	-r		run the script
+_EOF
+
+exit 1 
+}
+
 pairedEnd=false
 run=false
 
 MSD=true
 ASSUME_SORTED=true
 
-while getopts "n:r1:2:g:I:P:U:L:S:C:" opt
+[[ ${#*} == 0 ]] && get_help
+while getopts "hn:r1:2:g:I:P:U:L:S:C:" opt
 do
   case $opt in
+   h)
+     get_help
+     ;;
    n)
      name=$OPTARG
      ;;
      run=true
      ;;  
    *)
-     echo "Not implemented"
-     exit 1
+     get_help
      ;;  
   esac
 done
 shift $(($OPTIND - 1))
 
-
 #fcid=`tail -1 SampleSheet.csv | cut -d',' -f1 `
 #reference=`tail -1 SampleSheet.csv | cut -d',' -f4`
 #name=`tail -1 SampleSheet.csv | cut -d',' -f3`
 ref_genome=/lustre1/genomes/$reference/bwa/$reference
 experiment_name=$name
 
-WDIR=`basename $FR1`
+WDIR=`dirname $FR1`
 LOCAL_SCRATCH=/lustre2/scratch/${RANDOM}/${experiment_name}
+
+cat << EOF
+Read1: $FR1
+Read2: $FR2
+paired: $pairedEnd
+reference: $ref_genome
+name: $name
+ID: $ID
+PL: $PL
+PU: $PU
+LB: $LB
+SM: $SM
+CN: $CN
+run: $run
+working dir: $WDIR
+scratch: $LOCAL_SCRATCH
+
+EOF
+
+
 mkdir -p $LOCAL_SCRATCH
 cd $LOCAL_SCRATCH
 
 # split FR1
 if [ $FR1 == ${FR1%%.gz} ]
 then
-  split -l 4000000 -d -a 4 $FR1 read1_
+  split -l 8000000 -d -a 4 $FR1 read1_
 else
-  gzcat $FR1 | split -l -d -a 4 - read1_
+  zcat $FR1 | split -l 8000000 -d -a 4 - read1_
 fi
 
-if [[ $pairedEnd ]]
+if [[ $pairedEnd == 'true' ]]
 then
   if [ $FR2 == ${FR2%%.gz} ]
   then
-    split -l 4000000 -d -a 4 $FR2 read2_
+    split -l 8000000 -d -a 4 $FR2 read2_
   else
-    gzcat $FR2 | split -l -d -a 4 - read2_
+    zcat $FR2 | split -l 8000000 -d -a 4 - read2_
   fi
 fi    
 
 #PBS -N ${job_nameR1:0:15}
 #PBS -M cittaro.davide@hsr.it
 #PBS -P ${experiment_name}
+#PBS -o ${WDIR}/${job_nameR1}.log
+#PBS -e ${WDIR}/${job_nameR1}.err
 #PBS -m a
 
 cd $LOCAL_SCRATCH
 	fi
 
 #second job
-if [[ $pairedEnd ]]
+if [[ $pairedEnd == 'true' ]]
 then
 
 	# write the second job script
 #PBS -N ${job_nameR2:0:15}
 #PBS -M cittaro.davide@hsr.it
 #PBS -P ${experiment_name}
+#PBS -o ${WDIR}/${job_nameR2}.log
+#PBS -e ${WDIR}/${job_nameR2}.err
 #PBS -m a
 
 cd $PWD
 #PBS -N ${job_nameR:0:15}
 #PBS -M cittaro.davide@hsr.it
 #PBS -P ${experiment_name}
+#PBS -o ${WDIR}/${job_nameR}.log
+#PBS -e ${WDIR}/${job_nameR}.err
 #PBS -m a
 
 cd $PWD
-
-/usr/bin/time $BWA sampe $BWAOPT_PE -r "@RG\tID:${ID}\tPL:${PL}\tPU:${PU}\tLB:${LB}\tSM:${SM}\tCN:${CN}" $ref_genome $LOCAL_SCRATCH/${R1}.sai $LOCAL_SCRATCH/${R2}.sai  $R1 $R2 >$LOCAL_SCRATCH/${R_final}.sam
-$SAMTOOLS view -Su $LOCAL_SCRATCH/${R_final}.sam | $SAMTOOLS sort - $R_final
+TMP_SCRATCH=/dev/shm/\${RANDOM}/${experiment_name}
+mkdir -p \$TMP_SCRATCH
+/usr/bin/time $BWA sampe $BWAOPT_PE -r "@RG\tID:${ID}\tPL:${PL}\tPU:${PU}\tLB:${LB}\tSM:${SM}\tCN:${CN}" $ref_genome $LOCAL_SCRATCH/${R1}.sai $LOCAL_SCRATCH/${R2}.sai  $R1 $R2 >\$TMP_SCRATCH/${R_final}.sam
+rm $LOCAL_SCRATCH/${R1}.sai $LOCAL_SCRATCH/${R2}.sai
+cd \$TMP_SCRATCH
+$SAMTOOLS view -Su \$TMP_SCRATCH/${R_final}.sam | $SAMTOOLS sort - $R_final
+mv ${R_final}.bam ${LOCAL_SCRATCH}
+rm -fr \`dirname \${TMP_SCRATCH}\`
 __EOF__
 
 	if $run
 #PBS -N ${job_nameR:0:15}
 #PBS -M cittaro.davide@hsr.it
 #PBS -P ${experiment_name}
+#PBS -o ${WDIR}/${job_nameR}.log
+#PBS -e ${WDIR}/${job_nameR}.err
 #PBS -m a
 
 cd $PWD
 
-/usr/bin/time $BWA samse $BWAOPT_SE -r "@RG\tID:${ID}\tPL:${PL}\tPU:${PU}\tLB:${LB}\tSM:${SM}\tCN:${CN}" $ref_genome $LOCAL_SCRATCH/${R1}.sai $R1 >$LOCAL_SCRATCH/${R_final}.sam
-$SAMTOOLS view -Su $LOCAL_SCRATCH/${R_final}.sam | $SAMTOOLS sort - $R_final
+/usr/bin/time $BWA samse $BWAOPT_SE -r "@RG\tID:${ID}\tPL:${PL}\tPU:${PU}\tLB:${LB}\tSM:${SM}\tCN:${CN}" $ref_genome $LOCAL_SCRATCH/${R1}.sai $R1 >\$TMP_SCRATCH/${R_final}.sam
+rm $LOCAL_SCRATCH/${R1}.sai 
+TMP_SCRATCH=/dev/shm/\${RANDOM}/${experiment_name}
+mkdir -p \$TMP_SCRATCH
+cd \$TMP_SCRATCH
+$SAMTOOLS view -Su \$TMP_SCRATCH/${R_final}.sam | $SAMTOOLS sort - $R_final
+mv ${R_final}.bam ${LOCAL_SCRATCH}
+rm \$TMP_SCRATCH/${R_final}.sam 
+rm -fr \`dirname \${TMP_SCRATCH}\`
 __EOF__
 
 	if $run
 BAMFILES=${BAMFILES#" "}
 
 job_nameC="combin"$experiment_name 
-SCRIPT=job.combine.$R_final
+SCRIPT=job.combine.$experiment_name
 
 cat <<__EOF__> $SCRIPT
 #PBS -l select=1:ncpus=8:mem=48g
 #PBS -N ${job_nameC:0:15}
 #PBS -M cittaro.davide@hsr.it
 #PBS -P ${experiment_name}
+#PBS -o ${WDIR}/${job_nameC}.log
+#PBS -e ${WDIR}/${job_nameC}.err
 #PBS -m a
 #PBS -m e
 
   vcf_out = vcf.Writer(sys.stdout, vcf_file, lineterminator='\n')
   # we now traverse the tree. As soon as we get into an overlapping feature
   # we update the record entry and the INFO in the header
+  last_snp = ""
   for record in vcf_file:
     chrom = record.CHROM
+    this_snp = "%s:%d-%s-%s-%s" % (record.CHROM, record.POS, record.ID, record.REF, record.ALT)
     if not gene_props[chrom]:
       # no need to process, just write
       vcf_out.write_record(record)
               # this happens when we only have one gene, no columns in the annotation file
               record.INFO[prefix] = True
             added.add(gene)
-          vcf_out.write_record(record)     
+          if this_snp != last_snp:
+            vcf_out.write_record(record)     
+          last_snp = this_snp
 
 def mark_main():
 

soapsplice_submit_lane.sh

 
 cd $PWD
 
+TMP_SCRATCH=/dev/shm/\${RANDOM}/${experiment_name}
+mkdir -p \$TMP_SCRATCH
+
 #create header
-awk '{OFS="\t"; print "@SQ","SN:"\$1,"LN:"\$2}' $faidx > ${LOCAL_SCRATCH}/header.$R_final
-echo -e "@RG\tID:$experiment_name"_"$lane\tPL:illumina\tPU:$fcid\tLB:$experiment_name\tSM:$experiment_name\tCN:CTGB" >> ${LOCAL_SCRATCH}/header.$R_final
+awk '{OFS="\t"; print "@SQ","SN:"\$1,"LN:"\$2}' $faidx > \${TMP_SCRATCH}/header.$R_final
+echo -e "@RG\tID:$experiment_name"_"$lane\tPL:illumina\tPU:$fcid\tLB:$experiment_name\tSM:$experiment_name\tCN:CTGB" >> \${TMP_SCRATCH}/header.$R_final
 sversion=( \`$SSPLICE | head -n1\` )
-echo -e "@PG\tID:soapsplice\tPN:soapsplice\tVN:\${sversion[2]}" >> ${LOCAL_SCRATCH}/header.$R_final
+echo -e "@PG\tID:soapsplice\tPN:soapsplice\tVN:\${sversion[2]}" >> \${TMP_SCRATCH}/header.$R_final
 
-$SSPLICE -d $ref_genome -1 $R1 -2 $R2 -o ${LOCAL_SCRATCH}/${R_final} -p 4 ${SSPLICEOPT_ALN}
+$SSPLICE -d $ref_genome -1 $R1 -2 $R2 -o \${TMP_SCRATCH}/${R_final} -p 4 ${SSPLICEOPT_ALN}
+
+cd \$TMP_SCRATCH
+cat \${TMP_SCRATCH}/header.$R_final \$TMP_SCRATCH/${R_final}.sam | $SAMTOOLS view -Su - | $SAMTOOLS sort - $R_final
+mv ${R_final}.bam ${LOCAL_SCRATCH}
+mv \$TMP_SCRATCH/*.junc ${LOCAL_SCRATCH}
+rm -fr \`dirname \${TMP_SCRATCH}\`
+
 __EOF__
 
 	if $run
 
 	# create the third job: runs only if previous two successful.
 
-	SCRIPT=job.final.$R_final
-	cat <<__EOF__> $SCRIPT
-#PBS -l select=1:ncpus=2:mem=24g
-#PBS -W depend=afterok:$job_id1
-#PBS -N s_${R_final:0:13}
-#PBS -M cittaro.davide@hsr.it
-#PBS -P ${experiment_name}
-#PBS -m a
-
-cd $PWD
-
-cat ${LOCAL_SCRATCH}/header.$R_final ${LOCAL_SCRATCH}/${R_final}.sam | $SAMTOOLS view -Su - | $SAMTOOLS sort - $R_final
-__EOF__
-
-	if $run
-	then
-		id_final="$(qsub $SCRIPT)"
-		test "x$id_final" = "x" && { echo >&2 "*** error while submitting $SCRIPT" ; exit 1 ; }
-		qstat   $id_final || { echo >&2 "*** couldn't check for job $id_final (final)" ; exit 1 ; }
-	fi
-
 	test "x$id_final" != "x" && JOBLIST=$JOBLIST:$id_final
         BAMFILES=$BAMFILES" "I=$R_final".bam"