Commits

Ross Lazarus committed b5eab7e

Max fraction in one sample is working now; needs more checking.

Comments (0)

Files changed (2)

tools/rgenetics/rgTableRanks.py

         med = 0
         sigma = 0
         if self.nfiles > 0:
-            ranks = self.fileMaxfrac.values()
+            ranks = self.fileMaxfracs.values()
             ranks.sort() # for median
             mu = self.mean(ranks)
             med = self.median(ranks)
         self.ntoreport = int(opts.ntoreport)
         self.doMaxfrac = opts.startNorm # none if not set
         self.normColumns = []
+        self.startNorm = self.endNorm = None
         if self.doMaxfrac:
-			assert opts.startNorm > 0 and opts.endNorm > 0, 'Start of normal counts and end of normal counts must both be greater than zero - given %d:%d' % (opts.startNorm,opts.endNorm)
-			assert opts.startNorm < opts.endNorm,'Start of normal counts column must be less than end normal counts column'
-			self.normColumns = range(opts.startNorm-1, opts.endNorm) 
+            self.startNorm = int(opts.startNorm)
+            self.endNorm = int(opts.endNorm)
+            assert self.startNorm > 0 and self.endNorm > 0, 'Start of normal counts and end of normal counts must both be greater than zero - given %d:%d' % (opts.startNorm,opts.endNorm)
+            assert self.startNorm < self.endNorm,'Start of normal counts column must be less than end normal counts column'
+            self.normColumns = range(self.startNorm-1, self.endNorm) 
         if len(self.flist) > 0:
             self.readAll() # autorun
  
             self.termdict[t] = concordRank(t) # create
         self.termdict[t].addRank(f,frank)
         if maxFrac:
-			self.termdict[t].addMaxfrac(f,maxFrac)
+           self.termdict[t].addMaxfrac(f,maxFrac)
 
     def readOne(self,fnum,f):
         """ read a file and create/bump all concord term records to record term hits
         d = [x.strip().split(self.delim) for x in d]
         terms = [(x[col],i+1) for i,x in enumerate(d)] # assume sorted already
         if self.doMaxfrac:
-			normcounts = [[x[i] for i in self.normColumns] for x in d]
-			normSums = [sum(x) for x in normcounts]
-			normFracs = [[x/normSums[j] for x in y] for j,y in enumerate(normSums)]
-			maxF = [max(x) for x in normFracs]
-		else:
-			maxF = [None for x in terms]
+            normCounts = [[float(x[i]) for i in self.normColumns] for x in d]
+	    normSums = [sum(x) for x in normCounts]
+	    normFracs = [[x/normSums[j] for x in y] for j,y in enumerate(normCounts)]
+	    maxF = [max(x) for x in normFracs]
+	else:
+	    maxF = [None for x in terms]
         for i,(t,frank) in enumerate(terms):
             self.addTerm(t,f,frank,maxF[i])
             
         for t in self.termdict.values():
             (mu,med,sigma) = t.getStats()
             if self.doMaxfrac:
-				(fmu,fmed,fsigma) = t.getMaxfracStats()
-				newres = [med,(t.term,str(med),str(sigma),str(t.nfiles),fmu,fsigma)])
-			else:
-				newres = [med,(t.term,str(med),str(sigma),str(t.nfiles))])
+                (fmu,fmed,fsigma) = t.getMaxfracStats()
+              	newres = [med,(t.term,str(med),str(sigma),str(t.nfiles),str(fmu),str(fsigma))]
+            else:
+                newres = [med,(t.term,str(med),str(sigma),str(t.nfiles))]
             res.append(newres)
         res.sort() # decorated in rank order
         res = ['\t'.join(x[1]) for x in res] # undecorate and return only the results for printing
         if self.doMaxfrac:
-			res.insert(0,'Term\tMedianRank\tSDRank\tNfiles\tMeanMaxfrac\tSDMaxfrac')
-		else:
-			res.insert(0,'Term\tMedianRank\tSDRank\tNfiles')
+           res.insert(0,'Term\tMedianRank\tSDRank\tNfiles\tMeanMaxfrac\tSDMaxfrac')
+        else:
+           res.insert(0,'Term\tMedianRank\tSDRank\tNfiles')
         if self.ntoreport == 0:
             return res # not truncated
         else:

tools/rgenetics/rgTableRanks.xml

   <description>for multiple tabular files</description>
   <command interpreter="python">rgTableRanks.py --colnum "$colnum" --ntoreport "$ntoreport" --output_tab "$output_tab"
    --has_header "$has_header"
+  #if $doNorm.normCols == "yes":
+  --startNorm "$doNorm.startNorm" --endNorm "$doNorm.endNorm"
+  #end if
   #for $fname in $inputs:
      --input_list "${fname.inputf}"
   #end for 
-  #if doNorm.normCols == "yes":
-  --startNorm "$doNorm.startNorm" --endNorm "$doNorm.endNorm"
-  #end if
   </command>
   <inputs>
     <repeat name="inputs" title="Input files to rank - must all have the identifier (eg gene name) in the same column" >
-        <param name="inputf" type="data" format="tabular" label="Another input tabular file from your history" 
-   size="100"/>
+        <param name="inputf" type="data" format="tabular" label="Another input tabular file from your history"  size="100"/>
     </repeat>   
     <param name="colnum" type="integer" value="1" size="5" label="Column (1=first) containing the identifier to be ranked across all input files"/>
     <conditional name="doNorm">
-	   <param name="normCols" type="select" label="Evaluate normalised counts (report maximum fraction of each contig's reads in any sample - large fractions suggest poor evidence)" 
-         help="Use this option if this is an edgeR or DESeq output file where normalised contig counts are available for each sample">
-        <option value="yes">Yes</option>
-        <option value="" selected="true">No</option>
-    </param> 
-    <when value="yes">
+	 <param name="normCols" type="select" label="Evaluate normalised counts (report maximum fraction of each contig's reads in any sample - large fractions suggest poor evidence)" 
+          help="Use this option if this is an edgeR or DESeq output file where normalised contig counts are available for each sample">
+            <option value="yes">Yes</option>
+            <option value="" selected="true">No</option>
+        </param> 
+        <when value="yes">
 		<param name="startNorm" type="integer" value="0" size="5" label="Column (1=first) containing the first normalized count to be analysed"/>
 		<param name="endNorm" type="integer" value="0" size="5" label="Column (1=first) containing the last normalized count to be analysed"/>
-    </when>	
+        </when>	
     </conditional>
     <param name="has_header" type="boolean" display="checkbox" checked="true" truevalue="True" falsevalue="False" label="Files all have a header row that should be ignored" />
     <param name="ntoreport" type="integer" value="100" size="5" label="Total number of top ranked ids to report (0=all)"/>