1. Davide Cittaro
  2. trumpet

Commits

dawe  committed 32e02ac

auto threshold available, much like h-index

  • Participants
  • Parent commits 2591bae
  • Branches default

Comments (0)

Files changed (1)

File scripts/bam2xgmml.py

View file
   option_parser.add_argument("-q","--mapquality", help="Mapping quality filter", type=int, default=0)
   option_parser.add_argument("-p","--pairs", help="Min. number of pairs required to call an edge", type=int, default=10)
   option_parser.add_argument("-G","--graphname", help="Graph name (will appear in cytoscape)", default="Graph from BAM")
+  option_parser.add_argument("-D","--dump", help="Dump network data in a NPZ opject with given name", default=None)
+  option_parser.add_argument("-a","--autothreshold", help="Sets threshold to N so that there are at least N interaction with N supporting pairs", action="store_true", default=False)
   options = option_parser.parse_args()
 
   interval_tree = {} # to store genomic intervals
   # get non-zero elements of the sparse matrix. Use only lower triangular matrix
   (node1, node2, weight) = scipy.sparse.find(scipy.sparse.tril(ad_matrix))
   
-  mask = weight > options.pairs
+  if options.dump:
+    np.savez(options.dump, node1, node2, weight)
+  
+  threshold = 1
+  if options.autothreshold:
+    n_int = len(weight[weight >= threshold])
+    while n_int > threshold:
+      threshold += 1
+      n_int = len(weight[weight >= threshold])
+      if n_int == 0:
+        sys.stderr.write("Cannot determine automatich threshold, switching to %d\n" % options.pairs)
+        threshold = options.pairs
+        break
+  else:
+    threshold = options.pairs
+  sys.stderr.write("Threshold set to %s\n" % threshold)
+  mask = weight >= threshold
   visited_nodes = np.unique(np.append(node1[mask], node2[mask]))
   
   # print network. It would be much elegant with xml modules, but I don't have the docs right now, so I will print lines...
 
   options.outfile.write('<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n')
-  options.outfile.write('<graph label="%s"\n' % options.graphname)
-  options.outfile.write("""xmlns:dc="http://purl.org/dc/elements/1.1/" 
-      xmlns:xlink="http://www.w3.org/1999/xlink" 
-      xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" 
-      xmlns:cy="http://www.cytoscape.org" 
-      xmlns="http://www.cs.rpi.edu/XGMML"  
-      directed="0">
+  options.outfile.write('<graph label="%s"' % options.graphname)
+  options.outfile.write("""
+  xmlns:dc="http://purl.org/dc/elements/1.1/" 
+  xmlns:xlink="http://www.w3.org/1999/xlink" 
+  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" 
+  xmlns:cy="http://www.cytoscape.org" 
+  xmlns="http://www.cs.rpi.edu/XGMML"  
+  directed="0">
   """)
 
   for id1 in visited_nodes: