David McClosky  committed 680afc3

second-stage/programs/features/combine-feature-counts: Add option for setting threshold, sort features so NLogP is first

  • Participants
  • Parent commits 57cf9e2
  • Branches default

Comments (0)

Files changed (1)

File second-stage/programs/features/combine-feature-counts

 and renumbering the features.  This can be used on the output of
 extract-spfeatures-counts.  It should not need to be used on the output
 of extract-spfeatures."""
-import sys
-import gzip, bz2
+import sys, getopt, gzip, bz2
+opts, args = getopt.gnu_getopt(sys.argv[1:], 't')
+threshold = int(dict(opts).get('t', 5))
+print >>sys.stderr, "Threshold:", threshold
 def opener(filename):
     """Open files based on their extension."""
         return file(filename)
 name_to_freq = {} # { feature name : freq }
-threshold = 5
-for filename in sys.argv[1:]:
+for filename in args:
     print >>sys.stderr, "Reading", filename
     for line in opener(filename):
         freq, name = line.split('\t', 1)
 feature_names = [name for name, freq in name_to_freq.iteritems()
     if freq >= threshold]
 print >>sys.stderr, len(feature_names), "pruned features"
+def sorter(key):
+    # Sort key: 'NLogP 0' gets False in the first slot, so it orders before every other name; ties fall back to the name itself.
+    return (key != 'NLogP 0', key)
 print >>sys.stderr, "Writing sorted features out..."
 i = 0