olgert denas avatar olgert denas committed 4eebabf

fixed small bugs and added options

Comments (0)

Files changed (3)

lib/bx/align/epo.py

     def _strfactory(cls, line):
         """factory class method for Chain
 
-        @param line: header of a chain (in .chain format)
+        :param line: header of a chain (in .chain format)
         """
 
         assert type(line) == str, "this is a factory from string"
         chain header will have tStrand=+, qStrand=+ (resp. -). No strand
         changes on the other cases.
 
-        @param trg_comp: target (i.e, the first) component
-        @type trg_comp: L{EPOitem}
-        @param qr_comp: query (i.e, the second) component
-        @type qr_comp: L{EPOitem}
-        @param trg_chrom_sizes: chromosome sizes of the target
-        @type trg_chrom_sizes: dictionary of the type (chrom) --> size
-        @param qr_chrom_sizes: chromosome sizes of the query
-        @type qr_chrom_sizes: dictionary of the type (chrom) --> size
-        @return: A L{Chain} instance"""
+        :param trg_comp: target (i.e, the first) component
+        :type trg_comp: L{EPOitem}
+        :param qr_comp: query (i.e, the second) component
+        :type qr_comp: L{EPOitem}
+        :param trg_chrom_sizes: chromosome sizes of the target
+        :type trg_chrom_sizes: dictionary of the type (chrom) --> size
+        :param qr_chrom_sizes: chromosome sizes of the query
+        :type qr_chrom_sizes: dictionary of the type (chrom) --> size
+        :return: A L{Chain} instance"""
 
         # size, target, query arrays
         S, T, Q = [], [], []
     def _parse_file(cls, fname, pickle=False):
         """parse a .chain file into a list of the type [(L{Chain}, arr, arr, arr) ...]
 
-        @param fname: name of the file"""
+        :param fname: name of the file"""
 
         if fname.endswith('.pkl'):
             log.debug("loading pickled file %s ..." % fname)
     def _strfactory(cls, line):
         """factory method for an EPOitem
 
-        @param line: a line of input"""
+        :param line: a line of input"""
 
         cmp = line.rstrip().split()
         chrom = cmp[2]
     def _parse_epo(cls, fname):
         """Load an entire file in the EPO format into a dictionary of the type {gab_id => [Epoitem, ...]}
 
-        @param fname: file name"""
+        :param fname: file name"""
 
         data = {}
         with open(fname) as fd:
     def cigar_iter(self, reverse):
         """self.cigar => [(length, type) ... ] iterate the cigar
 
-        @param reverse: whether to iterate in the reverse direction (right-to-left)
-        @type reverse: boolean
+        :param reverse: whether to iterate in the reverse direction (right-to-left)
+        :type reverse: boolean
 
-        @return a list of pairs of the type [(length, M/D) ..]
+        :return a list of pairs of the type [(length, M/D) ..]
         """
 
         l = 0
         for example 4MD4M2DM with reverse=False will produce [(0,4), (5,9), (11,12)]
         4MD4M2DM with reverse=True will produce [(0,1), (3,7), (8,12)] (= 12 - previous interval)
 
-        @param reverse: whether to iterate in the reverse direction (right-to-left) (this is passed as is to self.cigar_iter)
-        @type reverse: boolean
-        @param thr: shift all intervals by this much
-        @type thr: integer
+        :param reverse: whether to iterate in the reverse direction (right-to-left) (this is passed as is to self.cigar_iter)
+        :type reverse: boolean
+        :param thr: shift all intervals by this much
+        :type thr: integer
 
-        @return: list of pairs"""
+        :return: list of pairs"""
 
         d = [(thr,thr)]
         dl = 0

scripts/bnMapper.py

 from bx.align import epo
 from bx.align.epo import bed_union as elem_u
 
-elem_t = np.dtype([('chrom', np.str_, 5), ('start', np.int64), ('end', np.int64), ('id', np.str_, 100)])
+elem_t = np.dtype([('chrom', np.str_, 30), ('start', np.int64), ('end', np.int64), ('id', np.str_, 100)])
+LOG_LEVELS = {"info" : logging.INFO, "debug" : logging.DEBUG, "silent" : logging.CRITICAL}
 
-logging.basicConfig(level=logging.INFO)
+logging.basicConfig()
 log = logging.getLogger()
 
 class GIntervalTree( IntervalTree ):
         """insert an element. use this method as the IntervalTree one.
         this will simply call the IntervalTree.add method on the right tree
 
-        @param chrom: chromosome
-        @param element: the argument of IntervalTree.insert_interval
-        @return: None
+        :param chrom: chromosome
+        :param element: the argument of IntervalTree.insert_interval
+        :return: None
         """
 
         self._trees.setdefault(chrom, IntervalTree()).insert_interval( element )
     def find(self, chrom, start, end):
         """find the intersecting elements
 
-        @param chrom: chromosome
-        @param start: start
-        @param end: end
-        @return: a list of intersecting elements"""
+        :param chrom: chromosome
+        :param start: start
+        :param end: end
+        :return: a list of intersecting elements"""
 
         tree = self._trees.get( chrom, None )
         if tree:
     """transform the coordinates of this elem into the other species.
 
     elem intersects this chain's ginterval.
-    return a lisit of the type [(to_chr, start, end, elem[id]) ... ]"""
+    :return: a list of the type [(to_chr, start, end, elem[id]) ... ]"""
 
     start, end = max(elem['start'], chain.tStart) - chain.tStart, min(elem['end'], chain.tEnd) - chain.tStart
 
 
         #mapped elements must end up in the same chain and chromosome
         if len(matching_block_ids) == 0 or len(matching_block_ids) > 1:
-            log.debug("%s in different chain/chromosomes" % (str(from_elem)))
+            log.debug("%s no match or in different chain/chromosomes" % (str(from_elem)))
             continue
 
         # do the actual mapping
     parser = argparse.ArgumentParser(description=__doc__, epilog="Olgert Denas (Taylor Lab)",
             formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 
-    parser.add_argument("input", help="Input to process. If this is a directory, all files in it will be mapped and placed on --output, which should be a directory.")
+    parser.add_argument("input", nargs='*',
+            help="Input to process. If more than a file is specified, all files will be mapped and placed on --output, which should be a directory.")
     parser.add_argument("alignment", help="Alignment file (.chain or .pkl)")
 
     parser.add_argument("-f", '--format', choices=("BED4", "BED12"), default="BED4",
             help="Output format.")
     parser.add_argument("-o", '--output', metavar="FILE", default='stdout',
             type=lambda s: ((s in ('stdout', '-') and "/dev/stdout") or s),
-            help="Output file. Mandatory if input is a directory.")
+            help="Output file. Mandatory if more than on file in input.")
     parser.add_argument("-t", '--threshold', metavar="FLOAT", default=0., type=float,
             help="Mapping threshold i.e., |elem| * threshold <= |mapped_elem|")
     parser.add_argument("-s", '--screen', default=False, action='store_true',
             help="Only report elements in the alignment (without mapping). -t has not effect here (TODO)")
     parser.add_argument('-g', '--gap', type=int, default=-1,
             help="Ignore elements with an insertion/deletion of this or bigger size.")
+    parser.add_argument('-v', '--verbose', type=str, choices=LOG_LEVELS.keys(), default='info',
+            help='Verbosity level')
 
 
     opt = parser.parse_args()
+    log.setLevel(LOG_LEVELS[opt.verbose])
 
     #check for output if input is a directory arguments
-    if os.path.isdir(opt.input) and (not os.path.isdir(opt.output)):
-        parser.error("If input is a dir, output is mandatory and should be a dir as well")
+    if len(opt.input) > 1 and (not os.path.isdir(opt.output)):
+        parser.error("For multiple inputs, output is mandatory and should be a dir.")
 
 
     #loading alignments from opt.alignment
         TREE.add(chain.tName, Interval(chain.tStart, chain.tEnd, chain.id))
 
     # transform elements
-    if os.path.isdir(opt.input):
-        for infn in os.listdir(opt.input):
-            inpath = os.path.join(opt.input, infn)
-            outpath = os.path.join(opt.output, infn)
+    if len(opt.input) > 1:
+        for inpath in opt.input:
+            if not os.path.isfile(inpath):
+                log.warning("skipping %s (not a file) ..." % inpath)
+                continue
+            outpath = os.path.join(opt.output, os.path.basename(inpath))
             if os.path.isfile(outpath):
                 log.warning("overwriting %s ..." % outpath)
             transform_file(loadFeatures(inpath), outpath, EPO, TREE, opt)
     else:
-        transform_file(loadFeatures( opt.input ), opt.output, EPO, TREE, opt)
+        transform_file(loadFeatures( opt.input[0] ), opt.output, EPO, TREE, opt)
 
 
 

scripts/out_to_chain.py

             epilog="Olgert Denas (Taylor Lab)",
             formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 
-    parser.add_argument("input", help="File to process")
+    parser.add_argument("input", help="File to process.")
     parser.add_argument("--species", nargs=2, default=["homo_sapiens", "mus_musculus"],
-            help="Names of target and query species (respectively) in the alignment")
-    parser.add_argument("--chrsizes", nargs=2,
-            help="Chromosome sizes for the given species. For human and mouse, these are provided, so you can leave it empty.")
+            help="Names of target and query species (respectively) in the alignment.")
+    parser.add_argument("--chrsizes", nargs=2, required=True,
+            help="Chromosome sizes for the given species.")
     parser.add_argument("-o", '--output', metavar="FILE", default='stdout', type=outFile, help="Output file")
 
     opt = parser.parse_args()
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.