1. James Taylor
  2. bx-python

Commits

James Taylor  committed b8bc17f

setuptools / egg related changes (incomplete)

  • Participants
  • Parent commits 5adad74
  • Branches default

Comments (0)

Files changed (12)

File bed_complement.py

View file
 usage: %prog bed_file chrom_length_file
 """
 
+import pkg_resources
+pkg_resources.require( "bx-python" )
+
 import sys
 
 from bx.bitset import *
 
 import cookbook.doc_optparse
 
-import pkg_resources
-pkg_resources.require( "bx-python" )
-
 def read_len( f ):
     """Read a 'LEN' file and return a mapping from chromosome to length"""
     mapping = dict()

File bed_coverage.py

View file
 usage: %prog bed files ...
 """
 
+import pkg_resources
+pkg_resources.require( "bx-python" )
+
 import psyco_full
 import sys
 from bx.bitset import BinnedBitSet
 from bx.bitset_builders import *
 from itertools import *
 
-import pkg_resources
-pkg_resources.require( "bx-python" )
-
 bed_filenames = sys.argv[1:]
 if bed_filenames:
     input = chain( * imap( open, bed_filenames ) )

File bed_intersect.py

View file
 
 # Read first bed into some bitsets
 
-bitsets = binned_bitsets_from_file( open( in_fname ) )
+bitsets = binned_bitsets_from_bed_file( open( in_fname ) )
 
 # Read second BED and intersect
 

File bed_intersect_basewise.py

View file
 
 usage: %prog bed_file_1 bed_file_2
 """
+
+import pkg_resources
+pkg_resources.require( "bx-python" )
+
 import sys
 from warnings import warn
 from bx.bitset import *
 from bx.bitset_builders import *
 import cookbook.doc_optparse
 
-import pkg_resources
-pkg_resources.require( "bx-python" )
-
 options, args = cookbook.doc_optparse.parse( __doc__ )
 try:
     in_fname, in2_fname = args

File bed_merge_overlapping.py

View file
 usage: %prog bed files ...
 """
 
+import pkg_resources
+pkg_resources.require( "bx-python" )
+
 import psyco_full
 import sys
 
 from bx.bitset_builders import *
 from itertools import *
 
-import pkg_resources
-pkg_resources.require( "bx-python" )
-
 bed_filenames = sys.argv[1:]
 if bed_filenames:
     input = chain( * imap( open, bed_filenames ) )

File bed_rand_intersect.py

View file
 
 from __future__ import division 
 
+import pkg_resources
+pkg_resources.require( "bx-python" )
+
 import sys, random
 import bisect
 import stats
 from Numeric import *
-from bx.bitset import BitSet
+from bx.bitset import *
 
 maxtries = 1000
 
+class MaxtriesException( Exception ):
+    pass
+
+def bit_clone( bits ):
+    new = BitSet( bits.size )
+    new.ior( bits )
+    return new
+
 def throw_random_2( lengths, mask ):
     """
     Version of throw using gap lists (like Hiram's randomPlacement). This 
             else: 
                 break
         if max_candidate == 0:
-            raise "No gap can fit region of length %d" % length
+            raise MaxtriesException( "No gap can fit region of length %d" % length )
         # Select start position
         s = random.randrange( candidate_bases )
         # Map back to region
         assert ( gap_length, gap_start, gap_end ) == gaps.pop( chosen_index )
         # gap_length, gap_start, gap_end =  gaps.pop( chosen_index )
         assert s >= 0
-        assert gap_start + s + length <= gap_end
+        assert gap_start + s + length <= gap_end, "Expected: %d + %d + %d == %d <= %d" % ( gap_start, s, length, gap_start + s + length, gap_end )
         gaps.reverse()
         if s > 0:
             bisect.insort( gaps, ( s, gap_start, gap_start + s ) )
     assert bits.count_range( 0, bits.size ) == sum( lengths )
     return bits
             
-def throw_random( lengths, mask ):
+def throw_random_1( lengths, mask ):
     total_length = mask.size
     bits = BitSet( total_length )
-    bits |= mask
     lengths = lengths[:]
     random.shuffle( lengths )
     for length in lengths:
             start = random.randrange( total_length-length )
             # Check if that interval is already covered at all
             if bits[start] == 0 and bits.next_set( start, start+length ) == start+length:
-                # Mark the range covered and continue
-                bits.set_range( start, length )
-                break
+                # Also check the mask!
+                if mask[start] == 0 and mask.next_set( start, start+length ) == start+length:
+                    # Mark the range covered and continue
+                    bits.set_range( start, length )
+                    break
         else:
-            raise "Could not place intervals after %d tries" % maxtries
+            raise MaxtriesException( "Could not place intervals after %d tries" % maxtries )
     assert bits.count_range( 0, bits.size ) == sum( lengths )
     return bits
 
+def throw_random( lengths, mask ):
+    saved = None
+    for i in range( 10 ):
+        try:
+            return throw_random_2( lengths, mask )
+        except MaxtriesException, e:
+            saved = e
+            continue
+    raise e
+    
 def as_bits( region_start, region_length, intervals ):
     """
     Build a BitSet of `region_length` bits covering the given intervals.

     Each element of `intervals` is unpacked as ( chrom, start, stop ); the
     range beginning at `start - region_start` with length `stop - start` is
     set. The chromosome field is ignored.
     """
     covered = BitSet( region_length )
     for interval in intervals:
         chrom, start, stop = interval
         # Shift into region-relative coordinates before marking
         offset = start - region_start
         covered.set_range( offset, stop - start )
     return covered
 
+def interval_lengths( bits ):
+    end = 0
+    while 1:
+        start = bits.next_set( end )
+        if start == bits.size: break
+        end = bits.next_clear( start )
+        yield end - start
+
 def count_overlap( bits1, bits2 ):
     b = BitSet( bits1.size )
     b |= bits1
         print >>sys.stderr, "Processing region:", fields[3]
         r_chr, r_start, r_stop = fields[0], int( fields[1] ), int( fields[2] )
         r_length = r_stop - r_start
+        # Load the mask
+        mask = overlapping_in_bed( mask_fname, r_chr, r_start, r_stop )
+        bits_mask = as_bits( r_start, r_length, mask )
+        bits_not_masked = bit_clone( bits_mask ); bits_not_masked.invert()
+        # Load the first set
         intervals1 = overlapping_in_bed( intervals1_fname, r_chr, r_start, r_stop )
         bits1 = as_bits( r_start, r_length, intervals1 )
-        mask = overlapping_in_bed( mask_fname, r_chr, r_start, r_stop )
-        bits_mask = as_bits( r_start, r_length, mask )
+        # Remove masked positions (intersect with the complement of the mask)
+        bits1.iand( bits_not_masked )
         # Sanity checks
         assert count_overlap( bits1, bits_mask ) == 0
         # For each data set
         for featnum, intervals2_fname in enumerate( intervals2_fnames ):
+            print >>sys.stderr, intervals2_fname
             intervals2 = overlapping_in_bed( intervals2_fname, r_chr, r_start, r_stop )
             bits2 = as_bits( r_start, r_length, intervals2 )
+            bits2.iand( bits_not_masked )
             assert count_overlap( bits2, bits_mask ) == 0
             # Observed values
             actual_overlap = count_overlap( bits1, bits2 )
             total_actual[featnum] += actual_overlap
             # Sample 
-            lengths2 = [ stop - start for chr, start, stop in intervals2 ]
+            lengths2 = list( interval_lengths( bits2 ) )
             total_lengths2[ featnum ] += sum( lengths2 )
             for i in range( nsamples ):
                 # Build randomly covered bitmask for second set

File bed_subtract_basewise.py

View file
 usage: %prog bed_file_1 bed_file_2
 
 """
+
+import pkg_resources
+pkg_resources.require( "bx-python" )
+
 import sys
 from warnings import warn
 from bx.bitset_builders import binned_bitsets_from_file
 import cookbook.doc_optparse
 
-import pkg_resources
-pkg_resources.require( "bx-python" )
-
 def print_bits_as_bed( bits ):
     end = 0
     while 1:

File bx/bitset.pyx

View file
     def __dealloc__( self ):
         bitFree( & self.bits )
 
+    ## def clone( self ):
+    ##     other = BitSet( self.bitCount )
+    ##     other.ior( self )
+    ##     return other
+
     property size:
         def __get__( self ):
             return self.bitCount

File ez_setup.py

View file
-#!/usr/bin/env python
-"""Bootstrap setuptools installation
-
-If you want to use setuptools in your package's setup.py, just include this
-file in the same directory with it, and add this to the top of your setup.py::
-
-    from ez_setup import use_setuptools
-    use_setuptools()
-
-If you want to require a specific version of setuptools, set a download
-mirror, or use an alternate download directory, you can do so by supplying
-the appropriate options to ``use_setuptools()``.
-
-This file can also be run as a script to install or upgrade setuptools.
-"""
-import sys
-DEFAULT_VERSION = "0.6a2"
-DEFAULT_URL     = "http://cheeseshop.python.org/packages/%s/s/setuptools/" % sys.version[:3]
-
-md5_data = {
-    'setuptools-0.5a13-py2.3.egg': '85edcf0ef39bab66e130d3f38f578c86',
-    'setuptools-0.5a13-py2.4.egg': 'ede4be600e3890e06d4ee5e0148e092a',
-    'setuptools-0.6a1-py2.3.egg': 'ee819a13b924d9696b0d6ca6d1c5833d',
-    'setuptools-0.6a1-py2.4.egg': '8256b5f1cd9e348ea6877b5ddd56257d',
-    'setuptools-0.6a2-py2.3.egg': 'b98da449da411267c37a738f0ab625ba',
-    'setuptools-0.6a2-py2.4.egg': 'be5b88bc30aed63fdefd2683be135c3b',
-}
-
-import sys, os
-
-def _validate_md5(egg_name, data):
-    if egg_name in md5_data:
-        from md5 import md5
-        digest = md5(data).hexdigest()
-        if digest != md5_data[egg_name]:
-            print >>sys.stderr, (
-                "md5 validation of %s failed!  (Possible download problem?)"
-                % egg_name
-            )
-            sys.exit(2)
-    return data    
-
-
-def use_setuptools(
-    version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=os.curdir,
-    download_delay=15
-):
-    """Automatically find/download setuptools and make it available on sys.path
-
-    `version` should be a valid setuptools version number that is available
-    as an egg for download under the `download_base` URL (which should end with
-    a '/').  `to_dir` is the directory where setuptools will be downloaded, if
-    it is not already available.  If `download_delay` is specified, it should
-    be the number of seconds that will be paused before initiating a download,
-    should one be required.  If an older version of setuptools is installed,
-    this routine will print a message to ``sys.stderr`` and raise SystemExit in
-    an attempt to abort the calling script.  
-    """
-    try:
-        import setuptools
-        if setuptools.__version__ == '0.0.1':
-            print >>sys.stderr, (
-            "You have an obsolete version of setuptools installed.  Please\n"
-            "remove it from your system entirely before rerunning this script."
-            )
-            sys.exit(2)
-    except ImportError:
-        egg = download_setuptools(version, download_base, to_dir, download_delay)
-        sys.path.insert(0, egg)
-        import setuptools; setuptools.bootstrap_install_from = egg
-
-    import pkg_resources
-    try:
-        pkg_resources.require("setuptools>="+version)
-
-    except pkg_resources.VersionConflict:
-        # XXX could we install in a subprocess here?
-        print >>sys.stderr, (
-            "The required version of setuptools (>=%s) is not available, and\n"
-            "can't be installed while this script is running. Please install\n"
-            " a more recent version first."
-        ) % version
-        sys.exit(2)
-
-def download_setuptools(
-    version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=os.curdir,
-    delay = 15
-):
-    """Download setuptools from a specified location and return its filename
-
-    `version` should be a valid setuptools version number that is available
-    as an egg for download under the `download_base` URL (which should end
-    with a '/'). `to_dir` is the directory where the egg will be downloaded.
-    `delay` is the number of seconds to pause before an actual download attempt.
-    """
-    import urllib2, shutil
-    egg_name = "setuptools-%s-py%s.egg" % (version,sys.version[:3])
-    url = download_base + egg_name
-    saveto = os.path.join(to_dir, egg_name)
-    src = dst = None
-    if not os.path.exists(saveto):  # Avoid repeated downloads
-        try:
-            from distutils import log
-            if delay:
-                log.warn("""
----------------------------------------------------------------------------
-This script requires setuptools version %s to run (even to display
-help).  I will attempt to download it for you (from
-%s), but
-you may need to enable firewall access for this script first.
-I will start the download in %d seconds.
----------------------------------------------------------------------------""",
-                    version, download_base, delay
-                ); from time import sleep; sleep(delay)
-            log.warn("Downloading %s", url)
-            src = urllib2.urlopen(url)
-            # Read/write all in one block, so we don't create a corrupt file
-            # if the download is interrupted.
-            data = _validate_md5(egg_name, src.read())
-            dst = open(saveto,"wb"); dst.write(data)
-        finally:
-            if src: src.close()
-            if dst: dst.close()
-    return os.path.realpath(saveto)
-
-def main(argv, version=DEFAULT_VERSION):
-    """Install or upgrade setuptools and EasyInstall"""
-
-    try:
-        import setuptools
-    except ImportError:
-        import tempfile, shutil
-        tmpdir = tempfile.mkdtemp(prefix="easy_install-")
-        try:
-            egg = download_setuptools(version, to_dir=tmpdir, delay=0)
-            sys.path.insert(0,egg)
-            from setuptools.command.easy_install import main
-            main(list(argv)+[egg])
-        finally:
-            shutil.rmtree(tmpdir)
-    else:
-        if setuptools.__version__ == '0.0.1':
-            # tell the user to uninstall obsolete version
-            use_setuptools(version)
-
-    req = "setuptools>="+version
-    import pkg_resources
-    try:
-        pkg_resources.require(req)
-    except pkg_resources.VersionConflict:
-        try:
-            from setuptools.command.easy_install import main
-        except ImportError:
-            from easy_install import main
-        main(list(argv)+[download_setuptools(delay=0)])
-        sys.exit(0) # try to force an exit
-    else:
-        if argv:
-            from setuptools.command.easy_install import main
-            main(argv)
-        else:
-            print "Setuptools version",version,"or greater has been installed."
-            print '(Run "ez_setup.py -U setuptools" to reinstall or upgrade.)'
-
-
-            
-def update_md5(filenames):
-    """Update our built-in md5 registry"""
-
-    import re
-    from md5 import md5
-
-    for name in filenames:
-        base = os.path.basename(name)
-        f = open(name,'rb')       
-        md5_data[base] = md5(f.read()).hexdigest()
-        f.close()
-
-    data = ["    %r: %r,\n" % it for it in md5_data.items()]
-    data.sort()
-    repl = "".join(data)
-
-    import inspect
-    srcfile = inspect.getsourcefile(sys.modules[__name__])
-    f = open(srcfile, 'rb'); src = f.read(); f.close()
-
-    match = re.search("\nmd5_data = {\n([^}]+)}", src)
-    if not match:
-        print >>sys.stderr, "Internal error!"
-        sys.exit(2)
-
-    src = src[:match.start(1)] + repl + src[match.end(1):]
-    f = open(srcfile,'w')
-    f.write(src)
-    f.close()
-
-
-if __name__=='__main__':
-    if len(sys.argv)>2 and sys.argv[1]=='--md5update':
-        update_md5(sys.argv[2:])
-    else:
-        main(sys.argv[1:])
-
-
-
-
-
+#!python
+"""Bootstrap setuptools installation
+
+If you want to use setuptools in your package's setup.py, just include this
+file in the same directory with it, and add this to the top of your setup.py::
+
+    from ez_setup import use_setuptools
+    use_setuptools()
+
+If you want to require a specific version of setuptools, set a download
+mirror, or use an alternate download directory, you can do so by supplying
+the appropriate options to ``use_setuptools()``.
+
+This file can also be run as a script to install or upgrade setuptools.
+"""
+import sys
+DEFAULT_VERSION = "0.6a6"
+DEFAULT_URL     = "http://cheeseshop.python.org/packages/%s/s/setuptools/" % sys.version[:3]
+
+md5_data = {
+    'setuptools-0.5a13-py2.3.egg': '85edcf0ef39bab66e130d3f38f578c86',
+    'setuptools-0.5a13-py2.4.egg': 'ede4be600e3890e06d4ee5e0148e092a',
+    'setuptools-0.6a1-py2.3.egg': 'ee819a13b924d9696b0d6ca6d1c5833d',
+    'setuptools-0.6a1-py2.4.egg': '8256b5f1cd9e348ea6877b5ddd56257d',
+    'setuptools-0.6a2-py2.3.egg': 'b98da449da411267c37a738f0ab625ba',
+    'setuptools-0.6a2-py2.4.egg': 'be5b88bc30aed63fdefd2683be135c3b',
+    'setuptools-0.6a3-py2.3.egg': 'ee0e325de78f23aab79d33106dc2a8c8',
+    'setuptools-0.6a3-py2.4.egg': 'd95453d525a456d6c23e7a5eea89a063',
+    'setuptools-0.6a4-py2.3.egg': 'e958cbed4623bbf47dd1f268b99d7784',
+    'setuptools-0.6a4-py2.4.egg': '7f33c3ac2ef1296f0ab4fac1de4767d8',
+    'setuptools-0.6a5-py2.3.egg': '748408389c49bcd2d84f6ae0b01695b1',
+    'setuptools-0.6a5-py2.4.egg': '999bacde623f4284bfb3ea77941d2627',
+}
+
+import sys, os
+
+def _validate_md5(egg_name, data):
+    if egg_name in md5_data:
+        from md5 import md5
+        digest = md5(data).hexdigest()
+        if digest != md5_data[egg_name]:
+            print >>sys.stderr, (
+                "md5 validation of %s failed!  (Possible download problem?)"
+                % egg_name
+            )
+            sys.exit(2)
+    return data    
+
+
+def use_setuptools(
+    version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=os.curdir,
+    download_delay=15
+):
+    """Automatically find/download setuptools and make it available on sys.path
+
+    `version` should be a valid setuptools version number that is available
+    as an egg for download under the `download_base` URL (which should end with
+    a '/').  `to_dir` is the directory where setuptools will be downloaded, if
+    it is not already available.  If `download_delay` is specified, it should
+    be the number of seconds that will be paused before initiating a download,
+    should one be required.  If an older version of setuptools is installed,
+    this routine will print a message to ``sys.stderr`` and raise SystemExit in
+    an attempt to abort the calling script.  
+    """
+    try:
+        import setuptools
+        if setuptools.__version__ == '0.0.1':
+            print >>sys.stderr, (
+            "You have an obsolete version of setuptools installed.  Please\n"
+            "remove it from your system entirely before rerunning this script."
+            )
+            sys.exit(2)
+    except ImportError:
+        egg = download_setuptools(version, download_base, to_dir, download_delay)
+        sys.path.insert(0, egg)
+        import setuptools; setuptools.bootstrap_install_from = egg
+
+    import pkg_resources
+    try:
+        pkg_resources.require("setuptools>="+version)
+
+    except pkg_resources.VersionConflict:
+        # XXX could we install in a subprocess here?
+        print >>sys.stderr, (
+            "The required version of setuptools (>=%s) is not available, and\n"
+            "can't be installed while this script is running. Please install\n"
+            " a more recent version first."
+        ) % version
+        sys.exit(2)
+
+def download_setuptools(
+    version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=os.curdir,
+    delay = 15
+):
+    """Download setuptools from a specified location and return its filename
+
+    `version` should be a valid setuptools version number that is available
+    as an egg for download under the `download_base` URL (which should end
+    with a '/'). `to_dir` is the directory where the egg will be downloaded.
+    `delay` is the number of seconds to pause before an actual download attempt.
+    """
+    import urllib2, shutil
+    egg_name = "setuptools-%s-py%s.egg" % (version,sys.version[:3])
+    url = download_base + egg_name
+    saveto = os.path.join(to_dir, egg_name)
+    src = dst = None
+    if not os.path.exists(saveto):  # Avoid repeated downloads
+        try:
+            from distutils import log
+            if delay:
+                log.warn("""
+---------------------------------------------------------------------------
+This script requires setuptools version %s to run (even to display
+help).  I will attempt to download it for you (from
+%s), but
+you may need to enable firewall access for this script first.
+I will start the download in %d seconds.
+---------------------------------------------------------------------------""",
+                    version, download_base, delay
+                ); from time import sleep; sleep(delay)
+            log.warn("Downloading %s", url)
+            src = urllib2.urlopen(url)
+            # Read/write all in one block, so we don't create a corrupt file
+            # if the download is interrupted.
+            data = _validate_md5(egg_name, src.read())
+            dst = open(saveto,"wb"); dst.write(data)
+        finally:
+            if src: src.close()
+            if dst: dst.close()
+    return os.path.realpath(saveto)
+
+def main(argv, version=DEFAULT_VERSION):
+    """Install or upgrade setuptools and EasyInstall"""
+
+    try:
+        import setuptools
+    except ImportError:
+        import tempfile, shutil
+        tmpdir = tempfile.mkdtemp(prefix="easy_install-")
+        try:
+            egg = download_setuptools(version, to_dir=tmpdir, delay=0)
+            sys.path.insert(0,egg)
+            from setuptools.command.easy_install import main
+            main(list(argv)+[egg])
+        finally:
+            shutil.rmtree(tmpdir)
+    else:
+        if setuptools.__version__ == '0.0.1':
+            # tell the user to uninstall obsolete version
+            use_setuptools(version)
+
+    req = "setuptools>="+version
+    import pkg_resources
+    try:
+        pkg_resources.require(req)
+    except pkg_resources.VersionConflict:
+        try:
+            from setuptools.command.easy_install import main
+        except ImportError:
+            from easy_install import main
+        main(list(argv)+[download_setuptools(delay=0)])
+        sys.exit(0) # try to force an exit
+    else:
+        if argv:
+            from setuptools.command.easy_install import main
+            main(argv)
+        else:
+            print "Setuptools version",version,"or greater has been installed."
+            print '(Run "ez_setup.py -U setuptools" to reinstall or upgrade.)'
+
+
+            
+def update_md5(filenames):
+    """Update our built-in md5 registry"""
+
+    import re
+    from md5 import md5
+
+    for name in filenames:
+        base = os.path.basename(name)
+        f = open(name,'rb')       
+        md5_data[base] = md5(f.read()).hexdigest()
+        f.close()
+
+    data = ["    %r: %r,\n" % it for it in md5_data.items()]
+    data.sort()
+    repl = "".join(data)
+
+    import inspect
+    srcfile = inspect.getsourcefile(sys.modules[__name__])
+    f = open(srcfile, 'rb'); src = f.read(); f.close()
+
+    match = re.search("\nmd5_data = {\n([^}]+)}", src)
+    if not match:
+        print >>sys.stderr, "Internal error!"
+        sys.exit(2)
+
+    src = src[:match.start(1)] + repl + src[match.end(1):]
+    f = open(srcfile,'w')
+    f.write(src)
+    f.close()
+
+
+if __name__=='__main__':
+    if len(sys.argv)>2 and sys.argv[1]=='--md5update':
+        update_md5(sys.argv[2:])
+    else:
+        main(sys.argv[1:])
+
+
+
+
+

File maf_tile.py

View file
     -m, --missingData: Inserts wildcards for missing block rows instead of '-'
 """
 
+import pkg_resources
+pkg_resources.require( "bx-python" )
+
 import psyco_full
 
 import cookbook.doc_optparse

File scripts.list

View file
 aggregate_scores_in_intervals.py
+get_scores_in_intervals.py
 align_print_template.py
 bed_intersect.py
 bed_intersect_basewise.py
+bed_rand_intersect.py
+bed_diff_basewise_summary.py
 bed_merge_overlapping.py
 bed_coverage.py
 bed_complement.py

File setup.py

View file
 
 bitset_deps = 'bits.c', 'common.c', 'memalloc.c', 'dlist.c', 'errabort.c', 'osunix.c', 'wildcmp.c'
 
-setup(  name = "python-bio-tools",
+setup(  name = "bx-python",
         version = "0.1.0",
-        py_modules = [ 'psyco_full' ],
+        py_modules = [ 'psyco_full', 'stats', 'pstat' ],
         packages = find_packages(),
         scripts = scripts,
         ext_modules=[ Extension( "bx.bitset", [ "bx/bitset.pyx", "src/binBits.c" ] + [ JK_LIB + f for f in bitset_deps ], include_dirs=[JK_INC, "src"] ) ],
         author_email = "james@bx.psu.edu",
         description = "Tools for manipulating biological data, particularly multiple sequence alignments",
         url = "http://www.bx.psu.edu/miller_lab/",
-        zip_safe = True
+        zip_safe = False
      )