Source

utils / bighilbert.py

#!/usr/bin/env python2.7

import sys
import bx.bbi.bigwig_file
import numpy as np
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import os
import struct

def getChromosomeSizesFromBigWig(bwname):
  csize = {}
  fh = open(os.path.expanduser(bwname), "rb")
  magic = fh.read(4)
  if magic == '&\xfc\x8f\x88':
    endianness = '<'
  elif magic == '\x88\x8f\xfc&':
    endianness = '>'
  else:
    raise IOError("The file is not in bigwig format")
  (version,zoomLevels,chromosomeTreeOffset,fullDataOffset,fullIndexOffset,fieldCount,definedFieldCount,autoSqlOffset,totalSummaryOffset,uncompressBufSize,reserved)=struct.unpack(endianness+'HHQQQHHQQIQ',fh.read(60))
  if version < 3:
    raise IOError("Bigwig files version <3 are not supported")
  fh.seek(chromosomeTreeOffset)
  magic = fh.read(4)
  if magic == '\x91\x8c\xcax':
    endianness = '<'
  elif magic == 'x\xca\x8c\x91':
    endianness = '>'
  else:
    raise ValueError("Wrong magic for this bigwig data file")
  (blockSize, keySize, valSize, itemCount, reserved) = struct.unpack(endianness + 'IIIQQ', fh.read(28))
  (isLeaf, reserved, count) = struct.unpack(endianness + 'BBH', fh.read(4))
  for n in range(count):
    (key, chromId, chromSize) = struct.unpack(endianness + str(keySize) + 'sII', fh.read(keySize + 2 * 4))
    csize[key.replace('\x00', '')] = chromSize
  return csize

# These functions were copied from simpleHilbertCurve (https://github.com/dentearl/simpleHilbertCurve.git)

def d2xy(n, d):
    """
    take a d value in [0, n**2 - 1] and map it to
    an x, y value (e.g. c, r).
    """
    assert(d <= n**2 - 1)
    t = d
    x = y = 0
    s = 1
    while (s < n):
        rx = 1 & (t / 2)
        ry = 1 & (t ^ rx)
        x, y = rot(s, x, y, rx, ry)
        x += s * rx
        y += s * ry
        t /= 4
        s *= 2
    return x, y

def rot(n, x, y, rx, ry):
    """
    rotate/flip a quadrant appropriately
    """
    if ry == 0:
        if rx == 1:
            x = n - 1 - x
            y = n - 1 - y
        return y, x
    return x, y

Sample1 = bx.bbi.bigwig_file.BigWigFile(open(sys.argv[1], 'rb'))
Sample2 = bx.bbi.bigwig_file.BigWigFile(open(sys.argv[2], 'rb'))
csize = getChromosomeSizesFromBigWig(sys.argv[1])
n = 1 << 10
#imsize = 640

v_d2xy = np.vectorize(d2xy)

for chrom in csize.keys():
  if '_' in chrom: continue
  sys.stderr.write("Processing chromosome %s\n" % chrom)
  
  chrom_length = csize[chrom]
  c_a, r_a = vd2xy(n, (n**2 - 1) * i / chrom_length)

  sys.stderr.write("Processing Sample1\n")
  hc = np.zeros((n, n))
  data = Sample1.get_as_array(chrom, 0, chrom_length)
  data[np.isnan(data)] = 0
  for i in xrange(0, chrom_length):
    hc[r_a[i]][c_a[i]] += data[i]
  # dump hilbert curve in image and array
  np.save("Sample1_%s" % chrom, hc)
  plt.imshow(np.log1p(hc), origin=0, alpha=0.3, cmap=plt.cm.Blues)  

  sys.stderr.write("Processing Sample2\n")
  hc = np.zeros((n, n))
  data = Sample2.get_as_array(chrom, 0, chrom_length)
  data[np.isnan(data)] = 0
  for i in xrange(0, chrom_length):
    hc[r_a[i]][c_a[i]] += data[i]
  # dump hilbert curve in image and array
  np.save("Sample2_%s" % chrom, hc)
  plt.imshow(np.log1p(hc), origin=0, alpha=0.3, cmap=plt.cm.Reds)  
  plt.title("Hilbert curve for %s" % chrom)
  plt.savefig("HC_%s.png" % chrom)
  plt.close()

  
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.