1. James Taylor
  2. bx-python

Source

bx-python / lib / bx / seq / nib.py

James Taylor ff9456b 

James Taylor 250c585 
James Taylor ff9456b 
Bob Harris 2061099 



James Taylor ff9456b 

Bob Harris 2061099 
James Taylor ff9456b 

Bob Harris 2061099 
James Taylor ff9456b 





Bob Harris 2061099 

James Taylor ff9456b 

Bob Harris 3f1f7ca 
James Taylor 250c585 

James Taylor bb2a4bb 

James Taylor 250c585 




Bob Harris 2061099 
James Taylor 250c585 
Bob Harris 2061099 






mruffalo a3b93fa 
James Taylor 250c585 
Bob Harris 2061099 


James Taylor f452c9d 



James Taylor 250c585 
Bob Harris 2061099 

James Taylor 250c585 
Bob Harris 2061099 

James Taylor f452c9d 
James Taylor bb2a4bb 
Bob Harris 3f1f7ca 











Bob Harris 75fafd3 










"""
Classes to support nib files.

:Author: James Taylor (james@bx.psu.edu), Bob Harris (rsharris@bx.psu.edu)

A nib sequence is a sequence of DNA, using the 10 character alphabet A,C,G,T,N
(upper and lower case).  The file is packed as 4 bits per character.

nib file format
---------------

Fields can be in big- or little-endian format;  they must match the endianness
of the magic number.

============ =========== ======================================================
offset 0x00: 6B E9 3D 3A big endian magic number (3A 3D E9 6B => little endian)
offset 0x04: xx xx xx xx length of data sequence (counted in characters)
offset 0x08:  ...        data sequence;  most significant nybble in each
                         byte is first in sequence
============ =========== ======================================================
"""

from __future__ import division

from bx.seq.seq import SeqFile,SeqReader
import sys, struct, string, math

import _nib

# Magic number identifying a nib file, as it decodes from a big-endian file.
NIB_MAGIC_NUMBER = 0x6BE93D3A
# The same magic number with its four bytes reversed; this is what a
# big-endian decode yields when the file was written little-endian.
NIB_MAGIC_NUMBER_SWAP = 0x3A3DE96B
# Size in bytes of the magic number field at the start of the file.
NIB_MAGIC_SIZE = 4
# Size in bytes of the sequence-length field that follows the magic number.
NIB_LENGTH_SIZE = 4

class NibFile(SeqFile):
    """
    Reader for the single sequence stored in a nib file.

    The header (magic number and sequence length) is read when the object is
    created.  After construction the instance exposes:

    * ``byte_order`` -- ``">"`` or ``"<"``, the struct prefix matching the
      endianness detected from the magic number;
    * ``magic``      -- the magic number as decoded big-endian;
    * ``length``     -- the sequence length in characters, from the header.
    """

    def __init__(self, file, revcomp=False, name="", gap=None):
        """
        Read and validate the nib header from `file`.

        All arguments are passed unchanged to ``SeqFile.__init__``.  The
        header is read from the current position of `file`; `raw_fetch`
        later seeks to absolute offsets, so the nib data is expected to
        start at offset 0.

        Raises ``Exception("Not a NIB file")`` when the magic number is not
        recognised in either byte order.
        """
        SeqFile.__init__(self, file, revcomp, name, gap)

        self.byte_order = ">"
        # The magic number is always decoded big-endian; finding the
        # byte-swapped value means the file was written little-endian.
        magic = struct.unpack(">L", file.read(NIB_MAGIC_SIZE))[0]
        if magic != NIB_MAGIC_NUMBER:
            if magic == NIB_MAGIC_NUMBER_SWAP:
                self.byte_order = "<"
            else:
                raise Exception("Not a NIB file")
        self.magic = magic
        # The length field uses the byte order detected above.
        length_format = "%sL" % self.byte_order
        self.length = struct.unpack(length_format, file.read(NIB_LENGTH_SIZE))[0]

    def raw_fetch(self, start, length):
        """
        Return `length` characters of the sequence beginning at `start`.

        `start` is a zero-based offset counted in characters.  The bytes
        covering the interval are read from ``self.file`` and handed to
        ``_nib.translate_raw_data`` for unpacking.

        Raises ``AssertionError`` when `start` or `length` is negative or the
        interval extends past ``self.length``.
        """
        # Raise explicitly rather than use ``assert``: assert statements are
        # removed under ``python -O``, which would let a bad interval seek
        # into the wrong part of the file.  AssertionError is kept so callers
        # see the same exception type as before.
        if start < 0:
            raise AssertionError("Start must be greater than or equal to 0")
        if length < 0:
            raise AssertionError("Length must be greater than or equal to 0")
        if start + length > self.length:
            raise AssertionError("Interval beyond end of sequence")
        # Two characters are packed per byte, so character i lives in byte
        # i // 2.  Floor division stays exact for any integer; int() keeps
        # the result integral if a float offset is passed in.
        block_start = int(start // 2)
        block_end = int((start + length - 1) // 2)
        block_len = block_end + 1 - block_start
        # Skip the header, then read only the bytes covering the interval.
        self.file.seek(NIB_MAGIC_SIZE + NIB_LENGTH_SIZE + block_start)
        raw = self.file.read(block_len)
        # Unpack compressed block into a character string and return
        return _nib.translate_raw_data(raw, start, length)

class NibReader(SeqReader):
    """
    Sequence reader for nib files.

    A nib file holds exactly one sequence, so `next` yields a single
    `NibFile` and returns None on every later call.
    """

    def __init__(self, file, revcomp=False, name="", gap=None):
        """Pass all arguments unchanged to ``SeqReader.__init__``."""
        SeqReader.__init__(self, file, revcomp, name, gap)

    def next(self):
        """
        Return the file's sequence as a `NibFile`, or None once it has
        already been handed out.
        """
        # nib files have just one sequence
        if self.seqs_read != 0:
            return None
        # Build the sequence object first; the counter is only advanced
        # once construction has succeeded.
        nib = NibFile(self.file, self.revcomp, self.name, self.gap)
        self.seqs_read = self.seqs_read + 1
        return nib


class NibWriter(object):
    """
    Placeholder writer for nib files.

    Writing is not implemented; the class only stores the output file and
    can close it.
    """

    def __init__(self, file):
        """Remember `file` as the output file; nothing is written to it."""
        self.file = file

    def write(self, seq):
        """
        Not implemented: always raises ``AssertionError``.

        The exception is raised explicitly instead of via ``assert False``
        because assert statements are removed under ``python -O``, which
        would turn this call into a silent no-op.  The exception type and
        message are unchanged.
        """
        raise AssertionError("NibWriter.write() is not implemented yet")

    def close(self):
        """Close the underlying file."""
        self.file.close()