Commits

James Taylor committed fe3d5eb

BGZF support (really simple wrapper to code from samtools)

Comments (0)

Files changed (8)

lib/bx/misc/bgzf.pyx

+"""
+Seekable access to BGZ files based on samtools code. Does not yet implement
+complete file-like interface.
+"""
+
+ctypedef unsigned long long int64_t
+
+cdef extern from "Python.h":
+    char * PyString_AsString( object )
+    object PyString_FromStringAndSize( char *, int )
+
+cdef extern from "bgzf.h":
+    ctypedef struct BGZF
+    BGZF * bgzf_open( char * path, char * mode )
+    int bgzf_close( BGZF * fp )
+    int bgzf_read( BGZF * fp, void * data, int length )
+    int64_t bgzf_tell( BGZF * fp )
+    int64_t bgzf_seek( BGZF * fp, int64_t pos, int where )
+    
+cdef class BGZFFile( object ):
+    cdef BGZF * bgzf
+    def __init__( self, path, mode="r" ):
+        self.bgzf = bgzf_open( path, mode )
+        if not self.bgzf:
+            raise IOError( "Could not open file" )
+    def close( self ):
+        if self.bgzf:
+            bgzf_close( self.bgzf )
+    def read( self, int length ):
+        cdef object rval = PyString_FromStringAndSize( NULL, length )
+        bgzf_read( self.bgzf, PyString_AsString( rval ), length )
+        return rval
+    def tell( self ):
+        return bgzf_tell( self.bgzf )
+    def seek( self, int64_t pos, int where=0 ):
+        return bgzf_seek( self.bgzf, pos, where )

lib/bx/misc/bgzf_tests.py

+import bx.misc.bgzf
+
+def test_bgzf():
+    f = bx.misc.bgzf.BGZFFile( "../test_data/bgzf_tests/test.txt.gz" )
+    print f.read( 10 )
+    print f.seek( 0 )
+    print f.read( 10 )
+    
+test_bgzf()
     extensions.append( Extension( "bx.seq._twobit", [ "lib/bx/seq/_twobit.pyx" ] ) )
     # Translation if character / integer strings 
     extensions.append( Extension( "bx._seqmapping", [ "lib/bx/_seqmapping.pyx" ] ) )
+    # BGZF
+    extensions.append( Extension( "bx.misc.bgzf",
+                                  [ "lib/bx/misc/bgzf.pyx", "src/samtools/bgzf.c" ],
+                                  include_dirs=[ "src/samtools"],
+                                  libraries=['z'] ) )
+
     
     # The following extensions won't (currently) compile on windows
     if platform.system() not in ( 'Microsoft', 'Windows' ):

src/samtools/bgzf.c

+/* The MIT License
+
+   Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology
+
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+   THE SOFTWARE.
+*/
+
+/*
+  2009-06-29 by lh3: cache recent uncompressed blocks.
+  2009-06-25 by lh3: optionally use my knetfile library to access file on a FTP.
+  2009-06-12 by lh3: support a mode string like "wu" where 'u' for uncompressed output */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include "bgzf.h"
+
+#include "khash.h"
+typedef struct {
+	int size;
+	uint8_t *block;
+	int64_t end_offset;
+} cache_t;
+KHASH_MAP_INIT_INT64(cache, cache_t)
+
+#if defined(_WIN32) || defined(_MSC_VER)
+#define ftello(fp) ftell(fp)
+#define fseeko(fp, offset, whence) fseek(fp, offset, whence)
+#else
+extern off_t ftello(FILE *stream);
+extern int fseeko(FILE *stream, off_t offset, int whence);
+#endif
+
+typedef int8_t bgzf_byte_t;
+
+static const int DEFAULT_BLOCK_SIZE = 64 * 1024;
+static const int MAX_BLOCK_SIZE = 64 * 1024;
+
+static const int BLOCK_HEADER_LENGTH = 18;
+static const int BLOCK_FOOTER_LENGTH = 8;
+
+static const int GZIP_ID1 = 31;
+static const int GZIP_ID2 = 139;
+static const int CM_DEFLATE = 8;
+static const int FLG_FEXTRA = 4;
+static const int OS_UNKNOWN = 255;
+static const int BGZF_ID1 = 66; // 'B'
+static const int BGZF_ID2 = 67; // 'C'
+static const int BGZF_LEN = 2;
+static const int BGZF_XLEN = 6; // BGZF_LEN+4
+
+static const int GZIP_WINDOW_BITS = -15; // no zlib header
+static const int Z_DEFAULT_MEM_LEVEL = 8;
+
+
+inline
+void
+packInt16(uint8_t* buffer, uint16_t value)
+{
+    buffer[0] = value;
+    buffer[1] = value >> 8;
+}
+
+inline
+int
+unpackInt16(const uint8_t* buffer)
+{
+    return (buffer[0] | (buffer[1] << 8));
+}
+
+inline
+void
+packInt32(uint8_t* buffer, uint32_t value)
+{
+    buffer[0] = value;
+    buffer[1] = value >> 8;
+    buffer[2] = value >> 16;
+    buffer[3] = value >> 24;
+}
+
+static inline
+int
+bgzf_min(int x, int y)
+{
+    return (x < y) ? x : y;
+}
+
+static
+void
+report_error(BGZF* fp, const char* message) {
+    fp->error = message;
+}
+
+static BGZF *bgzf_read_init()
+{
+	BGZF *fp;
+	fp = calloc(1, sizeof(BGZF));
+    fp->uncompressed_block_size = MAX_BLOCK_SIZE;
+    fp->uncompressed_block = malloc(MAX_BLOCK_SIZE);
+    fp->compressed_block_size = MAX_BLOCK_SIZE;
+    fp->compressed_block = malloc(MAX_BLOCK_SIZE);
+	fp->cache_size = 0;
+	fp->cache = kh_init(cache);
+	return fp;
+}
+
+static
+BGZF*
+open_read(int fd)
+{
+#ifdef _USE_KNETFILE
+    knetFile *file = knet_dopen(fd, "r");
+#else
+    FILE* file = fdopen(fd, "r");
+#endif
+    BGZF* fp;
+	if (file == 0) return 0;
+	fp = bgzf_read_init();
+    fp->file_descriptor = fd;
+    fp->open_mode = 'r';
+#ifdef _USE_KNETFILE
+    fp->x.fpr = file;
+#else
+    fp->file = file;
+#endif
+    return fp;
+}
+
+static
+BGZF*
+open_write(int fd, bool is_uncompressed)
+{
+    FILE* file = fdopen(fd, "w");
+    BGZF* fp;
+	if (file == 0) return 0;
+	fp = malloc(sizeof(BGZF));
+    fp->file_descriptor = fd;
+    fp->open_mode = 'w';
+    fp->owned_file = 0; fp->is_uncompressed = is_uncompressed;
+#ifdef _USE_KNETFILE
+    fp->x.fpw = file;
+#else
+    fp->file = file;
+#endif
+    fp->uncompressed_block_size = DEFAULT_BLOCK_SIZE;
+    fp->uncompressed_block = NULL;
+    fp->compressed_block_size = MAX_BLOCK_SIZE;
+    fp->compressed_block = malloc(MAX_BLOCK_SIZE);
+    fp->block_address = 0;
+    fp->block_offset = 0;
+    fp->block_length = 0;
+    fp->error = NULL;
+    return fp;
+}
+
+BGZF*
+bgzf_open(const char* __restrict path, const char* __restrict mode)
+{
+    BGZF* fp = NULL;
+    if (mode[0] == 'r' || mode[0] == 'R') { /* The reading mode is preferred. */
+#ifdef _USE_KNETFILE
+		knetFile *file = knet_open(path, mode);
+		if (file == 0) return 0;
+		fp = bgzf_read_init();
+		fp->file_descriptor = -1;
+		fp->open_mode = 'r';
+		fp->x.fpr = file;
+#else
+		int fd, oflag = O_RDONLY;
+#ifdef _WIN32
+		oflag |= O_BINARY;
+#endif
+		fd = open(path, oflag);
+		if (fd == -1) return 0;
+        fp = open_read(fd);
+#endif
+    } else if (mode[0] == 'w' || mode[0] == 'W') {
+		int fd, oflag = O_WRONLY | O_CREAT | O_TRUNC;
+#ifdef _WIN32
+		oflag |= O_BINARY;
+#endif
+		fd = open(path, oflag, 0666);
+		if (fd == -1) return 0;
+        fp = open_write(fd, strstr(mode, "u")? 1 : 0);
+    }
+    if (fp != NULL) {
+        fp->owned_file = 1;
+    }
+    return fp;
+}
+
+BGZF*
+bgzf_fdopen(int fd, const char * __restrict mode)
+{
+	if (fd == -1) return 0;
+    if (mode[0] == 'r' || mode[0] == 'R') {
+        return open_read(fd);
+    } else if (mode[0] == 'w' || mode[0] == 'W') {
+        return open_write(fd, strstr(mode, "u")? 1 : 0);
+    } else {
+        return NULL;
+    }
+}
+
+static
+int
+deflate_block(BGZF* fp, int block_length)
+{
+    // Deflate the block in fp->uncompressed_block into fp->compressed_block.
+    // Also adds an extra field that stores the compressed block length.
+
+    bgzf_byte_t* buffer = fp->compressed_block;
+    int buffer_size = fp->compressed_block_size;
+
+    // Init gzip header
+    buffer[0] = GZIP_ID1;
+    buffer[1] = GZIP_ID2;
+    buffer[2] = CM_DEFLATE;
+    buffer[3] = FLG_FEXTRA;
+    buffer[4] = 0; // mtime
+    buffer[5] = 0;
+    buffer[6] = 0;
+    buffer[7] = 0;
+    buffer[8] = 0;
+    buffer[9] = OS_UNKNOWN;
+    buffer[10] = BGZF_XLEN;
+    buffer[11] = 0;
+    buffer[12] = BGZF_ID1;
+    buffer[13] = BGZF_ID2;
+    buffer[14] = BGZF_LEN;
+    buffer[15] = 0;
+    buffer[16] = 0; // placeholder for block length
+    buffer[17] = 0;
+
+    // loop to retry for blocks that do not compress enough
+    int input_length = block_length;
+    int compressed_length = 0;
+    while (1) {
+		int compress_level = fp->is_uncompressed? 0 : Z_DEFAULT_COMPRESSION;
+        z_stream zs;
+        zs.zalloc = NULL;
+        zs.zfree = NULL;
+        zs.next_in = fp->uncompressed_block;
+        zs.avail_in = input_length;
+        zs.next_out = (void*)&buffer[BLOCK_HEADER_LENGTH];
+        zs.avail_out = buffer_size - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
+
+        int status = deflateInit2(&zs, compress_level, Z_DEFLATED,
+                                  GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY);
+        if (status != Z_OK) {
+            report_error(fp, "deflate init failed");
+            return -1;
+        }
+        status = deflate(&zs, Z_FINISH);
+        if (status != Z_STREAM_END) {
+            deflateEnd(&zs);
+            if (status == Z_OK) {
+                // Not enough space in buffer.
+                // Can happen in the rare case the input doesn't compress enough.
+                // Reduce the amount of input until it fits.
+                input_length -= 1024;
+                if (input_length <= 0) {
+                    // should never happen
+                    report_error(fp, "input reduction failed");
+                    return -1;
+                }
+                continue;
+            }
+            report_error(fp, "deflate failed");
+            return -1;
+        }
+        status = deflateEnd(&zs);
+        if (status != Z_OK) {
+            report_error(fp, "deflate end failed");
+            return -1;
+        }
+        compressed_length = zs.total_out;
+        compressed_length += BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH;
+        if (compressed_length > MAX_BLOCK_SIZE) {
+            // should never happen
+            report_error(fp, "deflate overflow");
+            return -1;
+        }
+        break;
+    }
+
+    packInt16((uint8_t*)&buffer[16], compressed_length-1);
+    uint32_t crc = crc32(0L, NULL, 0L);
+    crc = crc32(crc, fp->uncompressed_block, input_length);
+    packInt32((uint8_t*)&buffer[compressed_length-8], crc);
+    packInt32((uint8_t*)&buffer[compressed_length-4], input_length);
+
+    int remaining = block_length - input_length;
+    if (remaining > 0) {
+        if (remaining > input_length) {
+            // should never happen (check so we can use memcpy)
+            report_error(fp, "remainder too large");
+            return -1;
+        }
+        memcpy(fp->uncompressed_block,
+               fp->uncompressed_block + input_length,
+               remaining);
+    }
+    fp->block_offset = remaining;
+    return compressed_length;
+}
+
+static
+int
+inflate_block(BGZF* fp, int block_length)
+{
+    // Inflate the block in fp->compressed_block into fp->uncompressed_block
+
+    z_stream zs;
+    zs.zalloc = NULL;
+    zs.zfree = NULL;
+    zs.next_in = fp->compressed_block + 18;
+    zs.avail_in = block_length - 16;
+    zs.next_out = fp->uncompressed_block;
+    zs.avail_out = fp->uncompressed_block_size;
+
+    int status = inflateInit2(&zs, GZIP_WINDOW_BITS);
+    if (status != Z_OK) {
+        report_error(fp, "inflate init failed");
+        return -1;
+    }
+    status = inflate(&zs, Z_FINISH);
+    if (status != Z_STREAM_END) {
+        inflateEnd(&zs);
+        report_error(fp, "inflate failed");
+        return -1;
+    }
+    status = inflateEnd(&zs);
+    if (status != Z_OK) {
+        report_error(fp, "inflate failed");
+        return -1;
+    }
+    return zs.total_out;
+}
+
+static
+int
+check_header(const bgzf_byte_t* header)
+{
+    return (header[0] == GZIP_ID1 &&
+            header[1] == (bgzf_byte_t) GZIP_ID2 &&
+            header[2] == Z_DEFLATED &&
+            (header[3] & FLG_FEXTRA) != 0 &&
+            unpackInt16((uint8_t*)&header[10]) == BGZF_XLEN &&
+            header[12] == BGZF_ID1 &&
+            header[13] == BGZF_ID2 &&
+            unpackInt16((uint8_t*)&header[14]) == BGZF_LEN);
+}
+
+static void free_cache(BGZF *fp)
+{
+	khint_t k;
+	khash_t(cache) *h = (khash_t(cache)*)fp->cache;
+	if (fp->open_mode != 'r') return;
+	for (k = kh_begin(h); k < kh_end(h); ++k)
+		if (kh_exist(h, k)) free(kh_val(h, k).block);
+	kh_destroy(cache, h);
+}
+
+static int load_block_from_cache(BGZF *fp, int64_t block_address)
+{
+	khint_t k;
+	cache_t *p;
+	khash_t(cache) *h = (khash_t(cache)*)fp->cache;
+	k = kh_get(cache, h, block_address);
+	if (k == kh_end(h)) return 0;
+	p = &kh_val(h, k);
+	if (fp->block_length != 0) fp->block_offset = 0;
+	fp->block_address = block_address;
+	fp->block_length = p->size;
+	memcpy(fp->uncompressed_block, p->block, MAX_BLOCK_SIZE);
+#ifdef _USE_KNETFILE
+	knet_seek(fp->x.fpr, p->end_offset, SEEK_SET);
+#else
+	fseeko(fp->file, p->end_offset, SEEK_SET);
+#endif
+	return p->size;
+}
+
+static void cache_block(BGZF *fp, int size)
+{
+	int ret;
+	khint_t k;
+	cache_t *p;
+	khash_t(cache) *h = (khash_t(cache)*)fp->cache;
+	if (MAX_BLOCK_SIZE >= fp->cache_size) return;
+	if ((kh_size(h) + 1) * MAX_BLOCK_SIZE > fp->cache_size) {
+		/* A better way would be to remove the oldest block in the
+		 * cache, but here we remove a random one for simplicity. This
+		 * should not have a big impact on performance. */
+		for (k = kh_begin(h); k < kh_end(h); ++k)
+			if (kh_exist(h, k)) break;
+		if (k < kh_end(h)) {
+			free(kh_val(h, k).block);
+			kh_del(cache, h, k);
+		}
+	}
+	k = kh_put(cache, h, fp->block_address, &ret);
+	if (ret == 0) return; // if this happens, a bug!
+	p = &kh_val(h, k);
+	p->size = fp->block_length;
+	p->end_offset = fp->block_address + size;
+	p->block = malloc(MAX_BLOCK_SIZE);
+	memcpy(kh_val(h, k).block, fp->uncompressed_block, MAX_BLOCK_SIZE);
+}
+
+static
+int
+read_block(BGZF* fp)
+{
+    bgzf_byte_t header[BLOCK_HEADER_LENGTH];
+	int size = 0;
+#ifdef _USE_KNETFILE
+    int64_t block_address = knet_tell(fp->x.fpr);
+	if (load_block_from_cache(fp, block_address)) return 0;
+    int count = knet_read(fp->x.fpr, header, sizeof(header));
+#else
+    int64_t block_address = ftello(fp->file);
+	if (load_block_from_cache(fp, block_address)) return 0;
+    int count = fread(header, 1, sizeof(header), fp->file);
+#endif
+    if (count == 0) {
+        fp->block_length = 0;
+        return 0;
+    }
+	size = count;
+    if (count != sizeof(header)) {
+        report_error(fp, "read failed");
+        return -1;
+    }
+    if (!check_header(header)) {
+        report_error(fp, "invalid block header");
+        return -1;
+    }
+    int block_length = unpackInt16((uint8_t*)&header[16]) + 1;
+    bgzf_byte_t* compressed_block = (bgzf_byte_t*) fp->compressed_block;
+    memcpy(compressed_block, header, BLOCK_HEADER_LENGTH);
+    int remaining = block_length - BLOCK_HEADER_LENGTH;
+#ifdef _USE_KNETFILE
+    count = knet_read(fp->x.fpr, &compressed_block[BLOCK_HEADER_LENGTH], remaining);
+#else
+    count = fread(&compressed_block[BLOCK_HEADER_LENGTH], 1, remaining, fp->file);
+#endif
+    if (count != remaining) {
+        report_error(fp, "read failed");
+        return -1;
+    }
+	size += count;
+    count = inflate_block(fp, block_length);
+    if (count < 0) {
+        return -1;
+    }
+    if (fp->block_length != 0) {
+        // Do not reset offset if this read follows a seek.
+        fp->block_offset = 0;
+    }
+    fp->block_address = block_address;
+    fp->block_length = count;
+	cache_block(fp, size);
+    return 0;
+}
+
+int
+bgzf_read(BGZF* fp, void* data, int length)
+{
+    if (length <= 0) {
+        return 0;
+    }
+    if (fp->open_mode != 'r') {
+        report_error(fp, "file not open for reading");
+        return -1;
+    }
+
+    int bytes_read = 0;
+    bgzf_byte_t* output = data;
+    while (bytes_read < length) {
+        int available = fp->block_length - fp->block_offset;
+        if (available <= 0) {
+            if (read_block(fp) != 0) {
+                return -1;
+            }
+            available = fp->block_length - fp->block_offset;
+            if (available <= 0) {
+                break;
+            }
+        }
+        int copy_length = bgzf_min(length-bytes_read, available);
+        bgzf_byte_t* buffer = fp->uncompressed_block;
+        memcpy(output, buffer + fp->block_offset, copy_length);
+        fp->block_offset += copy_length;
+        output += copy_length;
+        bytes_read += copy_length;
+    }
+    if (fp->block_offset == fp->block_length) {
+#ifdef _USE_KNETFILE
+        fp->block_address = knet_tell(fp->x.fpr);
+#else
+        fp->block_address = ftello(fp->file);
+#endif
+        fp->block_offset = 0;
+        fp->block_length = 0;
+    }
+    return bytes_read;
+}
+
+static
+int
+flush_block(BGZF* fp)
+{
+    while (fp->block_offset > 0) {
+        int block_length = deflate_block(fp, fp->block_offset);
+        if (block_length < 0) {
+            return -1;
+        }
+#ifdef _USE_KNETFILE
+        int count = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw);
+#else
+        int count = fwrite(fp->compressed_block, 1, block_length, fp->file);
+#endif
+        if (count != block_length) {
+            report_error(fp, "write failed");
+            return -1;
+        }
+        fp->block_address += block_length;
+    }
+    return 0;
+}
+
+int
+bgzf_write(BGZF* fp, const void* data, int length)
+{
+    if (fp->open_mode != 'w') {
+        report_error(fp, "file not open for writing");
+        return -1;
+    }
+
+    if (fp->uncompressed_block == NULL) {
+        fp->uncompressed_block = malloc(fp->uncompressed_block_size);
+    }
+
+    const bgzf_byte_t* input = data;
+    int block_length = fp->uncompressed_block_size;
+    int bytes_written = 0;
+    while (bytes_written < length) {
+        int copy_length = bgzf_min(block_length - fp->block_offset, length - bytes_written);
+        bgzf_byte_t* buffer = fp->uncompressed_block;
+        memcpy(buffer + fp->block_offset, input, copy_length);
+        fp->block_offset += copy_length;
+        input += copy_length;
+        bytes_written += copy_length;
+        if (fp->block_offset == block_length) {
+            if (flush_block(fp) != 0) {
+                break;
+            }
+        }
+    }
+    return bytes_written;
+}
+
+int
+bgzf_close(BGZF* fp)
+{
+    if (fp->open_mode == 'w') {
+        if (flush_block(fp) != 0) {
+            return -1;
+        }
+		{ // add an empty block
+			int count, block_length = deflate_block(fp, 0);
+#ifdef _USE_KNETFILE
+			count = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw);
+#else
+			count = fwrite(fp->compressed_block, 1, block_length, fp->file);
+#endif
+		}
+#ifdef _USE_KNETFILE
+        if (fflush(fp->x.fpw) != 0) {
+#else
+        if (fflush(fp->file) != 0) {
+#endif
+            report_error(fp, "flush failed");
+            return -1;
+        }
+    }
+    if (fp->owned_file) {
+#ifdef _USE_KNETFILE
+		int ret;
+		if (fp->open_mode == 'w') ret = fclose(fp->x.fpw);
+		else ret = knet_close(fp->x.fpr);
+        if (ret != 0) return -1;
+#else
+        if (fclose(fp->file) != 0) {
+            return -1;
+        }
+#endif
+    }
+    free(fp->uncompressed_block);
+    free(fp->compressed_block);
+	free_cache(fp);
+    free(fp);
+    return 0;
+}
+
+int64_t
+bgzf_tell(BGZF* fp)
+{
+    return ((fp->block_address << 16) | (fp->block_offset & 0xFFFF));
+}
+
+void bgzf_set_cache_size(BGZF *fp, int cache_size)
+{
+	if (fp) fp->cache_size = cache_size;
+}
+
+int bgzf_check_EOF(BGZF *fp)
+{
+	static uint8_t magic[28] = "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0";
+	uint8_t buf[28];
+	off_t offset;
+#ifdef _USE_KNETFILE
+	offset = knet_tell(fp->x.fpr);
+	if (knet_seek(fp->x.fpr, -28, SEEK_END) != 0) return -1;
+	knet_read(fp->x.fpr, buf, 28);
+	knet_seek(fp->x.fpr, offset, SEEK_SET);
+#else
+	offset = ftello(fp->file);
+	if (fseeko(fp->file, -28, SEEK_END) != 0) return -1;
+	fread(buf, 1, 28, fp->file);
+	fseeko(fp->file, offset, SEEK_SET);
+#endif
+	return (memcmp(magic, buf, 28) == 0)? 1 : 0;
+}
+
+int64_t
+bgzf_seek(BGZF* fp, int64_t pos, int where)
+{
+    if (fp->open_mode != 'r') {
+        report_error(fp, "file not open for read");
+        return -1;
+    }
+    if (where != SEEK_SET) {
+        report_error(fp, "unimplemented seek option");
+        return -1;
+    }
+    int block_offset = pos & 0xFFFF;
+    int64_t block_address = (pos >> 16) & 0xFFFFFFFFFFFFLL;
+#ifdef _USE_KNETFILE
+    if (knet_seek(fp->x.fpr, block_address, SEEK_SET) != 0) {
+#else
+    if (fseeko(fp->file, block_address, SEEK_SET) != 0) {
+#endif
+        report_error(fp, "seek failed");
+        return -1;
+    }
+    fp->block_length = 0;  // indicates current block is not loaded
+    fp->block_address = block_address;
+    fp->block_offset = block_offset;
+    return 0;
+}

src/samtools/bgzf.h

+/* The MIT License
+
+   Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology
+
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+   THE SOFTWARE.
+*/
+
+#ifndef __BGZF_H
+#define __BGZF_H
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <zlib.h>
+#ifdef _USE_KNETFILE
+#include "knetfile.h"
+#endif
+
+//typedef int8_t bool;
+
+typedef struct {
+    int file_descriptor;
+    char open_mode;  // 'r' or 'w'
+    bool owned_file, is_uncompressed;
+#ifdef _USE_KNETFILE
+	union {
+		knetFile *fpr;
+		FILE *fpw;
+	} x;
+#else
+    FILE* file;
+#endif
+    int uncompressed_block_size;
+    int compressed_block_size;
+    void* uncompressed_block;
+    void* compressed_block;
+    int64_t block_address;
+    int block_length;
+    int block_offset;
+	int cache_size;
+    const char* error;
+	void *cache; // a pointer to a hash table
+} BGZF;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Open an existing file descriptor for reading or writing.
+ * Mode must be either "r" or "w".
+ * A subsequent bgzf_close will not close the file descriptor.
+ * Returns null on error.
+ */
+BGZF* bgzf_fdopen(int fd, const char* __restrict mode);
+
+/*
+ * Open the specified file for reading or writing.
+ * Mode must be either "r" or "w".
+ * Returns null on error.
+ */
+BGZF* bgzf_open(const char* path, const char* __restrict mode);
+
+/*
+ * Close the BGZ file and free all associated resources.
+ * Does not close the underlying file descriptor if created with bgzf_fdopen.
+ * Returns zero on success, -1 on error.
+ */
+int bgzf_close(BGZF* fp);
+
+/*
+ * Read up to length bytes from the file storing into data.
+ * Returns the number of bytes actually read.
+ * Returns zero on end of file.
+ * Returns -1 on error.
+ */
+int bgzf_read(BGZF* fp, void* data, int length);
+
+/*
+ * Write length bytes from data to the file.
+ * Returns the number of bytes written.
+ * Returns -1 on error.
+ */
+int bgzf_write(BGZF* fp, const void* data, int length);
+
+/*
+ * Return a virtual file pointer to the current location in the file.
+ * No interpetation of the value should be made, other than a subsequent
+ * call to bgzf_seek can be used to position the file at the same point.
+ * Return value is non-negative on success.
+ * Returns -1 on error.
+ */
+int64_t bgzf_tell(BGZF* fp);
+
+/*
+ * Set the file to read from the location specified by pos, which must
+ * be a value previously returned by bgzf_tell for this file (but not
+ * necessarily one returned by this file handle).
+ * The where argument must be SEEK_SET.
+ * Seeking on a file opened for write is not supported.
+ * Returns zero on success, -1 on error.
+ */
+int64_t bgzf_seek(BGZF* fp, int64_t pos, int where);
+
+/*
+ * Set the cache size. Zero to disable. By default, caching is
+ * disabled. The recommended cache size for frequent random access is
+ * about 8M bytes.
+ */
+void bgzf_set_cache_size(BGZF *fp, int cache_size);
+
+int bgzf_check_EOF(BGZF *fp);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif

src/samtools/khash.h

+/* The MIT License
+
+   Copyright (c) 2008 Genome Research Ltd (GRL).
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/* Contact: Heng Li <lh3@sanger.ac.uk> */
+
+/*
+  An example:
+
+#include "khash.h"
+KHASH_MAP_INIT_INT(32, char)
+int main() {
+	int ret, is_missing;
+	khiter_t k;
+	khash_t(32) *h = kh_init(32);
+	k = kh_put(32, h, 5, &ret);
+	if (!ret) kh_del(32, h, k);
+	kh_value(h, k) = 10;
+	k = kh_get(32, h, 10);
+	is_missing = (k == kh_end(h));
+	k = kh_get(32, h, 5);
+	kh_del(32, h, k);
+	for (k = kh_begin(h); k != kh_end(h); ++k)
+		if (kh_exist(h, k)) kh_value(h, k) = 1;
+	kh_destroy(32, h);
+	return 0;
+}
+*/
+
+/*
+  2008-09-19 (0.2.3):
+
+	* Corrected the example
+	* Improved interfaces
+
+  2008-09-11 (0.2.2):
+
+	* Improved speed a little in kh_put()
+
+  2008-09-10 (0.2.1):
+
+	* Added kh_clear()
+	* Fixed a compiling error
+
+  2008-09-02 (0.2.0):
+
+	* Changed to token concatenation which increases flexibility.
+
+  2008-08-31 (0.1.2):
+
+	* Fixed a bug in kh_get(), which has not been tested previously.
+
+  2008-08-31 (0.1.1):
+
+	* Added destructor
+*/
+
+
+#ifndef __AC_KHASH_H
+#define __AC_KHASH_H
+
+/*!
+  @header
+
+  Generic hash table library.
+
+  @copyright Heng Li
+ */
+
+#define AC_VERSION_KHASH_H "0.2.2"
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+typedef uint32_t khint_t;
+typedef khint_t khiter_t;
+
+#define __ac_HASH_PRIME_SIZE 32
+static const uint32_t __ac_prime_list[__ac_HASH_PRIME_SIZE] =
+{
+  0ul,          3ul,          11ul,         23ul,         53ul,
+  97ul,         193ul,        389ul,        769ul,        1543ul,
+  3079ul,       6151ul,       12289ul,      24593ul,      49157ul,
+  98317ul,      196613ul,     393241ul,     786433ul,     1572869ul,
+  3145739ul,    6291469ul,    12582917ul,   25165843ul,   50331653ul,
+  100663319ul,  201326611ul,  402653189ul,  805306457ul,  1610612741ul,
+  3221225473ul, 4294967291ul
+};
+
+#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
+#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
+#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
+#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
+#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
+#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
+#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
+
+static const double __ac_HASH_UPPER = 0.77;
+
+#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
+	typedef struct {													\
+		khint_t n_buckets, size, n_occupied, upper_bound;				\
+		uint32_t *flags;												\
+		khkey_t *keys;													\
+		khval_t *vals;													\
+	} kh_##name##_t;													\
+	static inline kh_##name##_t *kh_init_##name() {						\
+		return (kh_##name##_t*)calloc(1, sizeof(kh_##name##_t));		\
+	}																	\
+	static inline void kh_destroy_##name(kh_##name##_t *h)				\
+	{																	\
+		if (h) {														\
+			free(h->keys); free(h->flags);								\
+			free(h->vals);												\
+			free(h);													\
+		}																\
+	}																	\
+	static inline void kh_clear_##name(kh_##name##_t *h)				\
+	{																	\
+		if (h && h->flags) {											\
+			memset(h->flags, 0xaa, ((h->n_buckets>>4) + 1) * sizeof(uint32_t)); \
+			h->size = h->n_occupied = 0;								\
+		}																\
+	}																	\
+	static inline khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
+	{																	\
+		if (h->n_buckets) {												\
+			khint_t inc, k, i, last;									\
+			k = __hash_func(key); i = k % h->n_buckets;					\
+			inc = 1 + k % (h->n_buckets - 1); last = i;					\
+			while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
+				if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \
+				else i += inc;											\
+				if (i == last) return h->n_buckets;						\
+			}															\
+			return __ac_iseither(h->flags, i)? h->n_buckets : i;		\
+		} else return 0;												\
+	}																	\
+	static inline void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
+	{																	\
+		uint32_t *new_flags = 0;										\
+		khint_t j = 1;													\
+		{																\
+			khint_t t = __ac_HASH_PRIME_SIZE - 1;						\
+			while (__ac_prime_list[t] > new_n_buckets) --t;				\
+			new_n_buckets = __ac_prime_list[t+1];						\
+			if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0;	\
+			else {														\
+				new_flags = (uint32_t*)malloc(((new_n_buckets>>4) + 1) * sizeof(uint32_t));	\
+				memset(new_flags, 0xaa, ((new_n_buckets>>4) + 1) * sizeof(uint32_t)); \
+				if (h->n_buckets < new_n_buckets) {						\
+					h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
+					if (kh_is_map)										\
+						h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
+				}														\
+			}															\
+		}																\
+		if (j) {														\
+			for (j = 0; j != h->n_buckets; ++j) {						\
+				if (__ac_iseither(h->flags, j) == 0) {					\
+					khkey_t key = h->keys[j];							\
+					khval_t val;										\
+					if (kh_is_map) val = h->vals[j];					\
+					__ac_set_isdel_true(h->flags, j);					\
+					while (1) {											\
+						khint_t inc, k, i;								\
+						k = __hash_func(key);							\
+						i = k % new_n_buckets;							\
+						inc = 1 + k % (new_n_buckets - 1);				\
+						while (!__ac_isempty(new_flags, i)) {			\
+							if (i + inc >= new_n_buckets) i = i + inc - new_n_buckets; \
+							else i += inc;								\
+						}												\
+						__ac_set_isempty_false(new_flags, i);			\
+						if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { \
+							{ khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
+							if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \
+							__ac_set_isdel_true(h->flags, i);			\
+						} else {										\
+							h->keys[i] = key;							\
+							if (kh_is_map) h->vals[i] = val;			\
+							break;										\
+						}												\
+					}													\
+				}														\
+			}															\
+			if (h->n_buckets > new_n_buckets) {							\
+				h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
+				if (kh_is_map)											\
+					h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
+			}															\
+			free(h->flags);												\
+			h->flags = new_flags;										\
+			h->n_buckets = new_n_buckets;								\
+			h->n_occupied = h->size;									\
+			h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \
+		}																\
+	}																	\
+	static inline khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \
+	{																	\
+		khint_t x;														\
+		if (h->n_occupied >= h->upper_bound) {							\
+			if (h->n_buckets > (h->size<<1)) kh_resize_##name(h, h->n_buckets - 1); \
+			else kh_resize_##name(h, h->n_buckets + 1);					\
+		}																\
+		{																\
+			khint_t inc, k, i, site, last;								\
+			x = site = h->n_buckets; k = __hash_func(key); i = k % h->n_buckets; \
+			if (__ac_isempty(h->flags, i)) x = i;						\
+			else {														\
+				inc = 1 + k % (h->n_buckets - 1); last = i;				\
+				while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
+					if (__ac_isdel(h->flags, i)) site = i;				\
+					if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \
+					else i += inc;										\
+					if (i == last) { x = site; break; }					\
+				}														\
+				if (x == h->n_buckets) {								\
+					if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \
+					else x = i;											\
+				}														\
+			}															\
+		}																\
+		if (__ac_isempty(h->flags, x)) {								\
+			h->keys[x] = key;											\
+			__ac_set_isboth_false(h->flags, x);							\
+			++h->size; ++h->n_occupied;									\
+			*ret = 1;													\
+		} else if (__ac_isdel(h->flags, x)) {							\
+			h->keys[x] = key;											\
+			__ac_set_isboth_false(h->flags, x);							\
+			++h->size;													\
+			*ret = 2;													\
+		} else *ret = 0;												\
+		return x;														\
+	}																	\
+	static inline void kh_del_##name(kh_##name##_t *h, khint_t x)		\
+	{																	\
+		if (x != h->n_buckets && !__ac_iseither(h->flags, x)) {			\
+			__ac_set_isdel_true(h->flags, x);							\
+			--h->size;													\
+		}																\
+	}
+
+/* --- BEGIN OF HASH FUNCTIONS --- */
+
+/*! @function
+  @abstract     Integer hash function
+  @param  key   The integer [uint32_t]
+  @return       The hash value [khint_t]
+ */
+#define kh_int_hash_func(key) (uint32_t)(key)
+/*! @function
+  @abstract     Integer comparison function
+ */
+#define kh_int_hash_equal(a, b) ((a) == (b))
+/*! @function
+  @abstract     64-bit integer hash function
+  @param  key   The integer [uint64_t]
+  @return       The hash value [khint_t]
+ */
+#define kh_int64_hash_func(key) (uint32_t)((key)>>33^(key)^(key)<<11)
+/*! @function
+  @abstract     64-bit integer comparison function
+ */
+#define kh_int64_hash_equal(a, b) ((a) == (b))
+/*! @function
+  @abstract     const char* hash function
+  @param  s     Pointer to a null terminated string
+  @return       The hash value
+ */
+static inline khint_t __ac_X31_hash_string(const char *s)
+{
+	khint_t h = *s;
+	if (h) for (++s ; *s; ++s) h = (h << 5) - h + *s;
+	return h;
+}
+/*! @function
+  @abstract     Another interface to const char* hash function
+  @param  key   Pointer to a null terminated string [const char*]
+  @return       The hash value [khint_t]
+ */
+#define kh_str_hash_func(key) __ac_X31_hash_string(key)
+/*! @function
+  @abstract     Const char* comparison function
+ */
+#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0)
+
+/* --- END OF HASH FUNCTIONS --- */
+
+/* Other necessary macros... */
+
+/*!
+  @abstract Type of the hash table.
+  @param  name  Name of the hash table [symbol]
+ */
+#define khash_t(name) kh_##name##_t
+
+/*! @function
+  @abstract     Initiate a hash table.
+  @param  name  Name of the hash table [symbol]
+  @return       Pointer to the hash table [khash_t(name)*]
+ */
+#define kh_init(name) kh_init_##name()
+
+/*! @function
+  @abstract     Destroy a hash table.
+  @param  name  Name of the hash table [symbol]
+  @param  h     Pointer to the hash table [khash_t(name)*]
+ */
+#define kh_destroy(name, h) kh_destroy_##name(h)
+
+/*! @function
+  @abstract     Reset a hash table without deallocating memory.
+  @param  name  Name of the hash table [symbol]
+  @param  h     Pointer to the hash table [khash_t(name)*]
+ */
+#define kh_clear(name, h) kh_clear_##name(h)
+
+/*! @function
+  @abstract     Resize a hash table.
+  @param  name  Name of the hash table [symbol]
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  s     New size [khint_t]
+ */
+#define kh_resize(name, h, s) kh_resize_##name(h, s)
+
+/*! @function
+  @abstract     Insert a key to the hash table.
+  @param  name  Name of the hash table [symbol]
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  k     Key [type of keys]
+  @param  r     Extra return code: 0 if the key is present in the hash table;
+                1 if the bucket is empty (never used); 2 if the element in
+				the bucket has been deleted [int*]
+  @return       Iterator to the inserted element [khint_t]
+ */
+#define kh_put(name, h, k, r) kh_put_##name(h, k, r)
+
+/*! @function
+  @abstract     Retrieve a key from the hash table.
+  @param  name  Name of the hash table [symbol]
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  k     Key [type of keys]
+  @return       Iterator to the found element, or kh_end(h) is the element is absent [khint_t]
+ */
+#define kh_get(name, h, k) kh_get_##name(h, k)
+
+/*! @function
+  @abstract     Remove a key from the hash table.
+  @param  name  Name of the hash table [symbol]
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  k     Iterator to the element to be deleted [khint_t]
+ */
+#define kh_del(name, h, k) kh_del_##name(h, k)
+
+
+/*! @function
+  @abstract     Test whether a bucket contains data.
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  x     Iterator to the bucket [khint_t]
+  @return       1 if containing data; 0 otherwise [int]
+ */
+#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))
+
+/*! @function
+  @abstract     Get key given an iterator
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  x     Iterator to the bucket [khint_t]
+  @return       Key [type of keys]
+ */
+#define kh_key(h, x) ((h)->keys[x])
+
+/*! @function
+  @abstract     Get value given an iterator
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  x     Iterator to the bucket [khint_t]
+  @return       Value [type of values]
+  @discussion   For hash sets, calling this results in segfault.
+ */
+#define kh_val(h, x) ((h)->vals[x])
+
+/*! @function
+  @abstract     Alias of kh_val()
+ */
+#define kh_value(h, x) ((h)->vals[x])
+
+/*! @function
+  @abstract     Get the start iterator
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @return       The start iterator [khint_t]
+ */
+#define kh_begin(h) (khint_t)(0)
+
+/*! @function
+  @abstract     Get the end iterator
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @return       The end iterator [khint_t]
+ */
+#define kh_end(h) ((h)->n_buckets)
+
+/*! @function
+  @abstract     Get the number of elements in the hash table
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @return       Number of elements in the hash table [khint_t]
+ */
+#define kh_size(h) ((h)->size)
+
+/*! @function
+  @abstract     Get the number of buckets in the hash table
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @return       Number of buckets in the hash table [khint_t]
+ */
+#define kh_n_buckets(h) ((h)->n_buckets)
+
+/* More conenient interfaces */
+
+/*! @function
+  @abstract     Instantiate a hash set containing integer keys
+  @param  name  Name of the hash table [symbol]
+ */
+#define KHASH_SET_INIT_INT(name)										\
+	KHASH_INIT(name, uint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)
+
+/*! @function
+  @abstract     Instantiate a hash map containing integer keys
+  @param  name  Name of the hash table [symbol]
+  @param  khval_t  Type of values [type]
+ */
+#define KHASH_MAP_INIT_INT(name, khval_t)								\
+	KHASH_INIT(name, uint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)
+
+/*! @function
+  @abstract     Instantiate a hash map containing 64-bit integer keys
+  @param  name  Name of the hash table [symbol]
+ */
+#define KHASH_SET_INIT_INT64(name)										\
+	KHASH_INIT(name, uint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)
+
+/*! @function
+  @abstract     Instantiate a hash map containing 64-bit integer keys
+  @param  name  Name of the hash table [symbol]
+  @param  khval_t  Type of values [type]
+ */
+#define KHASH_MAP_INIT_INT64(name, khval_t)								\
+	KHASH_INIT(name, uint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)
+
+typedef const char *kh_cstr_t;
+/*! @function
+  @abstract     Instantiate a hash map containing const char* keys
+  @param  name  Name of the hash table [symbol]
+ */
+#define KHASH_SET_INIT_STR(name)										\
+	KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)
+
+/*! @function
+  @abstract     Instantiate a hash map containing const char* keys
+  @param  name  Name of the hash table [symbol]
+  @param  khval_t  Type of values [type]
+ */
+#define KHASH_MAP_INIT_STR(name, khval_t)								\
+	KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)
+
+#endif /* __AC_KHASH_H */

test_data/bgzf_tests/test.txt

+begin 644 -
+M7#MX8@E/CF1L8D+5C*2$BF]//54$09RCLJZ!&JQ)#I+^_(W,9?1?NTO#^7_5
+M=B4]A-]ARI+$]?`#D`"!W>Q+8`)3.&NIL9L%"L3?QQ8=S:'7\)WU0FGB?FUG
+M&HZ4;<4I"QU=CH^U8O\_F<OT+,=U1/64`"/>2HU@FOBLYO2KV\1)TQJYS$*8
+M!*O^1\K$YK[XNZF"Q-8,MMW-GPOYD'JER=B6!WYA,WDV`3KR;/<%#K0OQ,L*
+MRZ(LM#K2EUG94WH3JL&\^M5^]67$?@FO_FS?VBY+FYJ)I"WX[G)L=X4JF0'Q
+MGDU)V]'E0ZALRY->4@7N)U:8N;1".+;WT#'=QE!A>AUOCJ]F/R8_<A,:MTD2
+MFVE2F^6<.(I&V!]$=^I*!S^Z&;1!80]`"!PG4JX@@&D8EKP>?L8E(_->_"9P
+M6;T>A/<7A!"0@N6'V,AA+GHCC\Z7J3/(<=D*FGCY%HJH+UNC+M6HPVEOYQ'W
+M4.IUE2K=X0N.I\AX^O27ME>,7.*>NG-<BM+I;"V..AQQ@W_-[AZ@4?V1.K)8
+M-+C7-]7`LCP8/MY`2.D;95A=#=FE[74GJL">T#7Q#S_++=4[EI9U3TW$S,F6
+M7K)_\I87UF*,J9D'X%*UD!IM$)3.W"/#R.`#83^Z17H5QX&/C",?%R!ACEI[
+MGY17MK$GDIA-J9$?TJJXZ'1,)*Y^3KIO(Z4G4]OB8];N^0L<8_/\C&WPT=1W
+M7OW`L6QX5A75IO&],.2XNF?+[#2\*QYX^IU<0_)@UUXA#IICLH[%MWZ0+P\I
+M^$`D]&A0A:<T!ERG/IA4XT=@.PES,C?XB!RZ\=`W"U,U()&MX_Y.CQ:!F>83
+MV!W(_NBD&.P;W9#;23CB4HT>HBF34._SJD>BD[-..6VR6\$IZ`6<?B*:O)&^
+MK!36F6L@GX&V]>[VN)0S/%KCA'ULX(SF^GPMEQ$:X0-OF`^BF\=GI45X\P3)
+ML8N62\>P6JQG,I!BIX5<6^*#K/;RF"\1=Y:`T-W`3^U<>F"P=KK<$S.3`&O1
+MWEFNXI+`BWG,"+K,J3D-)M4MED^K4!D6@O?_70,69A8Z_27\C8&Y`;\2-GIC
+MZ?WM7&9N#"2`XA-^S,]W%6*#![A=_(DM(=&W^^5K8M$YB"7!Q623%6#J!WK:

test_data/bgzf_tests/test.txt.gz

Binary file added.