Commits

Takayuki KONDO committed 0a63072

add exceltable extension with xlrd module

  • Participants
  • Parent commits b8e4cb3

Comments (0)

Files changed (25)

 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 sys.path.insert(0, os.path.abspath('exts'))
+sys.path.insert(0, os.path.abspath('exts/xlrd'))
 
 # -- General configuration -----------------------------------------------------
 
 # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
 extensions = []
 extensions += ['sphinxcontrib_roles']
+extensions += ['sphinxcontrib.exceltable']
 
 # configuration case.1: define roles as list (define only roles)
 roles = ['strike', 'red']

document.xls

Binary file added.
+Excel 埋め込み
+==============
+
+サンプル
+--------
+.. exceltable:: テーブル見出し
+   :file: document.xls
+   :header: 1
+   :selection: A1:C5
+   :sheet: Sheet1
+
+
+
+
+参考
+----
+
+- http://packages.python.org/sphinxcontrib-exceltable/
+- http://pypi.python.org/pypi/sphinxcontrib-exceltable/0.1

exts/sphinxcontrib/__init__.py

+# -*- coding: utf-8 -*-
+"""
+    sphinxcontrib
+    ~~~~~~~~~~~~~
+
+    This package is a namespace package that contains all extensions
+    distributed in the ``sphinx-contrib`` distribution.
+
+    :copyright: Copyright 2007-2009 by the Sphinx team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+__import__('pkg_resources').declare_namespace(__name__)
+

exts/sphinxcontrib/exceltable.py

+# -*- coding: utf-8 -*-
+"""
+:class:`ExcelTableDirective` implements the ``exceltable`` -directive.
+"""
+__docformat__ = 'restructuredtext'
+__author__ = 'Juha Mustonen'
+__version__ = '1.0'
+
+import os
+import sys
+import doctest
+import re
+import types
+import logging
+from datetime import datetime
+
+# Import required docutils modules
+from docutils.parsers.rst import Directive, directives
+from docutils.parsers.rst.directives.tables import ListTable
+from docutils import io, nodes, statemachine, utils, frontend
+from docutils.utils import SystemMessagePropagation, Reporter
+
+import sphinx
+
+# Uses excelent module xlrd for reading Excel sheets
+# Retrieve it from http://www.python-excel.org/
+import xlrd
+
+class Messenger(Reporter):
+  def __init__(self, src='sphinxcontrib.xyz'):
+    settings = frontend.OptionParser().get_default_values()
+
+    settings.report_level = 1
+
+    Reporter.__init__(self,
+      src,
+      settings.report_level,
+      settings.halt_level,
+      stream=settings.warning_stream,
+      debug=settings.debug,
+      encoding=settings.error_encoding,
+      error_handler=settings.error_encoding_error_handler
+    )
+
+    self.log = logging.getLogger(src)
+
+  def debug(self, *msgs):
+    #return super(Messenger, self).debug(msg)
+    pass
+
+  def info(self, *msgs):
+    #return super(Messenger, self).info(msg)
+    pass
+
+  def warning(self, *msgs):
+    #super(Messenger, self).warning(msg)
+    return literal_block(text=self._prepare(msgs))
+
+  def error(self, *msgs):
+    #super(Messenger, self).error(msg)
+    text = self._prepare(msgs)
+    #self.log.error(text)
+    return literal_block(text=text)
+
+  def _prepare(self, *msgs):
+    return ' '.join([unicode(msg) for msg in msgs])
+
+
+class DirectiveTemplate(Directive):
+  """
+  Template intended for directive development, providing
+  few handy functions
+  """
+
+  def _get_directive_path(self, path):
+    """
+    Returns transformed path from the directive
+    option/content
+    """
+    source = self.state_machine.input_lines.source(
+      self.lineno - self.state_machine.input_offset - 1)
+    source_dir = os.path.dirname(os.path.abspath(source))
+    path = os.path.normpath(os.path.join(source_dir, path))
+
+    return utils.relative_path(None, path)
+
+
+
+class ExcelTableDirective(ListTable, DirectiveTemplate):
+  """
+  ExcelTableDirective implements the directive.
+  Directive allows to create RST tables from the contents
+  of the Excel sheet. The functionality is very similar to
+  csv-table (docutils) and xmltable (:mod:`sphinxcontrib.xmltable`).
+
+  Example of the directive:
+
+  .. code-block:: rest
+
+    .. exceltable::
+       :file: path/to/document.xls
+       :header: 1
+
+  """
+  #required_arguments = 0
+  #optional_arguments = 0
+  has_content = False
+  option_spec = {
+    'file': directives.path,
+    'selection': directives.unchanged_required,
+    'encoding': directives.unchanged,
+    'header': directives.unchanged,
+    'sheet': directives.unchanged,
+    'class': directives.class_option,
+    'widths': directives.unchanged,
+  }
+
+  def run(self):
+    """
+    Implements the directive
+    """
+    # Get content and options
+    file_path = self.options.get('file', None)
+    selection = self.options.get('selection', 'A1:')
+    sheet = self.options.get('sheet', '0')
+    header = self.options.get('header', '0')
+    col_widths = self.options.get('widths', None)
+
+    # Divide the selection into from and to values
+    if u':' not in selection:
+      selection += ':'
+    fromcell, tocell = selection.split(':')
+
+    if not fromcell:
+      fromcell = 'A1'
+
+    if not tocell:
+      tocell = None
+
+    #print selection, fromcell, tocell
+
+    if not file_path:
+      return [self._report('file_path -option missing')]
+
+    # Header option
+    header_rows = 0
+    if header and header.isdigit():
+      header_rows = int(header)
+
+    # Transform the path suitable for processing
+    file_path = self._get_directive_path(file_path)
+
+    print 'file path: %s' % file_path
+
+    try:
+      et = ExcelTable(open(file_path))
+      table = et.create_table(fromcell=fromcell, tocell=tocell,
+        nheader=header_rows, sheet=sheet)
+    except Exception, e:
+      return [msgr.error('Error occured while creating table: %s' % e)]
+      pass
+
+    #print table
+
+    title, messages = self.make_title()
+    #node = nodes.Element() # anonymous container for parsing
+    #self.state.nested_parse(self.content, self.content_offset, node)
+
+    # If empty table is created
+    if not table:
+      self._report('The table generated from queries is empty')
+      return [nodes.paragraph(text='')]
+
+    try:
+      table_data = []
+
+      # If there is header defined, set the header-rows param and
+      # append the data in row =>. build_table_from_list handles the header generation
+      if header and not header.isdigit():
+
+        # Otherwise expect the header to be string with column names defined in
+        # it, separating the values with comma
+        header_rows = 1
+        table_data.append([nodes.paragraph(text=hcell.strip()) for hcell in header.split(',')])
+
+      # Put the given data in rst elements: paragraph
+      for row in table['headers']:
+        table_data.append([nodes.paragraph(text=cell['value']) for cell in row])
+
+      # Iterates rows: put the given data in rst elements
+      for row in table['rows']:
+        row_data = []
+        for cell in row:
+          class_data = ['']
+          # Node based on formatting rules
+          # NOTE: rst does not support nested, use class attribute instead
+
+          if cell['italic']:
+            class_data.append('italic')
+
+          if cell['bold']:
+            node = nodes.strong(text=cell['value'])
+          else:
+            node = nodes.paragraph(text=cell['value'])
+
+          # Add additional formatting as class attributes
+          node['classes'] = class_data
+          row_data.append([node])
+
+          # FIXME: style attribute does not get into writer
+          if cell['bgcolor']:
+            rgb = [str(val) for val in cell['bgcolor']]
+            node.attributes['style'] = 'background-color: rgb(%s);' % ','.join(rgb)
+
+          #print node
+
+        table_data.append(row_data)
+
+      # If there is no data at this point, throw an error
+      if not table_data:
+        return [msgr.error('Selection did not return any data')]
+
+      # Get params from data
+      num_cols = len(table_data[0])
+
+      # Get the widths for the columns:
+      # 1. Use provided info, if available
+      # 2. Use widths from the excelsheet
+      # 3. Use default widths (equal to all)
+      #
+      # Get content widths from the first row of the table
+      # if it fails, calculate default column widths
+      if col_widths:
+        col_widths = [int(width) for width in col_widths.split(',')]
+      else:
+        col_widths = [int(col['width']) for col in table['rows'][0]]
+        col_width_total = sum(col_widths)
+        col_widths = [int(width * 100/ col_width_total) for width in col_widths]
+
+      # If still empty for some reason, use default widths
+      if not col_widths:
+        col_widths = self.get_column_widths(num_cols)
+
+      stub_columns = 0
+
+      # Sanity checks
+
+      # Different amount of cells in first and second row (possibly header and 1 row)
+      if type(header) is not int:
+        if len(table_data) > 1 and len(table_data[0]) != len(table_data[1]):
+          error = msgr.error('Data amount mismatch: check the directive data and params')
+          return [error]
+
+      self.check_table_dimensions(table_data, header_rows, stub_columns)
+
+    except SystemMessagePropagation, detail:
+        return [detail.args[0]]
+
+    # Generate the table node from the given list of elements
+    table_node = self.build_table_from_list(
+      table_data, col_widths, header_rows, stub_columns)
+
+    # Optional class parameter
+    table_node['classes'] += self.options.get('class', [])
+
+    if title:
+      table_node.insert(0, title)
+
+    #print table_node
+
+    return [table_node] + messages
+
+# TODO: Move away
+msgr = Messenger('sphinxcontrib.exceltable')
+
+class ExcelTable(object):
+  """
+  Class generates the list based table from
+  the given excel-document, suitable for the directive.
+
+  Class also implements the custom query format,
+  is to use for the directive.::
+
+    >>> import os
+    >>> from sphinxcontrib import exceltable
+    >>>
+    >>> fo = open(os.path.join(os.path.dirname(exceltable.__file__),'../doc/example/cartoons.xls'), 'r+b')
+    >>> et = exceltable.ExcelTable(fo)
+    >>>
+    >>> table = et.create_table(fromcell='A1', tocell='C4')
+    >>> assert et.fromcell == (0, 0)
+    >>> assert et.tocell == (2,3)
+    >>>
+    >>> table = et.create_table(fromcell='B10', tocell='B11', sheet='big')
+    >>> assert et.fromcell == (1,9)
+    >>> assert et.tocell == (1,10)
+
+  """
+  def __init__(self, fobj, encoding='utf-8'):
+    """
+    """
+    assert type(fobj) is file, u'File object type expected, %s given' % type(fobj)
+
+    self.file_object = fobj
+    self.fromcell = (0, 0)
+    self.tocell = (0, 0)
+
+    # xlrd uses paths only
+    # TODO: Add support for remote files
+    self.book = xlrd.open_workbook(self.file_object.name,
+      encoding_override=encoding,
+      formatting_info=True)
+
+
+  def create_table(self, fromcell=None, tocell=None, nheader=0, sheet=0):
+    """
+    Creates a table (as a list) based on given query and columns
+
+    fromcell:
+      The index of the cell where to begin. The default
+      is from the beginning of the data set (0, 0).
+
+    tocell:
+      The index of the cell where to end the selection.
+      Default is in the end of the data set.
+
+    nheader:
+      Number of lines which are considered as a header lines.
+      Normally, the value is 0 (default) or 1.
+
+    sheet:
+      Name or index of the sheet as string/unicode. The index starts from the 0
+      and is the default value. If numeric value is given, provide it in format::
+
+        et.create_table(fromcell='A1', tocell='B2', sheet='2')
+
+    """
+    rows = []
+
+    # Select sheet by given index or name
+    if type(sheet) is int or sheet.isdigit():
+      sh1 = self.book.sheet_by_index(int(sheet))
+    else:
+      sh1 = self.book.sheet_by_name(sheet)
+
+    # Name selection, like: 'A1' or 'AB12'
+    if type(fromcell) in [str, unicode]:
+      match = re.match(r'(?P<chars>[A-Z]+)(?P<nums>[1-9]+[0-9]*)', fromcell)
+      if match:
+        parts = (match.group('chars'), int(match.group('nums')))
+        fromcell = toindex(*parts)
+      else:
+        fromcell = tuple([int(num) for num in fromcell.split(u',')])
+
+    # Name selection, like: 'A1' or 'AB12'
+    if type(tocell) in [str, unicode]:
+      match = re.match(r'(?P<chars>[A-Z]+)(?P<nums>[1-9]+[0-9]*)', tocell)
+      if match:
+        parts = (match.group('chars'), int(match.group('nums')))
+        tocell = toindex(*parts)
+      else:
+        tocell = tuple([int(num) for num in tocell.split(u',')])
+
+    if not fromcell:
+      fromcell = (0, 0)
+
+    # If ending cell is not given, calculate
+    # it from rows and cols
+    #print sh1.ncols, sh1.nrows
+    #print (tocell[0] > (sh1.ncols -1)) or (tocell[1] > (sh1.nrows -1))
+    maxrow_index = sh1.nrows -1
+    maxcol_index = sh1.ncols -1
+    if not tocell:
+      tocell = (maxcol_index, maxrow_index)
+
+    # If the value is bigger than the value, default to max value
+    if tocell[0] > maxcol_index:
+      tocell = (maxcol_index, tocell[1])
+
+    # If the value is bigger than the value, default to max value
+    if tocell[1] > maxrow_index:
+      tocell = (tocell[0], maxrow_index)
+
+    # Iterate columns
+    rows = {'headers': [], 'rows': []}
+    widths = []
+    for rnum in range(fromcell[1], tocell[1]+1):
+
+      # Iterate rows within column
+      cols = []
+      for cnum in range(fromcell[0], tocell[0]+1):
+        cell = sh1.cell(rnum, cnum)
+        width = sh1.computed_column_width(cnum)
+
+        # Put data
+        cell_data = {'type': 'row', 'width': width, 'value': self._get_value(cell)}
+
+        # If header row
+        if rnum < nheader:
+          cell_data['type'] = 'header'
+
+        # Get more format info for the cell
+        cell_data.update(self._get_formatting(cell))
+
+        cols.append(cell_data)
+
+      # If first column is header, their all headers - i think
+      if cols[0]['type'] == 'header':
+        rows['headers'].append(cols)
+      else:
+        rows['rows'].append(cols)
+
+    #widths_together = sum([cell['width'] for cols in rows])
+    #print widths_together
+    #widths = [round(val * 100.0 / widths_together) for val in widths]
+
+    # Store into object for validation purposes
+    self.fromcell = fromcell
+    self.tocell = tocell
+
+    return rows
+
+
+  def _get_value(self, cell):
+    """
+    Returns the value of the xlrd Cell, based
+    on type.
+    """
+    value = None
+
+    # String
+    if cell.ctype == xlrd.XL_CELL_TEXT:
+      return unicode(cell.value)
+
+   # Number: integer or float
+    if cell.ctype == xlrd.XL_CELL_NUMBER:
+      # There is no separation between integers
+      # and other numbers. Show it as integer if
+      # it seems like a one.
+      # NOTE: float.is_integer is available only in python 2.6 and above
+      if int(cell.value) == cell.value:
+        return u'%s' % int(cell.value)
+      return u'%s' % cell.value
+
+    # Date type
+    if cell.ctype == xlrd.XL_CELL_DATE:
+      value = xlrd.xldate_as_tuple(cell.value, 0)
+
+      date = datetime(
+        year=value[0],
+        month=value[1],
+        day=value[2],
+        hour=value[3],
+        minute=value[4],
+        second=value[5],
+      )
+
+      # Show more accurate value only if it exists
+      if not value[1]:
+        return u'%s' % value[0]
+      elif value[3] and value[4] and value[5]:
+        return unicode(date)
+      else:
+        # TODO: provide a way to define this
+        return unicode(date.strftime('%Y-%m-%d'))
+
+    # Boolean
+    if cell.ctype == xlrd.XL_CELL_BOOLEAN:
+      if cell.value:
+        return _(u'True')
+      return _(u'False')
+
+    # Error
+    if cell.ctype == xlrd.XL_CELL_ERROR:
+      return _(u'Error')
+
+    return u''
+
+
+  def _get_formatting(self, cell):
+    """
+    Returns some format related information
+    about the given cell. The information is
+    required/handy when creating the table
+
+    cell:
+      Cell object where to get formatting for
+
+    Returns:
+      dictionary containing the formatting information
+    """
+    format = {'bold':False, 'italic':False, 'bgcolor':None}
+
+    xf = self.book.xf_list[cell.xf_index]
+    font = self.book.font_list[xf.font_index]
+
+    # Weight: 400 (normal), 700 (bold)
+    if font.weight > 400:
+      format['bold'] = True
+
+    # Collect italic info
+    if font.italic:
+      format['italic'] = True
+
+    # Get bg color
+    bgcolor = self.book.colour_map[xf.background.background_colour_index]
+    if bgcolor:
+      format['bgcolor'] = bgcolor
+
+    return format
+
+def toindex(col, row):
+  """
+  Calculates the index number from
+  the Excel column name. Examples:
+
+    >>> from sphinxcontrib import exceltable
+    >>> exceltable.toindex('A', 1)
+    (0, 0)
+    >>> exceltable.toindex('B', 10)
+    (1, 9)
+    >>> exceltable.toindex('Z', 2)
+    (25, 1)
+    >>> exceltable.toindex('AA', 27)
+    (26, 26)
+    >>> exceltable.toindex('AB', 1)
+    (27, 0)
+
+  .. NOTE::
+
+     Following the naming in Excel/OOCalc,
+     the row 'index' starts from the 1 and not from 0
+
+  """
+  a2z = 'ABCDEFGHIJLKMNOPQRSTUVWXYZ'
+
+  total = 0
+  mult = 0
+  for char in col:
+    total += (a2z.find(char) + (26 * mult))
+    mult += 1
+
+  return total, row-1
+
+
+def toname(colx, rowy):
+  """
+  Opposite to `toindex`
+  """
+  colname = xlrd.colname(colx)
+  return colname, rowy+1
+
+
+def setup(app):
+  """
+  Extension setup, called by Sphinx
+  """
+
+  # Sphinx 0.5 support
+  if '5' in sphinx.__version__.split('.'):
+    app.add_directive('exceltable', ExcelTableDirective, 0, (0, 0, 0))
+  else:
+    app.add_directive('exceltable', ExcelTableDirective)
+
+if __name__ == '__main__':
+  _test()

exts/xlrd/PKG-INFO

+Metadata-Version: 1.0
+Name: xlrd
+Version: 0.8.0
+Summary: Library for developers to extract data from Microsoft Excel (tm) spreadsheet files
+Home-page: http://www.lexicon.net/sjmachin/xlrd.htm
+Author: John Machin
+Author-email: sjmachin@lexicon.net
+License: BSD
+Download-URL: http://www.lexicon.net/sjmachin/xlrd.htm
+Description: Extract data from Excel spreadsheets (XLS only, versions 2.0 to 2003) on any platform. Pure Python (2.6 to 2.7). Strong support for Excel dates. Unicode-aware.
+Keywords: xls,excel,spreadsheet,workbook
+Platform: Any platform -- don't need Windows
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: BSD License
+Classifier: Programming Language :: Python
+Classifier: Operating System :: OS Independent
+Classifier: Topic :: Database
+Classifier: Topic :: Office/Business
+Classifier: Topic :: Software Development :: Libraries :: Python Modules

exts/xlrd/scripts/runxlrd.py

+# -*- coding: ascii -*-
+# <p>Copyright (c) 2005-2012 Stephen John Machin, Lingfo Pty Ltd</p>
+# <p>This script is part of the xlrd package, which is released under a
+# BSD-style licence.</p>
+
+cmd_doc = """
+Commands:
+
+2rows           Print the contents of first and last row in each sheet
+3rows           Print the contents of first, second and last row in each sheet
+bench           Same as "show", but doesn't print -- for profiling
+biff_count[1]   Print a count of each type of BIFF record in the file
+biff_dump[1]    Print a dump (char and hex) of the BIFF records in the file
+fonts           hdr + print a dump of all font objects
+hdr             Mini-overview of file (no per-sheet information)
+hotshot         Do a hotshot profile run e.g. ... -f1 hotshot bench bigfile*.xls
+labels          Dump of sheet.col_label_ranges and ...row... for each sheet
+name_dump       Dump of each object in book.name_obj_list
+names           Print brief information for each NAME record
+ov              Overview of file
+profile         Like "hotshot", but uses cProfile
+show            Print the contents of all rows in each sheet
+version[0]      Print versions of xlrd and Python and exit
+xfc             Print "XF counts" and cell-type counts -- see code for details
+
+[0] means no file arg
+[1] means only one file arg i.e. no glob.glob pattern
+"""
+
+options = None
+if __name__ == "__main__":
+
+    PSYCO = 0
+
+    import xlrd
+    import sys, time, glob, traceback, pprint, gc
+    
+    try:
+        object
+    except NameError:
+        # Python 2.1
+        class object:
+            pass
+
+    class LogHandler(object):
+
+        def __init__(self, logfileobj):
+            self.logfileobj = logfileobj
+            self.fileheading = None
+            self.shown = 0
+            
+        def setfileheading(self, fileheading):
+            self.fileheading = fileheading
+            self.shown = 0
+            
+        def write(self, text):
+            if self.fileheading and not self.shown:
+                self.logfileobj.write(self.fileheading)
+                self.shown = 1
+            self.logfileobj.write(text)
+        
+    null_cell = xlrd.empty_cell
+
+    def show_row(bk, sh, rowx, colrange, printit):
+        if bk.ragged_rows:
+            colrange = range(sh.row_len(rowx))
+        if not colrange: return
+        if printit: print
+        if bk.formatting_info:
+            for colx, ty, val, cxfx in get_row_data(bk, sh, rowx, colrange):
+                if printit:
+                    print "cell %s%d: type=%d, data: %r, xfx: %s" \
+                        % (xlrd.colname(colx), rowx+1, ty, val, cxfx)
+        else:
+            for colx, ty, val, _unused in get_row_data(bk, sh, rowx, colrange):
+                if printit:
+                    print "cell %s%d: type=%d, data: %r" % (xlrd.colname(colx), rowx+1, ty, val)
+
+    def get_row_data(bk, sh, rowx, colrange):
+        result = []
+        dmode = bk.datemode
+        ctys = sh.row_types(rowx)
+        cvals = sh.row_values(rowx)
+        for colx in colrange:
+            cty = ctys[colx]
+            cval = cvals[colx]
+            if bk.formatting_info:
+                cxfx = str(sh.cell_xf_index(rowx, colx))
+            else:
+                cxfx = ''
+            if cty == xlrd.XL_CELL_DATE:
+                try:
+                    showval = xlrd.xldate_as_tuple(cval, dmode)
+                except xlrd.XLDateError:
+                    e1, e2 = sys.exc_info()[:2]
+                    showval = "%s:%s" % (e1.__name__, e2)
+                    cty = xlrd.XL_CELL_ERROR
+            elif cty == xlrd.XL_CELL_ERROR:
+                showval = xlrd.error_text_from_code.get(cval, '<Unknown error code 0x%02x>' % cval)
+            else:
+                showval = cval
+            result.append((colx, cty, showval, cxfx))
+        return result
+
+    def bk_header(bk):
+        print
+        print "BIFF version: %s; datemode: %s" \
+            % (xlrd.biff_text_from_num[bk.biff_version], bk.datemode)
+        print "codepage: %r (encoding: %s); countries: %r" \
+            % (bk.codepage, bk.encoding, bk.countries)
+        print "Last saved by: %r" % bk.user_name
+        print "Number of data sheets: %d" % bk.nsheets
+        print "Pickleable: %d; Use mmap: %d; Formatting: %d; On demand: %d" \
+            % (bk.pickleable, bk.use_mmap, bk.formatting_info, bk.on_demand)
+        print "Ragged rows: %d" % bk.ragged_rows
+        if bk.formatting_info:
+            print "FORMATs: %d, FONTs: %d, XFs: %d" \
+                % (len(bk.format_list), len(bk.font_list), len(bk.xf_list))
+        if not options.suppress_timing:        
+            print "Load time: %.2f seconds (stage 1) %.2f seconds (stage 2)" \
+                % (bk.load_time_stage_1, bk.load_time_stage_2)
+        print
+
+    def show_fonts(bk):
+        print "Fonts:"
+        for x in xrange(len(bk.font_list)):
+            font = bk.font_list[x]
+            font.dump(header='== Index %d ==' % x, indent=4)
+
+    def show_names(bk, dump=0):
+        bk_header(bk)
+        if bk.biff_version < 50:
+            print "Names not extracted in this BIFF version"
+            return
+        nlist = bk.name_obj_list
+        print "Name list: %d entries" % len(nlist)
+        for nobj in nlist:
+            if dump:
+                nobj.dump(sys.stdout,
+                    header="\n=== Dump of name_obj_list[%d] ===" % nobj.name_index)
+            else:
+                print "[%d]\tName:%r macro:%r scope:%d\n\tresult:%r\n" \
+                    % (nobj.name_index, nobj.name, nobj.macro, nobj.scope, nobj.result)
+
+    def print_labels(sh, labs, title):
+        if not labs:return
+        for rlo, rhi, clo, chi in labs:
+            print "%s label range %s:%s contains:" \
+                % (title, xlrd.cellname(rlo, clo), xlrd.cellname(rhi-1, chi-1))
+            for rx in xrange(rlo, rhi):
+                for cx in xrange(clo, chi):
+                    print "    %s: %r" % (xlrd.cellname(rx, cx), sh.cell_value(rx, cx))
+
+    def show_labels(bk):
+        # bk_header(bk)
+        hdr = 0
+        for shx in range(bk.nsheets):
+            sh = bk.sheet_by_index(shx)
+            clabs = sh.col_label_ranges
+            rlabs = sh.row_label_ranges
+            if clabs or rlabs:
+                if not hdr:
+                    bk_header(bk)
+                    hdr = 1
+                print "sheet %d: name = %r; nrows = %d; ncols = %d" % \
+                    (shx, sh.name, sh.nrows, sh.ncols)
+                print_labels(sh, clabs, 'Col')
+                print_labels(sh, rlabs, 'Row')
+            if bk.on_demand: bk.unload_sheet(shx)
+
+    def show(bk, nshow=65535, printit=1):
+        bk_header(bk)
+        if 0:
+            rclist = xlrd.sheet.rc_stats.items()
+            rclist.sort()
+            print "rc stats"
+            for k, v in rclist:
+                print "0x%04x %7d" % (k, v)
+        if options.onesheet:
+            try:
+                shx = int(options.onesheet)
+            except ValueError:
+                shx = bk.sheet_by_name(options.onesheet).number
+            shxrange = [shx]
+        else:
+            shxrange = range(bk.nsheets)
+        # print "shxrange", shxrange
+        for shx in shxrange:
+            sh = bk.sheet_by_index(shx)
+            nrows, ncols = sh.nrows, sh.ncols
+            colrange = range(ncols)
+            anshow = min(nshow, nrows)
+            print "sheet %d: name = %r; nrows = %d; ncols = %d" % \
+                (shx, sh.name, sh.nrows, sh.ncols)
+            if nrows and ncols:
+                # Beat the bounds
+                for rowx in xrange(nrows):
+                    nc = sh.row_len(rowx)
+                    if nc:
+                        _junk = sh.row_types(rowx)[nc-1]
+                        _junk = sh.row_values(rowx)[nc-1]
+                        _junk = sh.cell(rowx, nc-1)
+            for rowx in xrange(anshow-1):
+                if not printit and rowx % 10000 == 1 and rowx > 1:
+                    print "done %d rows" % (rowx-1,)
+                show_row(bk, sh, rowx, colrange, printit)
+            if anshow and nrows:
+                show_row(bk, sh, nrows-1, colrange, printit)
+            print
+            if bk.on_demand: bk.unload_sheet(shx)
+
+    def count_xfs(bk):
+        bk_header(bk)
+        for shx in range(bk.nsheets):
+            sh = bk.sheet_by_index(shx)
+            nrows, ncols = sh.nrows, sh.ncols
+            print "sheet %d: name = %r; nrows = %d; ncols = %d" % \
+                (shx, sh.name, sh.nrows, sh.ncols)
+            # Access all xfindexes to force gathering stats
+            type_stats = [0, 0, 0, 0, 0, 0, 0]
+            for rowx in xrange(nrows):
+                for colx in xrange(sh.row_len(rowx)):
+                    xfx = sh.cell_xf_index(rowx, colx)
+                    assert xfx >= 0
+                    cty = sh.cell_type(rowx, colx)
+                    type_stats[cty] += 1
+            print "XF stats", sh._xf_index_stats
+            print "type stats", type_stats
+            print
+            if bk.on_demand: bk.unload_sheet(shx)
+
+    def main(cmd_args):
+        import optparse
+        global options, PSYCO
+        usage = "\n%prog [options] command [input-file-patterns]\n" + cmd_doc
+        oparser = optparse.OptionParser(usage)
+        oparser.add_option(
+            "-l", "--logfilename",
+            default="",
+            help="contains error messages")
+        oparser.add_option(
+            "-v", "--verbosity",
+            type="int", default=0,
+            help="level of information and diagnostics provided")
+        oparser.add_option(
+            "-p", "--pickleable",
+            type="int", default=1,
+            help="1: ensure Book object is pickleable (default); 0: don't bother")
+        oparser.add_option(
+            "-m", "--mmap",
+            type="int", default=-1,
+            help="1: use mmap; 0: don't use mmap; -1: accept heuristic")
+        oparser.add_option(
+            "-e", "--encoding",
+            default="",
+            help="encoding override")
+        oparser.add_option(
+            "-f", "--formatting",
+            type="int", default=0,
+            help="0 (default): no fmt info\n"
+                 "1: fmt info (all cells)\n"
+            )
+        oparser.add_option(
+            "-g", "--gc",
+            type="int", default=0,
+            help="0: auto gc enabled; 1: auto gc disabled, manual collect after each file; 2: no gc")
+        oparser.add_option(
+            "-s", "--onesheet",
+            default="",
+            help="restrict output to this sheet (name or index)")
+        oparser.add_option(
+            "-u", "--unnumbered",
+            action="store_true", default=0,
+            help="omit line numbers or offsets in biff_dump")
+        oparser.add_option(
+            "-d", "--on-demand",
+            action="store_true", default=0,
+            help="load sheets on demand instead of all at once")
+        oparser.add_option(
+            "-t", "--suppress-timing",
+            action="store_true", default=0,
+            help="don't print timings (diffs are less messy)")
+        oparser.add_option(
+            "-r", "--ragged-rows",
+            action="store_true", default=0,
+            help="open_workbook(..., ragged_rows=True)")
+        options, args = oparser.parse_args(cmd_args)
+        if len(args) == 1 and args[0] in ("version", ):
+            pass
+        elif len(args) < 2:
+            oparser.error("Expected at least 2 args, found %d" % len(args))
+        cmd = args[0]
+        xlrd_version = getattr(xlrd, "__VERSION__", "unknown; before 0.5")
+        if cmd == 'biff_dump':
+            xlrd.dump(args[1], unnumbered=options.unnumbered)
+            sys.exit(0)
+        if cmd == 'biff_count':
+            xlrd.count_records(args[1])
+            sys.exit(0)
+        if cmd == 'version':
+            print "xlrd: %s, from %s" % (xlrd_version, xlrd.__file__)
+            print "Python:", sys.version
+            sys.exit(0)
+        if options.logfilename:
+            logfile = LogHandler(open(options.logfilename, 'w'))
+        else:
+            logfile = sys.stdout
+        mmap_opt = options.mmap
+        mmap_arg = xlrd.USE_MMAP
+        if mmap_opt in (1, 0):
+            mmap_arg = mmap_opt
+        elif mmap_opt != -1:
+            print 'Unexpected value (%r) for mmap option -- assuming default' % mmap_opt
+        fmt_opt = options.formatting | (cmd in ('xfc', ))
+        gc_mode = options.gc
+        if gc_mode:
+            gc.disable()
+        for pattern in args[1:]:
+            for fname in glob.glob(pattern):
+                print "\n=== File: %s ===" % fname
+                if logfile != sys.stdout:
+                    logfile.setfileheading("\n=== File: %s ===\n" % fname)
+                if gc_mode == 1:
+                    n_unreachable = gc.collect()
+                    if n_unreachable:
+                        print "GC before open:", n_unreachable, "unreachable objects"
+                if PSYCO:
+                    import psyco
+                    psyco.full()
+                    PSYCO = 0
+                try:
+                    t0 = time.time()
+                    bk = xlrd.open_workbook(fname,
+                        verbosity=options.verbosity, logfile=logfile,
+                        pickleable=options.pickleable, use_mmap=mmap_arg,
+                        encoding_override=options.encoding,
+                        formatting_info=fmt_opt,
+                        on_demand=options.on_demand,
+                        ragged_rows=options.ragged_rows,
+                        )
+                    t1 = time.time()
+                    if not options.suppress_timing:
+                        print "Open took %.2f seconds" % (t1-t0,)
+                except xlrd.XLRDError:
+                    e0, e1 = sys.exc_info()[:2]
+                    print "*** Open failed: %s: %s" % (e0.__name__, e1)
+                    continue
+                except KeyboardInterrupt:
+                    print "*** KeyboardInterrupt ***"
+                    traceback.print_exc(file=sys.stdout)
+                    sys.exit(1)
+                except:
+                    e0, e1 = sys.exc_info()[:2]
+                    print "*** Open failed: %s: %s" % (e0.__name__, e1)
+                    traceback.print_exc(file=sys.stdout)
+                    continue
+                t0 = time.time()
+                if cmd == 'hdr':
+                    bk_header(bk)
+                elif cmd == 'ov': # OverView
+                    show(bk, 0)
+                elif cmd == 'show': # all rows
+                    show(bk)
+                elif cmd == '2rows': # first row and last row
+                    show(bk, 2)
+                elif cmd == '3rows': # first row, 2nd row and last row
+                    show(bk, 3)
+                elif cmd == 'bench':
+                    show(bk, printit=0)
+                elif cmd == 'fonts':
+                    bk_header(bk)
+                    show_fonts(bk)
+                elif cmd == 'names': # named reference list
+                    show_names(bk)
+                elif cmd == 'name_dump': # named reference list
+                    show_names(bk, dump=1)
+                elif cmd == 'labels':
+                    show_labels(bk)
+                elif cmd == 'xfc':
+                    count_xfs(bk)
+                else:
+                    print "*** Unknown command <%s>" % cmd
+                    sys.exit(1)
+                del bk
+                if gc_mode == 1:
+                    n_unreachable = gc.collect()
+                    if n_unreachable:
+                        print "GC post cmd:", fname, "->", n_unreachable, "unreachable objects"
+                if not options.suppress_timing:
+                    t1 = time.time()
+                    print "\ncommand took %.2f seconds\n" % (t1-t0,)
+
+        return None
+
+    av = sys.argv[1:]
+    if not av:
+        main(av)
+    firstarg = av[0].lower()
+    if firstarg == "hotshot":
+        import hotshot, hotshot.stats
+        av = av[1:]
+        prof_log_name = "XXXX.prof"
+        prof = hotshot.Profile(prof_log_name)
+        # benchtime, result = prof.runcall(main, *av)
+        result = prof.runcall(main, *(av, ))
+        print "result", repr(result)
+        prof.close()
+        stats = hotshot.stats.load(prof_log_name)
+        stats.strip_dirs()
+        stats.sort_stats('time', 'calls')
+        stats.print_stats(20)
+    elif firstarg == "profile":
+        import cProfile
+        av = av[1:]
+        cProfile.run('main(av)', 'YYYY.prof')
+        import pstats
+        p = pstats.Stats('YYYY.prof')
+        p.strip_dirs().sort_stats('cumulative').print_stats(30)
+    elif firstarg == "psyco":
+        PSYCO = 1
+        main(av[1:])
+    else:
+        main(av)

exts/xlrd/setup.py

+#!/usr/bin/env python
+
+from os import path
+import sys
+python_version = sys.version_info[:2]
+
+av = sys.argv
+if len(av) > 1 and av[1].lower() == "--egg":
+    if python_version < (2, 3):
+        raise Exception("Can't lay eggs with Python version %d.%d " % python_version)
+    del av[1]
+    from setuptools import setup
+else:
+    from distutils.core import setup
+
+the_url = 'http://www.lexicon.net/sjmachin/xlrd.htm'
+
+# Get version number without importing xlrd/__init__
+# (this horrificness is needed while using 2to3 for
+#  python 3 compatibility, it should go away once
+#  we stop using that.)
+sys.path.insert(0, path.join(path.dirname(__file__), 'xlrd'))
+from info import __VERSION__
+sys.path.pop(0)
+
+def mkargs(**kwargs):
+    return kwargs
+
+args = mkargs(
+    name = 'xlrd',
+    version = __VERSION__,
+    author = 'John Machin',
+    author_email = 'sjmachin@lexicon.net',
+    url = the_url,
+    packages = ['xlrd'],
+    scripts = [
+        'scripts/runxlrd.py',
+        ],
+    description = 'Library for developers to extract data from Microsoft Excel (tm) spreadsheet files',
+    long_description = \
+        "Extract data from Excel spreadsheets (XLS only, versions 2.0 to 2003) on any platform. " \
+        "Pure Python (2.6 to 2.7). Strong support for Excel dates. Unicode-aware.",
+    platforms = ["Any platform -- don't need Windows"],
+    license = 'BSD',
+    keywords = ['xls', 'excel', 'spreadsheet', 'workbook'],
+    )
+
+if python_version >= (2, 3):
+    args23 = mkargs(
+        download_url = the_url,
+        classifiers = [
+            'Development Status :: 5 - Production/Stable',
+            'Intended Audience :: Developers',
+            'License :: OSI Approved :: BSD License',
+            'Programming Language :: Python',
+            'Operating System :: OS Independent',
+            'Topic :: Database',
+            'Topic :: Office/Business',
+            'Topic :: Software Development :: Libraries :: Python Modules',
+            ],
+        )
+    args.update(args23)
+
+if python_version >= (2, 4):
+    args24 = mkargs(
+        package_data={
+            'xlrd': [
+                'doc/*.htm*',
+                # 'doc/*.txt',
+                'examples/*.*',
+                ],
+
+            },
+        )
+    args.update(args24)
+
+setup(**args)

exts/xlrd/xlrd/__init__.py

+from os import path
+
+from info import __VERSION__
+
+# <p>Copyright (c) 2005-2012 Stephen John Machin, Lingfo Pty Ltd</p>
+# <p>This module is part of the xlrd package, which is released under a
+# BSD-style licence.</p>
+
+import licences
+
+##
+# <p><b>A Python module for extracting data from MS Excel (TM) spreadsheet files.
+# <br /><br />
+# Version 0.7.4 -- April 2012
+# </b></p>
+#
+# <h2>General information</h2>
+#
+# <h3>Acknowledgements</h3>
+#
+# <p>
+# Development of this module would not have been possible without the document
+# "OpenOffice.org's Documentation of the Microsoft Excel File Format"
+# ("OOo docs" for short).
+# The latest version is available from OpenOffice.org in
+# <a href=http://sc.openoffice.org/excelfileformat.pdf> PDF format</a>
+# and
+# <a href=http://sc.openoffice.org/excelfileformat.odt> ODT format.</a>
+# Small portions of the OOo docs are reproduced in this
+# document. A study of the OOo docs is recommended for those who wish a
+# deeper understanding of the Excel file layout than the xlrd docs can provide.
+# </p>
+#
+# <p>Backporting to Python 2.1 was partially funded by
+#   <a href=http://journyx.com/>
+#       Journyx - provider of timesheet and project accounting solutions.
+#   </a>
+# </p>
+#
+# <p>Provision of formatting information in version 0.6.1 was funded by
+#   <a href=http://www.simplistix.co.uk>
+#       Simplistix Ltd.
+#   </a>
+# </p>
+#
+# <h3>Unicode</h3>
+#
+# <p>This module presents all text strings as Python unicode objects.
+# From Excel 97 onwards, text in Excel spreadsheets has been stored as Unicode.
+# Older files (Excel 95 and earlier) don't keep strings in Unicode;
+# a CODEPAGE record provides a codepage number (for example, 1252) which is
+# used by xlrd to derive the encoding (for same example: "cp1252") which is
+# used to translate to Unicode.</p>
+# <small>
+# <p>If the CODEPAGE record is missing (possible if the file was created
+# by third-party software), xlrd will assume that the encoding is ascii, and keep going.
+# If the actual encoding is not ascii, a UnicodeDecodeError exception will be raised and
+# you will need to determine the encoding yourself, and tell xlrd:
+# <pre>
+#     book = xlrd.open_workbook(..., encoding_override="cp1252")
+# </pre></p>
+# <p>If the CODEPAGE record exists but is wrong (for example, the codepage
+# number is 1251, but the strings are actually encoded in koi8_r),
+# it can be overridden using the same mechanism.
+# The supplied runxlrd.py has a corresponding command-line argument, which
+# may be used for experimentation:
+# <pre>
+#     runxlrd.py -e koi8_r 3rows myfile.xls
+# </pre></p>
+# <p>The first place to look for an encoding ("codec name") is
+# <a href=http://docs.python.org/lib/standard-encodings.html>
+# the Python documentation</a>.
+# </p>
+# </small>
+#
+# <h3>Dates in Excel spreadsheets</h3>
+#
+# <p>In reality, there are no such things. What you have are floating point
+# numbers and pious hope.
+# There are several problems with Excel dates:</p>
+#
+# <p>(1) Dates are not stored as a separate data type; they are stored as
+# floating point numbers and you have to rely on
+# (a) the "number format" applied to them in Excel and/or
+# (b) knowing which cells are supposed to have dates in them.
+# This module helps with (a) by inspecting the
+# format that has been applied to each number cell;
+# if it appears to be a date format, the cell
+# is classified as a date rather than a number. Feedback on this feature,
+# especially from non-English-speaking locales, would be appreciated.</p>
+#
+# <p>(2) Excel for Windows stores dates by default as the number of
+# days (or fraction thereof) since 1899-12-31T00:00:00. Excel for
+# Macintosh uses a default start date of 1904-01-01T00:00:00. The date
+# system can be changed in Excel on a per-workbook basis (for example:
+# Tools -> Options -> Calculation, tick the "1904 date system" box).
+# This is of course a bad idea if there are already dates in the
+# workbook. There is no good reason to change it even if there are no
+# dates in the workbook. Which date system is in use is recorded in the
+# workbook. A workbook transported from Windows to Macintosh (or vice
+# versa) will work correctly with the host Excel. When using this
+# module's xldate_as_tuple function to convert numbers from a workbook,
+# you must use the datemode attribute of the Book object. If you guess,
+# or make a judgement depending on where you believe the workbook was
+# created, you run the risk of being 1462 days out of kilter.</p>
+#
+# <p>Reference:
+# http://support.microsoft.com/default.aspx?scid=KB;EN-US;q180162</p>
+#
+#
+# <p>(3) The Excel implementation of the Windows-default 1900-based date system works on the
+# incorrect premise that 1900 was a leap year. It interprets the number 60 as meaning 1900-02-29,
+# which is not a valid date. Consequently any number less than 61 is ambiguous. Example: is 59 the
+# result of 1900-02-28 entered directly, or is it 1900-03-01 minus 2 days? The OpenOffice.org Calc
+# program "corrects" the Microsoft problem; entering 1900-02-27 causes the number 59 to be stored.
+# Save as an XLS file, then open the file with Excel -- you'll see 1900-02-28 displayed.</p>
+#
+# <p>Reference: http://support.microsoft.com/default.aspx?scid=kb;en-us;214326</p>
+#
+# <p>(4) The Macintosh-default 1904-based date system counts 1904-01-02 as day 1 and 1904-01-01 as day zero.
+# Thus any number such that (0.0 <= number < 1.0) is ambiguous. Is 0.625 a time of day (15:00:00),
+# independent of the calendar,
+# or should it be interpreted as an instant on a particular day (1904-01-01T15:00:00)?
+# The xldate_* functions in this module
+# take the view that such a number is a calendar-independent time of day (like Python's datetime.time type) for both
+# date systems. This is consistent with more recent Microsoft documentation
+# (for example, the help file for Excel 2002 which says that the first day
+# in the 1904 date system is 1904-01-02).
+#
+# <p>(5) Usage of the Excel DATE() function may leave strange dates in a spreadsheet. Quoting the help file,
+# in respect of the 1900 date system: "If year is between 0 (zero) and 1899 (inclusive),
+# Excel adds that value to 1900 to calculate the year. For example, DATE(108,1,2) returns January 2, 2008 (1900+108)."
+# This gimmick, semi-defensible only for arguments up to 99 and only in the pre-Y2K-awareness era,
+# means that DATE(1899, 12, 31) is interpreted as 3799-12-31.</p>
+#
+# <p>For further information, please refer to the documentation for the xldate_* functions.</p>
+#
+# <h3> Named references, constants, formulas, and macros</h3>
+#
+# <p>
+# A name is used to refer to a cell, a group of cells, a constant
+# value, a formula, or a macro. Usually the scope of a name is global
+# across the whole workbook. However it can be local to a worksheet.
+# For example, if the sales figures are in different cells in
+# different sheets, the user may define the name "Sales" in each
+# sheet. There are built-in names, like "Print_Area" and
+# "Print_Titles"; these two are naturally local to a sheet.
+# </p><p>
+# To inspect the names with a user interface like MS Excel, OOo Calc,
+# or Gnumeric, click on Insert/Names/Define. This will show the global
+# names, plus those local to the currently selected sheet.
+# </p><p>
+# A Book object provides two dictionaries (name_map and
+# name_and_scope_map) and a list (name_obj_list) which allow various
+# ways of accessing the Name objects. There is one Name object for
+# each NAME record found in the workbook. Name objects have many
+# attributes, several of which are relevant only when obj.macro is 1.
+# </p><p>
+# In the examples directory you will find namesdemo.xls which
+# showcases the many different ways that names can be used, and
+# xlrdnamesAPIdemo.py which offers 3 different queries for inspecting
+# the names in your files, and shows how to extract whatever a name is
+# referring to. There is currently one "convenience method",
+# Name.cell(), which extracts the value in the case where the name
+# refers to a single cell. More convenience methods are planned. The
+# source code for Name.cell (in __init__.py) is an extra source of
+# information on how the Name attributes hang together.
+# </p>
+#
+# <p><i>Name information is <b>not</b> extracted from files older than
+# Excel 5.0 (Book.biff_version < 50)</i></p>
+#
+# <h3>Formatting</h3>
+#
+# <h4>Introduction</h4>
+#
+# <p>This collection of features, new in xlrd version 0.6.1, is intended
+# to provide the information needed to (1) display/render spreadsheet contents
+# (say) on a screen or in a PDF file, and (2) copy spreadsheet data to another
+# file without losing the ability to display/render it.</p>
+#
+# <h4>The Palette; Colour Indexes</h4>
+#
+# <p>A colour is represented in Excel as a (red, green, blue) ("RGB") tuple
+# with each component in range(256). However it is not possible to access an
+# unlimited number of colours; each spreadsheet is limited to a palette of 64 different
+# colours (24 in Excel 3.0 and 4.0, 8 in Excel 2.0). Colours are referenced by an index
+# ("colour index") into this palette.
+#
+# Colour indexes 0 to 7 represent 8 fixed built-in colours: black, white, red, green, blue,
+# yellow, magenta, and cyan.<p>
+#
+# The remaining colours in the palette (8 to 63 in Excel 5.0 and later)
+# can be changed by the user. In the Excel 2003 UI, Tools/Options/Color presents a palette
+# of 7 rows of 8 colours. The last two rows are reserved for use in charts.<br />
+# The correspondence between this grid and the assigned
+# colour indexes is NOT left-to-right top-to-bottom.<br />
+# Indexes 8 to 15 correspond to changeable
+# parallels of the 8 fixed colours -- for example, index 7 is forever cyan;
+# index 15 starts off being cyan but can be changed by the user.<br />
+#
+# The default colour for each index depends on the file version; tables of the defaults
+# are available in the source code. If the user changes one or more colours,
+# a PALETTE record appears in the XLS file -- it gives the RGB values for *all* changeable
+# indexes.<br />
+# Note that colours can be used in "number formats": "[CYAN]...." and "[COLOR8]...." refer
+# to colour index 7; "[COLOR16]...." will produce cyan
+# unless the user changes colour index 15 to something else.<br />
+#
+# <p>In addition, there are several "magic" colour indexes used by Excel:<br />
+# 0x18 (BIFF3-BIFF4), 0x40 (BIFF5-BIFF8): System window text colour for border lines
+# (used in XF, CF, and WINDOW2 records)<br />
+# 0x19 (BIFF3-BIFF4), 0x41 (BIFF5-BIFF8): System window background colour for pattern background
+# (used in XF and CF records )<br />
+# 0x43: System face colour (dialogue background colour)<br />
+# 0x4D: System window text colour for chart border lines<br />
+# 0x4E: System window background colour for chart areas<br />
+# 0x4F: Automatic colour for chart border lines (seems to be always Black)<br />
+# 0x50: System ToolTip background colour (used in note objects)<br />
+# 0x51: System ToolTip text colour (used in note objects)<br />
+# 0x7FFF: System window text colour for fonts (used in FONT and CF records)<br />
+# Note 0x7FFF appears to be the *default* colour index. It appears quite often in FONT
+# records.<br />
+#
+# <h4>Default Formatting</h4>
+#
+# Default formatting is applied to all empty cells (those not described by a cell record).
+# Firstly row default information (ROW record, Rowinfo class) is used if available.
+# Failing that, column default information (COLINFO record, Colinfo class) is used if available.
+# As a last resort the worksheet/workbook default cell format will be used; this
+# should always be present in an Excel file,
+# described by the XF record with the fixed index 15 (0-based). By default, it uses the
+# worksheet/workbook default cell style, described by the very first XF record (index 0).
+#
+# <h4> Formatting features not included in xlrd version 0.6.1</h4>
+# <ul>
+#   <li>Rich text i.e. strings containing partial <b>bold</b> <i>italic</i>
+#       and <u>underlined</u> text, change of font inside a string, etc.
+#       See OOo docs s3.4 and s3.2.
+#       <i> Rich text is included in version 0.7.2</i></li>
+#   <li>Asian phonetic text (known as "ruby"), used for Japanese furigana. See OOo docs
+#       s3.4.2 (p15)</li>
+#   <li>Conditional formatting. See OOo docs
+#       s5.12, s6.21 (CONDFMT record), s6.16 (CF record)</li>
+#   <li>Miscellaneous sheet-level and book-level items e.g. printing layout, screen panes. </li>
+#   <li>Modern Excel file versions don't keep most of the built-in
+#       "number formats" in the file; Excel loads formats according to the
+#       user's locale. Currently xlrd's emulation of this is limited to
+#       a hard-wired table that applies to the US English locale. This may mean
+#       that currency symbols, date order, thousands separator, decimals separator, etc
+#       are inappropriate. Note that this does not affect users who are copying XLS
+#       files, only those who are visually rendering cells.</li>
+# </ul>
+#
+# <h3>Loading worksheets on demand</h3>
+#
+# <p>This feature, new in version 0.7.1, is governed by the on_demand argument
+# to the open_workbook() function and allows saving memory and time by loading
+# only those sheets that the caller is interested in, and releasing sheets
+# when no longer required.</p>
+#
+# <p>on_demand=False (default): No change. open_workbook() loads global data
+# and all sheets, releases resources no longer required (principally the
+# str or mmap object containing the Workbook stream), and returns.</p>
+#
+# <p>on_demand=True and BIFF version < 5.0: A warning message is emitted,
+# on_demand is recorded as False, and the old process is followed.</p>
+#
+# <p>on_demand=True and BIFF version >= 5.0: open_workbook() loads global
+# data and returns without releasing resources. At this stage, the only
+# information available about sheets is Book.nsheets and Book.sheet_names().</p>
+#
+# <p>Book.sheet_by_name() and Book.sheet_by_index() will load the requested
+# sheet if it is not already loaded.</p>
+#
+# <p>Book.sheets() will load all/any unloaded sheets.</p>
+#
+# <p>The caller may save memory by calling
+# Book.unload_sheet(sheet_name_or_index) when finished with the sheet.
+# This applies irrespective of the state of on_demand.</p>
+#
+# <p>The caller may re-load an unloaded sheet by calling Book.sheet_by_xxxx()
+#  -- except if those required resources have been released (which will
+# have happened automatically when on_demand is false). This is the only
+# case where an exception will be raised.</p>
+#
+# <p>The caller may query the state of a sheet:
+# Book.sheet_loaded(sheet_name_or_index) -> a bool</p>
+#
+# <p> Book.release_resources() may used to save memory and close
+# any memory-mapped file before proceding to examine already-loaded
+# sheets. Once resources are released, no further sheets can be loaded.</p>
+#
+# <p> When using on-demand, it is advisable to ensure that
+# Book.release_resources() is always called even if an exception
+# is raised in your own code; otherwise if the input file has been
+# memory-mapped, the mmap.mmap object will not be closed and you will
+# not be able to access the physical file until your Python process
+# terminates. This can be done by calling Book.release_resources()
+# explicitly in the finally suite of a try/finally block.
+# New in xlrd 0.7.2: the Book object is a "context manager", so if
+# using Python 2.5 or later, you can wrap your code in a "with"
+# statement.</p>
+##
+
+import sys, zipfile, pprint
+import timemachine
+from biffh import (
+    XLRDError,
+    biff_text_from_num,
+    error_text_from_code,
+    XL_CELL_BLANK,
+    XL_CELL_TEXT,
+    XL_CELL_BOOLEAN,
+    XL_CELL_ERROR,
+    XL_CELL_EMPTY,
+    XL_CELL_DATE,
+    XL_CELL_NUMBER
+    )
+from formula import * # is constrained by __all__
+from book import Book, colname #### TODO #### formula also has `colname` (restricted to 256 cols)
+from sheet import empty_cell
+from xldate import XLDateError, xldate_as_tuple
+
+if sys.version.startswith("IronPython"):
+    # print >> sys.stderr, "...importing encodings"
+    import encodings
+
+try:
+    import mmap
+    MMAP_AVAILABLE = 1
+except ImportError:
+    MMAP_AVAILABLE = 0
+USE_MMAP = MMAP_AVAILABLE
+
+##
+#
+# Open a spreadsheet file for data extraction.
+#
+# @param filename The path to the spreadsheet file to be opened.
+#
+# @param logfile An open file to which messages and diagnostics are written.
+#
+# @param verbosity Increases the volume of trace material written to the logfile.
+#
+# @param pickleable Default is true. In Python 2.4 or earlier, setting to false
+# will cause use of array.array objects which save some memory but can't be pickled.
+# In Python 2.5, array.arrays are used unconditionally. Note: if you have large files that
+# you need to read multiple times, it can be much faster to cPickle.dump() the xlrd.Book object
+# once, and use cPickle.load() multiple times.
+# @param use_mmap Whether to use the mmap module is determined heuristically.
+# Use this arg to override the result. Current heuristic: mmap is used if it exists.
+#
+# @param file_contents ... as a string or an mmap.mmap object or some other behave-alike object.
+# If file_contents is supplied, filename will not be used, except (possibly) in messages.
+#
+# @param encoding_override Used to overcome missing or bad codepage information
+# in older-version files. Refer to discussion in the <b>Unicode</b> section above.
+# <br /> -- New in version 0.6.0
+#
+# @param formatting_info Governs provision of a reference to an XF (eXtended Format) object
+# for each cell in the worksheet.
+# <br /> Default is <i>False</i>. This is backwards compatible and saves memory.
+# "Blank" cells (those with their own formatting information but no data) are treated as empty
+# (by ignoring the file's BLANK and MULBLANK records).
+# It cuts off any bottom "margin" of rows of empty (and blank) cells and
+# any right "margin" of columns of empty (and blank) cells.
+# Only cell_value and cell_type are available.
+# <br /> <i>True</i> provides all cells, including empty and blank cells.
+# XF information is available for each cell.
+# <br /> -- New in version 0.6.1
+#
+# @param on_demand Governs whether sheets are all loaded initially or when demanded
+# by the caller. Please refer back to the section "Loading worksheets on demand" for details.
+# <br /> -- New in version 0.7.1
+#
+# @param ragged_rows False (the default) means all rows are padded out with empty cells so that all
+# rows have the same size (Sheet.ncols). True means that there are no empty cells at the ends of rows.
+# This can result in substantial memory savings if rows are of widely varying sizes. See also the
+# Sheet.row_len() method.
+# <br /> -- New in version 0.7.2
+#
+# @return An instance of the Book class.
+
+def open_workbook(filename=None,
+    logfile=sys.stdout,
+    verbosity=0,
+    pickleable=True,
+    use_mmap=USE_MMAP,
+    file_contents=None,
+    encoding_override=None,
+    formatting_info=False,
+    on_demand=False,
+    ragged_rows=False,
+    ):
+    peeksz = 4
+    if file_contents:
+        peek = file_contents[:peeksz]
+    else:
+        f = open(filename, "rb")
+        peek = f.read(peeksz)
+        f.close()
+    if peek == timemachine.BYTES_LITERAL("PK\x03\x04"): # a ZIP file
+        if file_contents:
+            zf = zipfile.ZipFile(timemachine.BYTES_IO(file_contents))
+        else:
+            zf = zipfile.ZipFile(filename)
+        component_names = zf.namelist()
+        if verbosity:
+            logfile.write('ZIP component_names:\n')
+            pprint.pprint(component_names, logfile)
+        if 'xl/workbook.xml' in component_names:
+            import xlsx
+            bk = xlsx.open_workbook_2007_xml(
+                zf,
+                component_names,
+                logfile=logfile,
+                verbosity=verbosity,
+                pickleable=pickleable,
+                use_mmap=mmap,
+                formatting_info=formatting_info,
+                on_demand=on_demand,
+                ragged_rows=ragged_rows,
+                )
+            return bk
+        if 'xl/workbook.bin' in component_names:
+            raise XLRDError('Excel 2007 xlsb file; not supported')
+        if 'content.xml' in component_names:
+            raise XLRDError('Openoffice.org ODS file; not supported')
+        raise XLRDError('ZIP file contents not a known type of workbook')
+
+    import book
+    bk = book.open_workbook_xls(
+        filename=filename,
+        logfile=logfile,
+        verbosity=verbosity,
+        pickleable=pickleable,
+        use_mmap=use_mmap,
+        file_contents=file_contents,
+        encoding_override=encoding_override,
+        formatting_info=formatting_info,
+        on_demand=on_demand,
+        ragged_rows=ragged_rows,
+        )
+    return bk
+
+##
+# For debugging: dump an XLS file's BIFF records in char & hex.
+# @param filename The path to the file to be dumped.
+# @param outfile An open file, to which the dump is written.
+# @param unnumbered If true, omit offsets (for meaningful diffs).
+
+def dump(filename, outfile=sys.stdout, unnumbered=False):
+    from book import Book
+    from biffh import biff_dump
+    bk = Book()
+    bk.biff2_8_load(filename=filename, logfile=outfile, )
+    biff_dump(bk.mem, bk.base, bk.stream_len, 0, outfile, unnumbered)
+
+##
+# For debugging and analysis: summarise the file's BIFF records.
+# I.e. produce a sorted file of (record_name, count).
+# @param filename The path to the file to be summarised.
+# @param outfile An open file, to which the summary is written.
+
+def count_records(filename, outfile=sys.stdout):
+    from book import Book
+    from biffh import biff_count_records
+    bk = Book()
+    bk.biff2_8_load(filename=filename, logfile=outfile, )
+    biff_count_records(bk.mem, bk.base, bk.stream_len, outfile)

exts/xlrd/xlrd/biffh.py

+# -*- coding: cp1252 -*-
+
+##
+# Support module for the xlrd package.
+#
+# <p>Portions copyright � 2005-2010 Stephen John Machin, Lingfo Pty Ltd</p>
+# <p>This module is part of the xlrd package, which is released under a BSD-style licence.</p>
+##
+
+# 2010-03-01 SJM Reading SCL record
+# 2010-03-01 SJM Added more record IDs for biff_dump & biff_count
+# 2008-02-10 SJM BIFF2 BLANK record
+# 2008-02-08 SJM Preparation for Excel 2.0 support
+# 2008-02-02 SJM Added suffixes (_B2, _B2_ONLY, etc) on record names for biff_dump & biff_count
+# 2007-12-04 SJM Added support for Excel 2.x (BIFF2) files.
+# 2007-09-08 SJM Avoid crash when zero-length Unicode string missing options byte.
+# 2007-04-22 SJM Remove experimental "trimming" facility.
+
+DEBUG = 0
+
+from struct import unpack
+import sys
+from timemachine import *
+
+class XLRDError(Exception):
+    pass
+
+##
+# Parent of almost all other classes in the package. Defines a common "dump" method
+# for debugging.
+
+class BaseObject(object):
+
+    _repr_these = []
+
+    ##
+    # @param f open file object, to which the dump is written
+    # @param header text to write before the dump
+    # @param footer text to write after the dump
+    # @param indent number of leading spaces (for recursive calls)
+
+    def dump(self, f=None, header=None, footer=None, indent=0):
+        if f is None:
+            f = sys.stderr
+        if hasattr(self, "__slots__"):
+            alist = []
+            for attr in self.__slots__:
+                alist.append((attr, getattr(self, attr)))
+        else:
+            alist = self.__dict__.items()
+        alist.sort()
+        pad = " " * indent
+        if header is not None: print >> f, header
+        list_type = type([])
+        dict_type = type({})
+        for attr, value in alist:
+            if getattr(value, 'dump', None) and attr != 'book':
+                value.dump(f,
+                    header="%s%s (%s object):" % (pad, attr, value.__class__.__name__),
+                    indent=indent+4)
+            elif attr not in self._repr_these and (
+                isinstance(value, list_type) or isinstance(value, dict_type)
+                ):
+                print >> f, "%s%s: %s, len = %d" % (pad, attr, type(value), len(value))
+            else:
+                print >> f, "%s%s: %r" % (pad, attr, value)
+        if footer is not None: print >> f, footer
+
+FUN, FDT, FNU, FGE, FTX = range(5) # unknown, date, number, general, text
+DATEFORMAT = FDT
+NUMBERFORMAT = FNU
+
+(
+    XL_CELL_EMPTY,
+    XL_CELL_TEXT,
+    XL_CELL_NUMBER,
+    XL_CELL_DATE,
+    XL_CELL_BOOLEAN,
+    XL_CELL_ERROR,
+    XL_CELL_BLANK, # for use in debugging, gathering stats, etc
+) = range(7)
+
+biff_text_from_num = {
+    0:  "(not BIFF)",
+    20: "2.0",
+    21: "2.1",
+    30: "3",
+    40: "4S",
+    45: "4W",
+    50: "5",
+    70: "7",
+    80: "8",
+    85: "8X",
+    }
+
+##
+# <p>This dictionary can be used to produce a text version of the internal codes
+# that Excel uses for error cells. Here are its contents:
+# <pre>
+# 0x00: '#NULL!',  # Intersection of two cell ranges is empty
+# 0x07: '#DIV/0!', # Division by zero
+# 0x0F: '#VALUE!', # Wrong type of operand
+# 0x17: '#REF!',   # Illegal or deleted cell reference
+# 0x1D: '#NAME?',  # Wrong function or range name
+# 0x24: '#NUM!',   # Value range overflow
+# 0x2A: '#N/A',    # Argument or function not available
+# </pre></p>
+
+error_text_from_code = {
+    0x00: '#NULL!',  # Intersection of two cell ranges is empty
+    0x07: '#DIV/0!', # Division by zero
+    0x0F: '#VALUE!', # Wrong type of operand
+    0x17: '#REF!',   # Illegal or deleted cell reference
+    0x1D: '#NAME?',  # Wrong function or range name
+    0x24: '#NUM!',   # Value range overflow
+    0x2A: '#N/A',    # Argument or function not available
+}
+
+BIFF_FIRST_UNICODE = 80
+
+XL_WORKBOOK_GLOBALS = WBKBLOBAL = 0x5
+XL_WORKBOOK_GLOBALS_4W = 0x100
+XL_WORKSHEET = WRKSHEET = 0x10
+
+XL_BOUNDSHEET_WORKSHEET = 0x00
+XL_BOUNDSHEET_CHART     = 0x02
+XL_BOUNDSHEET_VB_MODULE = 0x06
+
+# XL_RK2 = 0x7e
+XL_ARRAY  = 0x0221
+XL_ARRAY2 = 0x0021
+XL_BLANK = 0x0201
+XL_BLANK_B2 = 0x01
+XL_BOF = 0x809
+XL_BOOLERR = 0x205
+XL_BOOLERR_B2 = 0x5
+XL_BOUNDSHEET = 0x85
+XL_BUILTINFMTCOUNT = 0x56
+XL_CF = 0x01B1
+XL_CODEPAGE = 0x42
+XL_COLINFO = 0x7D
+XL_COLUMNDEFAULT = 0x20 # BIFF2 only
+XL_COLWIDTH = 0x24 # BIFF2 only
+XL_CONDFMT = 0x01B0
+XL_CONTINUE = 0x3c
+XL_COUNTRY = 0x8C
+XL_DATEMODE = 0x22
+XL_DEFAULTROWHEIGHT = 0x0225
+XL_DEFCOLWIDTH = 0x55
+XL_DIMENSION = 0x200
+XL_DIMENSION2 = 0x0
+XL_EFONT = 0x45
+XL_EOF = 0x0a
+XL_EXTERNNAME = 0x23
+XL_EXTERNSHEET = 0x17
+XL_EXTSST = 0xff
+XL_FEAT11 = 0x872
+XL_FILEPASS = 0x2f
+XL_FONT = 0x31
+XL_FONT_B3B4 = 0x231
+XL_FORMAT = 0x41e
+XL_FORMAT2 = 0x1E # BIFF2, BIFF3
+XL_FORMULA = 0x6
+XL_FORMULA3 = 0x206
+XL_FORMULA4 = 0x406
+XL_GCW = 0xab
+XL_HLINK = 0x01B8
+XL_QUICKTIP = 0x0800
+XL_HORIZONTALPAGEBREAKS = 0x1b
+XL_INDEX = 0x20b
+XL_INTEGER = 0x2 # BIFF2 only
+XL_IXFE = 0x44 # BIFF2 only
+XL_LABEL = 0x204
+XL_LABEL_B2 = 0x04
+XL_LABELRANGES = 0x15f
+XL_LABELSST = 0xfd
+XL_LEFTMARGIN = 0x26
+XL_TOPMARGIN = 0x28
+XL_RIGHTMARGIN = 0x27
+XL_BOTTOMMARGIN = 0x29
+XL_HEADER = 0x14
+XL_FOOTER = 0x15
+XL_HCENTER = 0x83
+XL_VCENTER = 0x84
+XL_MERGEDCELLS = 0xE5
+XL_MSO_DRAWING = 0x00EC
+XL_MSO_DRAWING_GROUP = 0x00EB
+XL_MSO_DRAWING_SELECTION = 0x00ED
+XL_MULRK = 0xbd
+XL_MULBLANK = 0xbe
+XL_NAME = 0x18
+XL_NOTE = 0x1c
+XL_NUMBER = 0x203
+XL_NUMBER_B2 = 0x3
+XL_OBJ = 0x5D
+XL_PAGESETUP = 0xA1
+XL_PALETTE = 0x92
+XL_PANE = 0x41
+XL_PRINTGRIDLINES = 0x2B
+XL_PRINTHEADERS = 0x2A
+XL_RK = 0x27e
+XL_ROW = 0x208
+XL_ROW_B2 = 0x08
+XL_RSTRING = 0xd6
+XL_SCL = 0x00A0
+XL_SHEETHDR = 0x8F # BIFF4W only
+XL_SHEETPR = 0x81
+XL_SHEETSOFFSET = 0x8E # BIFF4W only
+XL_SHRFMLA = 0x04bc
+XL_SST = 0xfc
+XL_STANDARDWIDTH = 0x99
+XL_STRING = 0x207
+XL_STRING_B2 = 0x7
+XL_STYLE = 0x293
+XL_SUPBOOK = 0x1AE # aka EXTERNALBOOK in OOo docs
+XL_TABLEOP = 0x236
+XL_TABLEOP2 = 0x37
+XL_TABLEOP_B2 = 0x36
+XL_TXO = 0x1b6
+XL_UNCALCED = 0x5e
+XL_UNKNOWN = 0xffff
+XL_VERTICALPAGEBREAKS = 0x1a
+XL_WINDOW2    = 0x023E
+XL_WINDOW2_B2 = 0x003E
+XL_WRITEACCESS = 0x5C
+XL_WSBOOL = XL_SHEETPR
+XL_XF = 0xe0
+XL_XF2 = 0x0043 # BIFF2 version of XF record
+XL_XF3 = 0x0243 # BIFF3 version of XF record
+XL_XF4 = 0x0443 # BIFF4 version of XF record
+
+boflen = {0x0809: 8, 0x0409: 6, 0x0209: 6, 0x0009: 4}
+bofcodes = (0x0809, 0x0409, 0x0209, 0x0009)
+
+XL_FORMULA_OPCODES = (0x0006, 0x0406, 0x0206)
+
+_cell_opcode_list = [
+    XL_BOOLERR,
+    XL_FORMULA,
+    XL_FORMULA3,
+    XL_FORMULA4,
+    XL_LABEL,
+    XL_LABELSST,
+    XL_MULRK,
+    XL_NUMBER,
+    XL_RK,
+    XL_RSTRING,
+    ]
+_cell_opcode_dict = {}
+for _cell_opcode in _cell_opcode_list:
+    _cell_opcode_dict[_cell_opcode] = 1
+is_cell_opcode = _cell_opcode_dict.has_key
+
+# def fprintf(f, fmt, *vargs): f.write(fmt % vargs)
+
+def fprintf(f, fmt, *vargs):
+    if fmt.endswith('\n'):
+        print >> f, fmt[:-1] % vargs
+    else:
+        print >> f, fmt % vargs,
+
+def upkbits(tgt_obj, src, manifest, local_setattr=setattr):
+    for n, mask, attr in manifest:
+        local_setattr(tgt_obj, attr, (src & mask) >> n)
+
+def upkbitsL(tgt_obj, src, manifest, local_setattr=setattr, local_int=int):
+    for n, mask, attr in manifest:
+        local_setattr(tgt_obj, attr, local_int((src & mask) >> n))
+
+def unpack_string(data, pos, encoding, lenlen=1):
+    nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0]
+    pos += lenlen
+    return unicode(data[pos:pos+nchars], encoding)
+
+def unpack_string_update_pos(data, pos, encoding, lenlen=1, known_len=None):
+    if known_len is not None:
+        # On a NAME record, the length byte is detached from the front of the string.
+        nchars = known_len
+    else:
+        nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0]
+        pos += lenlen
+    newpos = pos + nchars
+    return (unicode(data[pos:newpos], encoding), newpos)
+
+def unpack_unicode(data, pos, lenlen=2):
+    "Return unicode_strg"
+    nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0]
+    if not nchars:
+        # Ambiguous whether 0-length string should have an "options" byte.
+        # Avoid crash if missing.
+        return u""
+    pos += lenlen
+    options = ord(data[pos])
+    pos += 1
+    # phonetic = options & 0x04
+    # richtext = options & 0x08
+    if options & 0x08:
+        # rt = unpack('<H', data[pos:pos+2])[0] # unused
+        pos += 2
+    if options & 0x04:
+        # sz = unpack('<i', data[pos:pos+4])[0] # unused
+        pos += 4
+    if options & 0x01:
+        # Uncompressed UTF-16-LE
+        rawstrg = data[pos:pos+2*nchars]
+        # if DEBUG: print "nchars=%d pos=%d rawstrg=%r" % (nchars, pos, rawstrg)
+        strg = unicode(rawstrg, 'utf_16_le')
+        # pos += 2*nchars
+    else:
+        # Note: this is COMPRESSED (not ASCII!) encoding!!!
+        # Merely returning the raw bytes would work OK 99.99% of the time
+        # if the local codepage was cp1252 -- however this would rapidly go pear-shaped
+        # for other codepages so we grit our Anglocentric teeth and return Unicode :-)
+
+        strg = unicode(data[pos:pos+nchars], "latin_1")
+        # pos += nchars
+    # if richtext:
+    #     pos += 4 * rt
+    # if phonetic:
+    #     pos += sz
+    # return (strg, pos)
+    return strg
+
+def unpack_unicode_update_pos(data, pos, lenlen=2, known_len=None):
+    "Return (unicode_strg, updated value of pos)"
+    if known_len is not None:
+        # On a NAME record, the length byte is detached from the front of the string.
+        nchars = known_len
+    else:
+        nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0]
+        pos += lenlen
+    if not nchars and not data[pos:]:
+        # Zero-length string with no options byte
+        return (u"", pos)
+    options = ord(data[pos])
+    pos += 1
+    phonetic = options & 0x04
+    richtext = options & 0x08
+    if richtext:
+        rt = unpack('<H', data[pos:pos+2])[0]
+        pos += 2
+    if phonetic:
+        sz = unpack('<i', data[pos:pos+4])[0]
+        pos += 4
+    if options & 0x01:
+        # Uncompressed UTF-16-LE
+        strg = unicode(data[pos:pos+2*nchars], 'utf_16_le')
+        pos += 2*nchars
+    else:
+        # Note: this is COMPRESSED (not ASCII!) encoding!!!
+        strg = unicode(data[pos:pos+nchars], "latin_1")
+        pos += nchars
+    if richtext:
+        pos += 4 * rt
+    if phonetic:
+        pos += sz
+    return (strg, pos)
+
+def unpack_cell_range_address_list_update_pos(
+    output_list, data, pos, biff_version, addr_size=6):
+    # output_list is updated in situ
+    assert addr_size in (6, 8)
+    # Used to assert size == 6 if not BIFF8, but pyWLWriter writes
+    # BIFF8-only MERGEDCELLS records in a BIFF5 file!
+    n, = unpack("<H", data[pos:pos+2])
+    pos += 2
+    if n:
+        if addr_size == 6:
+            fmt = "<HHBB"
+        else:
+            fmt = "<HHHH"
+        for _unused in xrange(n):
+            ra, rb, ca, cb = unpack(fmt, data[pos:pos+addr_size])
+            output_list.append((ra, rb+1, ca, cb+1))
+            pos += addr_size
+    return pos
+
+_brecstrg = """\
+0000 DIMENSIONS_B2
+0001 BLANK_B2
+0002 INTEGER_B2_ONLY
+0003 NUMBER_B2
+0004 LABEL_B2
+0005 BOOLERR_B2
+0006 FORMULA
+0007 STRING_B2
+0008 ROW_B2
+0009 BOF_B2
+000A EOF
+000B INDEX_B2_ONLY
+000C CALCCOUNT
+000D CALCMODE
+000E PRECISION
+000F REFMODE
+0010 DELTA
+0011 ITERATION
+0012 PROTECT
+0013 PASSWORD
+0014 HEADER
+0015 FOOTER
+0016 EXTERNCOUNT
+0017 EXTERNSHEET
+0018 NAME_B2,5+
+0019 WINDOWPROTECT
+001A VERTICALPAGEBREAKS
+001B HORIZONTALPAGEBREAKS
+001C NOTE
+001D SELECTION
+001E FORMAT_B2-3
+001F BUILTINFMTCOUNT_B2
+0020 COLUMNDEFAULT_B2_ONLY
+0021 ARRAY_B2_ONLY
+0022 DATEMODE
+0023 EXTERNNAME
+0024 COLWIDTH_B2_ONLY
+0025 DEFAULTROWHEIGHT_B2_ONLY
+0026 LEFTMARGIN
+0027 RIGHTMARGIN
+0028 TOPMARGIN
+0029 BOTTOMMARGIN
+002A PRINTHEADERS
+002B PRINTGRIDLINES
+002F FILEPASS
+0031 FONT
+0032 FONT2_B2_ONLY
+0036 TABLEOP_B2
+0037 TABLEOP2_B2
+003C CONTINUE
+003D WINDOW1
+003E WINDOW2_B2