Commits

Joseph Tate  committed d8b0260

Fix reading of xlsx files that are missing the 'dimension' element

  • Participants
  • Parent commits afb64d2

Comments (0)

Files changed (4)

File openpyxl/reader/worksheet.py

 
     it = iterparse(source)
 
+    smax_col = None
+    smax_row = None
+    smin_col = None
+    smin_row = None
+
     for event, element in it:
 
         if element.tag == '{http://schemas.openxmlformats.org/spreadsheetml/2006/main}dimension':
 
             return min_col, min_row, max_col, max_row
 
+        if element.tag == '{http://schemas.openxmlformats.org/spreadsheetml/2006/main}c':
+            # Supposedly the dimension element is mandatory, but in practice it
+            # is sometimes left out. If so, observe the max/min extents of the
+            # cells seen and return those instead.
+            col, row = coordinate_from_string(element.get('r'))
+            if smin_row is None:
+                # Initialize the observed max/min values.
+                smin_col = smax_col = col
+                smin_row = smax_row = row
+            else:
+                # Keep track of the observed max and min (fallback if there's no dimension element).
+                smin_col = min(smin_col, col)
+                smin_row = min(smin_row, row)
+                smax_col = max(smax_col, col)
+                smax_row = max(smax_row, row)
         else:
             element.clear()
 
-    return None
+    return smin_col, smin_row, smax_col, smax_row
 
 def filter_cells(pair):
     (event, element) = pair

File openpyxl/tests/test_data/genuine/empty_no_dimensions.xlsx

Binary file added.

File openpyxl/tests/test_iter.py

 
     workbook_name = osp.join(DATADIR, 'genuine', 'empty.xlsx')
 
+    def _open_wb(self):
+        return load_workbook(filename = self.workbook_name, use_iterators = True)
+
+class TestDims(TestWorksheet):
+    expected = [ 'A1:G5', 'D1:K30', 'D2:D2', 'A1:C1' ]
+    def test_get_dimensions(self):
+        wb = self._open_wb()
+        for i, sheetn in enumerate(wb.get_sheet_names()):
+            ws = wb.get_sheet_by_name(name = sheetn)
+
+            eq_(ws._dimensions, self.expected[i])
+
 class TestText(TestWorksheet):
     sheet_name = 'Sheet1 - Text'
 

File openpyxl/tests/test_iter_stream.py

+# file openpyxl/tests/test_iter_stream.py
+
+# Copyright (c) 2011 openpyxl
+# 
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# @license: http://www.opensource.org/licenses/mit-license.php
+# @author: see AUTHORS file
+
+from nose.tools import eq_, raises, assert_raises
+import os.path as osp
+from openpyxl.tests.helper import DATADIR
+from openpyxl.reader.iter_worksheet import get_range_boundaries
+from openpyxl.reader.excel import load_workbook
+import openpyxl.tests.test_iter as test_iter
+import datetime
+
+class StreamTestWorksheet(object):
+    workbook_name = osp.join(DATADIR, 'genuine', 'empty_no_dimensions.xlsx')
+
+    def _open_wb(self):
+        ff = open(self.workbook_name, 'rb')
+        return load_workbook(filename = ff, use_iterators = True)
+
+class TestDims(StreamTestWorksheet, test_iter.TestDims):
+    pass
+
+class TestText(StreamTestWorksheet, test_iter.TestText):
+    def test_get_boundaries_range(self):
+        pass
+
+    def test_get_boundaries_one(self):
+        pass
+
+class TestIntegers(StreamTestWorksheet, test_iter.TestIntegers):
+    workbook_name = osp.join(DATADIR, 'genuine', 'empty_no_dimensions.xlsx')
+
+class TestFloats(StreamTestWorksheet, test_iter.TestFloats):
+    workbook_name = osp.join(DATADIR, 'genuine', 'empty_no_dimensions.xlsx')
+
+class TestDates(StreamTestWorksheet, test_iter.TestDates):
+    workbook_name = osp.join(DATADIR, 'genuine', 'empty_no_dimensions.xlsx')
+