Exception in load_workbook

Issue #659 wontfix
Alexandr Zamaraev created an issue
$ ipython3
Python 3.4.3 (default, Oct 14 2015, 20:28:29) 
Type "copyright", "credits" or "license" for more information.

IPython 1.2.1 -- An enhanced Interactive Python.
?         -> Introduction and overview of IPython's features.
%quickref -> Quick reference.
help      -> Python's own help system.
object?   -> Details about 'object', use 'object??' for extra details.

In [1]:  import requests
In [2]: import openpyxl
In [3]: rsp = requests.get('http://eglo-msk.ru/downloadFiles/ostatki_msk.xlsx', stream=True)
In [4]: xls = openpyxl.load_workbook(rsp.raw)
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-4-9cb7f1cedc92> in <module>()
----> 1 xls = openpyxl.load_workbook(rsp.raw)

/home/tonal/.local/lib/python3.4/site-packages/openpyxl/reader/excel.py in load_workbook(filename, read_only, use_iterators, keep_vba, guess_types, data_only)
    190         if strings_path.startswith("/"):
    191             strings_path = strings_path[1:]
--> 192         shared_strings = read_string_table(archive.read(strings_path))
    193     else:
    194         shared_strings = []

/usr/lib/python3.4/zipfile.py in read(self, name, pwd)
   1115     def read(self, name, pwd=None):
   1116         """Return file bytes (as a string) for name."""
-> 1117         with self.open(name, "r", pwd) as fp:
   1118             return fp.read()
   1119 

/usr/lib/python3.4/zipfile.py in open(self, name, mode, pwd)
   1146             else:
   1147                 # Get info object for name
-> 1148                 zinfo = self.getinfo(name)
   1149             zef_file.seek(zinfo.header_offset, 0)
   1150 

/usr/lib/python3.4/zipfile.py in getinfo(self, name)
   1082         if info is None:
   1083             raise KeyError(
-> 1084                 'There is no item named %r in the archive' % name)
   1085 
   1086         return info

KeyError: "There is no item named 'xl/sharedStrings.xml' in the archive"

In archive exists xl/SharedStrings.xml

Bat this file open Ok libreoffice (v 5.1.4.2)

Comments (8)

  1. CharlieC

    It's interesting but the file is technically invalid: the manifest refers to sharedStrings.xml but the archive contains SharedStrings.xml.

    What program created the initial file?

  2. Aleksandr Nizovoy

    I also catch this problem with 1C. Try this:

    # module fixer.py
    import zipfile
    import tempfile
    import os
    
    
    def fix_xlsx(in_file):
        tmpfd, tmp = tempfile.mkstemp(dir=os.path.dirname(in_file))
        os.close(tmpfd)
        filename = '[Content_Types].xml'
        data = ''
        with zipfile.ZipFile(in_file, 'r') as zin:
            with zipfile.ZipFile(tmp, 'w') as zout:
                for item in zin.infolist():
                    if item.filename != filename:
                        zout.writestr(item, zin.read(item.filename))
                    else:
                        data = zin.read(filename).decode()
        os.remove(in_file)
        os.rename(tmp, in_file)
        data = data.replace('/xl/sharedStrings.xml', '/xl/SharedStrings.xml')
        with zipfile.ZipFile(in_file, mode='a', compression=zipfile.ZIP_DEFLATED) as zf:
            zf.writestr(filename, data)
    

    Using like that:

    from openpyxl import load_workbook
    from fixer import fix_xlsx
    
    
        try:
            wb = load_workbook(filename)
        except KeyError:
            fix_xlsx(filename)
            wb = load_workbook(filename)
    
  3. Алексей Коробко

    Thank you for the good solution, but it brakes regular Excel files. I helped rename file SharedStrings.xml.

    import os
    import tempfile
    import zipfile
    
    
    def fix_xlsx(in_file):
        zin = zipfile.ZipFile(in_file, 'r')
        if 'xl/SharedStrings.xml' in zin.namelist():
            tmpfd, tmp = tempfile.mkstemp(dir=os.path.dirname(in_file))
            os.close(tmpfd)
    
            with zipfile.ZipFile(tmp, 'w') as zout:
                for item in zin.infolist():
                    if item.filename == 'xl/SharedStrings.xml':
                        zout.writestr('xl/sharedStrings.xml', zin.read(item.filename))
                    else:
                        zout.writestr(item, zin.read(item.filename))
    
            zin.close()
            os.remove(in_file)
            os.rename(tmp, in_file)
    
  4. Log in to comment