Commits

David Jean Louis committed 68e0e04

* polib is now fully PEP8 compliant
* avoid catch-all excepts (fixes #37)
* added Makefile for convenience

Comments (0)

Files changed (4)

 .DS_Store
 .svn
 .coverage
-.tox
+.tox
+.venv
+# polib Makefile, useful for developers only.
+# Make sure you have pep8 and tox python modules installed.
+
+all: lint test clean
+
+clean:
+	@find . -name '*.pyc' |xargs rm -f
+	@rm -rf MANIFEST build dist .coverage .tox .venv __pycache__ docs/_build
+
+lint:
+	@type pep8 >/dev/null 2>&1 || { echo >&2 "Please install pep8 package."; exit 1; }
+	@pep8 -r polib.py && { echo >&2 "PEP8: congrats, everything is clean !"; }
+
+test:
+	@type tox >/dev/null 2>&1 && { tox; } || { ./runtests.sh; }
+
+dist: clean
+	@python setup.py sdist
-# -*- coding: utf-8 -*-
+# -*- coding: utf-8 -*-
 #
 # License: MIT (see LICENSE file provided)
 # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
 :func:`~polib.mofile` convenience functions.
 """
 
-__author__    = 'David Jean Louis <izimobil@gmail.com>'
-__version__   = '1.0.2'
-__all__       = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
-                 'detect_encoding', 'escape', 'unescape', 'detect_encoding',]
+__author__ = 'David Jean Louis <izimobil@gmail.com>'
+__version__ = '1.0.2'
+__all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
+           'default_encoding', 'escape', 'unescape', 'detect_encoding', ]
 
 import array
 import codecs
 import sys
 import textwrap
 
+
 # the default encoding to use when encoding cannot be detected
 default_encoding = 'utf-8'
 
 # python 2/3 compatibility helpers {{{
 
+
 if sys.version_info[:2] < (3, 0):
     PY3 = False
     text_type = unicode
 
     def u(s):
         return s
-
 # }}}
 # _pofile_or_mofile {{{
 
+
 def _pofile_or_mofile(f, type, **kwargs):
     """
     Internal function used by :func:`polib.pofile` and :func:`polib.mofile` to
         f,
         encoding=enc,
         check_for_duplicates=kwargs.get('check_for_duplicates', False),
-        klass = kwargs.get('klass')
+        klass=kwargs.get('klass')
     )
     instance = parser.parse()
     instance.wrapwidth = kwargs.get('wrapwidth', 78)
     return instance
-
 # }}}
 # function pofile() {{{
 
+
 def pofile(pofile, **kwargs):
     """
     Convenience function that parses the po or pot file ``pofile`` and returns
     ``check_for_duplicates``
         whether to check for duplicate entries when adding entries to the
         file (optional, default: ``False``).
-        
+
     ``klass``
         class which is used to instantiate the return value (optional,
         default: ``None``, the return value will be a :class:`~polib.POFile`
         instance).
     """
     return _pofile_or_mofile(pofile, 'pofile', **kwargs)
-
 # }}}
 # function mofile() {{{
 
+
 def mofile(mofile, **kwargs):
     """
     Convenience function that parses the mo file ``mofile`` and returns a
     ``check_for_duplicates``
         whether to check for duplicate entries when adding entries to the
         file (optional, default: ``False``).
-        
+
     ``klass``
         class which is used to instantiate the return value (optional,
         default: ``None``, the return value will be a :class:`~polib.POFile`
         instance).
     """
     return _pofile_or_mofile(mofile, 'mofile', **kwargs)
-
 # }}}
 # function detect_encoding() {{{
 
+
 def detect_encoding(file, binary_mode=False):
     """
     Try to detect the encoding used by the ``file``. The ``file`` argument can
                     return enc
         f.close()
     return default_encoding
-
 # }}}
 # function escape() {{{
 
+
 def escape(st):
     """
     Escapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
              .replace('\r', r'\r')\
              .replace('\n', r'\n')\
              .replace('\"', r'\"')
-
 # }}}
 # function unescape() {{{
 
+
 def unescape(st):
     """
     Unescapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
             return '\r'
         if m == '\\':
             return '\\'
-        return m # handles escaped double quote
+        return m  # handles escaped double quote
     return re.sub(r'\\(\\|n|t|r|")', unescape_repl, st)
-
 # }}}
 # class _BaseFile {{{
 
+
 class _BaseFile(list):
     """
     Common base class for the :class:`~polib.POFile` and :class:`~polib.MOFile`
             an instance of :class:`~polib._BaseEntry`.
         """
         return self.find(entry.msgid, by='msgid') is not None
-    
+
     def __eq__(self, other):
         return str(self) == str(other)
 
     def ordered_metadata(self):
         """
         Convenience method that returns an ordered version of the metadata
-        dictionnary. The return value is list of tuples (metadata name,
+        dictionary. The return value is a list of tuples (metadata name,
         metadata_value).
         """
         # copy the dict first
         """
         offsets = []
         entries = self.translated_entries()
+
         # the keys are sorted in the .mo file
         def cmp(_self, other):
             # msgfmt compares entries with msgctxt if it exists
                 msgid += self._encode(e.msgid)
                 msgstr = self._encode(e.msgstr)
             offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
-            ids  += msgid  + b('\0')
+            ids += msgid + b('\0')
             strs += msgstr + b('\0')
 
         # The header is 7 32-bit unsigned integers.
-        keystart = 7*4+16*entries_len
+        keystart = 7 * 4 + 16 * entries_len
         # and the values start after the keys
         valuestart = keystart + len(ids)
         koffsets = []
         # The string table first has the list of keys, then the list of values.
         # Each entry has first the size of the string, then the file offset.
         for o1, l1, o2, l2 in offsets:
-            koffsets += [l1, o1+keystart]
-            voffsets += [l2, o2+valuestart]
+            koffsets += [l1, o1 + keystart]
+            voffsets += [l2, o2 + valuestart]
         offsets = koffsets + voffsets
         # check endianness for magic number
         if struct.pack('@h', 1) == struct.pack('<h', 1):
 
         output = struct.pack(
             "Iiiiiii",
-            magic_number,      # Magic number
-            0,                 # Version
-            entries_len,       # # of entries
-            7*4,               # start of key index
-            7*4+entries_len*8, # start of value index
-            0, keystart        # size and offset of hash table
-                               # Important: we don't use hash tables
+            # Magic number
+            magic_number,
+            # Version
+            0,
+            # number of entries
+            entries_len,
+            # start of key index
+            7 * 4,
+            # start of value index
+            7 * 4 + entries_len * 8,
+            # size and offset of hash table, we don't use hash tables
+            0, keystart
+
         )
         if PY3 and sys.version_info.minor > 1:  # python 3.2 or superior
             output += array.array("i", offsets).tobytes()
         if isinstance(mixed, text_type):
             mixed = mixed.encode(self.encoding)
         return mixed
-
 # }}}
 # class POFile {{{
 
+
 class POFile(_BaseFile):
     """
     Po (or Pot) file reader/writer.
         """
         Convenience method that returns the list of untranslated entries.
         """
-        return [e for e in self if not e.translated() and not e.obsolete \
+        return [e for e in self if not e.translated() and not e.obsolete
                 and not 'fuzzy' in e.flags]
 
     def fuzzy_entries(self):
         for entry in self:
             if entry.msgid not in refpot_msgids:
                 entry.obsolete = True
-
 # }}}
 # class MOFile {{{
 
+
 class MOFile(_BaseFile):
     """
     Mo file reader/writer.
     This class inherits the :class:`~polib._BaseFile` class and, by
     extension, the python ``list`` type.
     """
-    BIG_ENDIAN    = 0xde120495
+    BIG_ENDIAN = 0xde120495
     LITTLE_ENDIAN = 0x950412de
 
     def __init__(self, *args, **kwargs):
         """
-        Constructor, accepts all keywords arguments accepted by 
+        Constructor, accepts all keyword arguments accepted by
         :class:`~polib._BaseFile` class.
         """
         _BaseFile.__init__(self, *args, **kwargs)
         Convenience method to keep the same interface with POFile instances.
         """
         return []
-
 # }}}
 # class _BaseEntry {{{
 
+
 class _BaseEntry(object):
     """
     Base class for :class:`~polib.POEntry` and :class:`~polib.MOEntry` classes.
         ret = []
         # write the msgctxt if any
         if self.msgctxt is not None:
-            ret += self._str_field("msgctxt", delflag, "", self.msgctxt, wrapwidth)
+            ret += self._str_field("msgctxt", delflag, "", self.msgctxt,
+                                   wrapwidth)
         # write the msgid
         ret += self._str_field("msgid", delflag, "", self.msgid, wrapwidth)
         # write the msgid_plural if any
         if self.msgid_plural:
-            ret += self._str_field("msgid_plural", delflag, "", self.msgid_plural, wrapwidth)
+            ret += self._str_field("msgid_plural", delflag, "",
+                                   self.msgid_plural, wrapwidth)
         if self.msgstr_plural:
             # write the msgstr_plural if any
             msgstrs = self.msgstr_plural
             for index in keys:
                 msgstr = msgstrs[index]
                 plural_index = '[%s]' % index
-                ret += self._str_field("msgstr", delflag, plural_index, msgstr, wrapwidth)
+                ret += self._str_field("msgstr", delflag, plural_index, msgstr,
+                                       wrapwidth)
         else:
             # otherwise write the msgstr
-            ret += self._str_field("msgstr", delflag, "", self.msgstr, wrapwidth)
+            ret += self._str_field("msgstr", delflag, "", self.msgstr,
+                                   wrapwidth)
         ret.append('')
         ret = u('\n').join(ret)
         return ret
             Returns the string representation of the entry.
             """
             return unicode(self).encode(self.encoding)
-    
+
     def __eq__(self, other):
         return str(self) == str(other)
 
-    def _str_field(self, fieldname, delflag, plural_index, field, wrapwidth=78):
+    def _str_field(self, fieldname, delflag, plural_index, field,
+                   wrapwidth=78):
         lines = field.splitlines(True)
         if len(lines) > 1:
-            lines = [''] + lines # start with initial empty line
+            lines = [''] + lines  # start with initial empty line
         else:
             escaped_field = escape(field)
             specialchars_count = 0
             for c in ['\\', '\n', '\r', '\t', '"']:
                 specialchars_count += field.count(c)
-            # comparison must take into account fieldname length + one space 
+            # comparison must take into account fieldname length + one space
             # + 2 quotes (eg. msgid "<string>")
             flength = len(fieldname) + 3
             if plural_index:
                 # Wrap the line but take field name into account
                 lines = [''] + [unescape(item) for item in wrap(
                     escaped_field,
-                    wrapwidth - 2, # 2 for quotes ""
+                    wrapwidth - 2,  # 2 for quotes ""
                     drop_whitespace=False,
                     break_long_words=False
                 )]
             #import pdb; pdb.set_trace()
             ret.append('%s"%s"' % (delflag, escape(mstr)))
         return ret
-
 # }}}
 # class POEntry {{{
 
+
 class POEntry(_BaseEntry):
     """
     Represents a po file entry.
                     filelist.append(fpath)
             filestr = ' '.join(filelist)
             if wrapwidth > 0 and len(filestr) + 3 > wrapwidth:
-                # textwrap split words that contain hyphen, this is not 
-                # what we want for filenames, so the dirty hack is to 
-                # temporally replace hyphens with a char that a file cannot 
+                # textwrap splits words that contain hyphens, this is not
+                # what we want for filenames, so the dirty hack is to
+                # temporarily replace hyphens with a char that a file cannot
                 # contain, like "*"
                 ret += [l.replace('*', '-') for l in wrap(
                     filestr.replace('-', '*'),
             ret.append('#, %s' % ', '.join(self.flags))
 
         # previous context and previous msgid/msgid_plural
-        fields = ['previous_msgctxt', 'previous_msgid', 'previous_msgid_plural']
+        fields = ['previous_msgctxt', 'previous_msgid',
+                  'previous_msgid_plural']
         for f in fields:
             val = getattr(self, f)
             if val:
                 else:
                     return -1
         # Finally: Compare message ID
-        if self.msgid > other.msgid: return 1
-        elif self.msgid < other.msgid: return -1
+        if self.msgid > other.msgid:
+            return 1
+        elif self.msgid < other.msgid:
+            return -1
         return 0
 
     def __gt__(self, other):
                     self.msgstr_plural[pos]
                 except KeyError:
                     self.msgstr_plural[pos] = ''
-
 # }}}
 # class MOEntry {{{
 
+
 class MOEntry(_BaseEntry):
     """
     Represents a mo file entry.
     """
     pass
-
 # }}}
 # class _POFileParser {{{
 
+
 class _POFileParser(object):
     """
     A finite state machine to parse efficiently and correctly po
         self.add('PP', all,                                              'PP')
         self.add('CT', ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'PC', 'PM',
                         'PP', 'MS', 'MX'],                               'CT')
-        self.add('MI', ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'TC', 'PC', 
+        self.add('MI', ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'TC', 'PC',
                  'PM', 'PP', 'MS', 'MX'],                                'MI')
         self.add('MP', ['TC', 'GC', 'PC', 'PM', 'PP', 'MI'],             'MP')
         self.add('MS', ['MI', 'MP', 'TC'],                               'MS')
             if tokens[0] in keywords and nb_tokens > 1:
                 line = line[len(tokens[0]):].lstrip()
                 if re.search(r'([^\\]|^)"', line[1:-1]):
-                    raise IOError('Syntax error in po file %s (line %s): '\
-                                  'unescaped double quote found' % \
+                    raise IOError('Syntax error in po file %s (line %s): '
+                                  'unescaped double quote found' %
                                   (self.instance.fpath, i))
                 self.current_token = line
                 self.process(keywords[tokens[0]], i)
             elif line[:1] == '"':
                 # we are on a continuation line
                 if re.search(r'([^\\]|^)"', line[1:-1]):
-                    raise IOError('Syntax error in po file %s (line %s): '\
-                                  'unescaped double quote found' % \
+                    raise IOError('Syntax error in po file %s (line %s): '
+                                  'unescaped double quote found' %
                                   (self.instance.fpath, i))
                 self.process('MC', i)
 
                 self.process('FL', i)
 
             elif tokens[0] == '#' or tokens[0].startswith('##'):
-                if line == '#': line += ' '
+                if line == '#':
+                    line += ' '
                 # we are on a translator comment line
                 self.process('TC', i)
 
 
             elif tokens[0] == '#|':
                 if nb_tokens <= 1:
-                    raise IOError('Syntax error in po file %s (line %s)' % \
+                    raise IOError('Syntax error in po file %s (line %s)' %
                                   (self.instance.fpath, i))
 
                 # Remove the marker and any whitespace right after that.
 
                 if nb_tokens == 2:
                     # Invalid continuation line.
-                    raise IOError('Syntax error in po file %s (line %s): '\
-                                  'invalid continuation line' % \
+                    raise IOError('Syntax error in po file %s (line %s): '
+                                  'invalid continuation line' %
                                   (self.instance.fpath, i))
 
                 # we are on a "previous translation" comment line,
                 if tokens[1] not in prev_keywords:
                     # Unknown keyword in previous translation comment.
-                    raise IOError('Syntax error in po file %s (line %s): '\
-                                  'unknown keyword %s' % \
+                    raise IOError('Syntax error in po file %s (line %s): '
+                                  'unknown keyword %s' %
                                   (self.instance.fpath, i, tokens[1]))
 
                 # Remove the keyword and any whitespace
                 self.process(prev_keywords[tokens[1]], i)
 
             else:
-                raise IOError('Syntax error in po file %s (line %s)' % \
+                raise IOError('Syntax error in po file %s (line %s)' %
                               (self.instance.fpath, i))
 
         if self.current_entry:
             # since entries are added when another entry is found, we must add
             # the last entry here (only if there are lines)
             self.instance.append(self.current_entry)
-        # before returning the instance, check if there's metadata and if 
+        # before returning the instance, check if there's metadata and if
         # so extract it in a dict
         metadataentry = self.instance.find('')
-        if metadataentry: # metadata found
+        if metadataentry:  # metadata found
             # remove the entry
             self.instance.remove(metadataentry)
             self.instance.metadata_is_fuzzy = metadataentry.flags
                 try:
                     key, val = msg.split(':', 1)
                     self.instance.metadata[key] = val.strip()
-                except:
+                except (ValueError, KeyError):
                     if key is not None:
-                        self.instance.metadata[key] += '\n'+ msg.strip()
+                        self.instance.metadata[key] += '\n' + msg.strip()
         # close opened file
         if not isinstance(self.fhandle, list):  # must be file
             self.fhandle.close()
                 try:
                     fil, line = occurrence.split(':')
                     if not line.isdigit():
-                        fil  = fil + line
+                        fil = fil + line
                         line = ''
                     self.current_entry.occurrences.append((fil, line))
-                except:
+                except (ValueError, AttributeError):
                     self.current_entry.occurrences.append((occurrence, ''))
         return True
 
             self.current_entry.previous_msgctxt += token
         # don't change the current state
         return False
-
 # }}}
 # class _MOFileParser {{{
 
+
 class _MOFileParser(object):
     """
     A class to parse binary mo files.
 
             self.fhandle.seek(msgstrs_index[i][1])
             msgstr = self.fhandle.read(msgstrs_index[i][0])
-            if i == 0: # metadata
+            if i == 0:  # metadata
                 raw_metadata, metadata = msgstr.split(b('\n')), {}
                 for line in raw_metadata:
                     tokens = line.split(b(':'), 1)
                 entry = self._build_entry(
                     msgid=msgid_tokens[0],
                     msgid_plural=msgid_tokens[1],
-                    msgstr_plural=dict((k,v) for k,v in enumerate(msgstr.split(b('\0'))))
+                    msgstr_plural=dict((k, v) for k, v in
+                                       enumerate(msgstr.split(b('\0'))))
                 )
             else:
                 entry = self._build_entry(msgid=msgid, msgstr=msgstr)
         if len(msgctxt_msgid) > 1:
             kwargs = {
                 'msgctxt': msgctxt_msgid[0].decode(encoding),
-                'msgid'  : msgctxt_msgid[1].decode(encoding),
+                'msgid': msgctxt_msgid[1].decode(encoding),
             }
         else:
             kwargs = {'msgid': msgid.decode(encoding)}
         if len(tup) == 1:
             return tup[0]
         return tup
-
 # }}}
 # class TextWrapper {{{
 
+
 class TextWrapper(textwrap.TextWrapper):
     """
     Subclass of textwrap.TextWrapper that backports the
     drop_whitespace option.
     """
     def __init__(self, *args, **kwargs):
-        drop_whitespace = kwargs.pop('drop_whitespace', True) 
+        drop_whitespace = kwargs.pop('drop_whitespace', True)
         textwrap.TextWrapper.__init__(self, *args, **kwargs)
         self.drop_whitespace = drop_whitespace
 
                 self._handle_long_word(chunks, cur_line, cur_len, width)
 
             # If the last chunk on this line is all whitespace, drop it.
-            if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
+            if self.drop_whitespace and cur_line and not cur_line[-1].strip():
                 del cur_line[-1]
 
             # Convert current line back to a string and store it in list
                 lines.append(indent + ''.join(cur_line))
 
         return lines
-
 # }}}
 # function wrap() {{{
 
+
 def wrap(text, width=70, **kwargs):
     """
     Wrap a single paragraph of text, returning a list of wrapped lines.
         return TextWrapper(width=width, **kwargs).wrap(text)
     return textwrap.wrap(text, width=width, **kwargs)
 
-#}}}
+# }}}
 [tox]
-envlist = py24, py25, py26, py27, py31, py32
+envlist = py25, py26, py27, py31, py32, py33
 
 [testenv]
 commands =