Commits

Benoît Allard committed f7a138c

Fix unicode handling from mercurial strings

Previously, the decoded strings were not necessarily UTF-8, but in the
local encoding. This might have failed if the local encoding is not
"UTF-8".

The user name was also not decoded.

  • Participants
  • Parent commits 33016c8

Comments (0)

Files changed (1)

File blohg/hgapi/models.py

 import re
 import time
 
+from mercurial import encoding
+
 from datetime import datetime
 from werkzeug.utils import cached_property
 
 re_read_more = re.compile(r'\.\. +read_more')
 re_author = re.compile(r'^(?P<name>[^<]*[^ ])( ?<(?P<email>[^<]*)>)?$')
 
+def hg2u(s):
+    """ returns a unicode object representing the mercurial string """
+    return encoding.fromlocal(s).decode("utf-8")
 
 class Page(object):
     """Pages are the very basic content element of a blog. They don't have tags
 
         # get metadata variables from rst source
         for i in re_metadata.finditer(self._filecontent):
-            self._vars[i.group(1)] = i.group(2).decode('utf-8')
+            self._vars[i.group(1)] = hg2u(i.group(2))
 
         # handle aliases
         if 'aliases' in self._vars:
         # commiter of this content to the repository.
         if 'author' not in self._vars:
             try:
-                self._vars['author'] = str(first_changeset.user())
+                self._vars['author'] = hg2u(first_changeset.user())
             except:
                 del self._vars['author']
             else:
-                if self._vars['author'] == '':
+                if self._vars['author'] == u'':
                     try:
-                        self._vars['author'] = str(self._filectx.user())
+                        self._vars['author'] = hg2u(self._filectx.user())
                     except:
                         del self._vars['author']
 
 
     @cached_property
     def abstract(self):
-        return re_read_more.split(self._filecontent)[0].decode('utf-8')
+        return hg2u(re_read_more.split(self._filecontent)[0])
 
     @cached_property
     def abstract_html(self):
 
     @cached_property
     def full(self):
-        return self._filecontent.decode('utf-8')
+        return hg2u(self._filecontent)
 
     @cached_property
     def full_html(self):