Benoît Allard committed f7a138c

Fix unicode handling from mercurial strings

Previously, the decoded strings were not necessarily UTF-8, but in the
local encoding. This might have failed if the local encoding is not UTF-8.

The user name was also not decoded.

  • Participants
  • Parent commits 33016c8

Comments (0)

Files changed (1)

File blohg/hgapi/

 import re
 import time
+from mercurial import encoding
 from datetime import datetime
 from werkzeug.utils import cached_property
 re_read_more = re.compile(r'\.\. +read_more')
 re_author = re.compile(r'^(?P<name>[^<]*[^ ])( ?<(?P<email>[^<]*)>)?$')
+def hg2u(s):
+    """ returns a unicode object representing the mercurial string """
+    return encoding.fromlocal(s).decode("utf-8")
 class Page(object):
     """Pages are the very basic content element of a blog. They don't have tags
         # get metadata variables from rst source
         for i in re_metadata.finditer(self._filecontent):
-            self._vars[] ='utf-8')
+            self._vars[] = hg2u(
         # handle aliases
         if 'aliases' in self._vars:
         # commiter of this content to the repository.
         if 'author' not in self._vars:
-                self._vars['author'] = str(first_changeset.user())
+                self._vars['author'] = hg2u(first_changeset.user())
                 del self._vars['author']
-                if self._vars['author'] == '':
+                if self._vars['author'] == u'':
-                        self._vars['author'] = str(self._filectx.user())
+                        self._vars['author'] = hg2u(self._filectx.user())
                         del self._vars['author']
     def abstract(self):
-        return re_read_more.split(self._filecontent)[0].decode('utf-8')
+        return hg2u(re_read_more.split(self._filecontent)[0])
     def abstract_html(self):
     def full(self):
-        return self._filecontent.decode('utf-8')
+        return hg2u(self._filecontent)
     def full_html(self):