1. Olemis Lang
  2. bloodhound-t-h.o

Commits

jun...@7322e99d-02ea-0310-aa39-e9a107903beb  committed b87fa44

fixed #2252: autowikify works with CJK wiki name

  • Participants
  • Parent commits 21a409c
  • Branches default

Comments (0)

Files changed (3)

File autowikifyplugin/trunk/tracautowikify/autowikify.py

View file
  • Ignore whitespace
 # you should have received as part of this distribution.
 
 import re
+
 from genshi.builder import tag
 from trac.config import IntOption, ListOption
 from trac.core import Component, implements
 from trac.wiki.api import IWikiChangeListener, IWikiSyntaxProvider, WikiParser, WikiSystem
 
 
+def _get_breakable_pattern():
+    """Build the source text of a regex character class for the ranges below.
+
+    Returns a unicode string of the form ``[...]`` holding one ``low-high``
+    range per table entry (the CJK, Hangul and plane 2/3 ranges labelled
+    inline).  The string is returned uncompiled.
+    """
+    # From trac:source:branches/0.12-stable/trac/util/text.py, introduced in
+    # trac:r10539
+    # Each entry is a (first, last) pair of code points.
+    _breakable_char_ranges = [
+        (0x1100, 0x11FF),   # Hangul Jamo
+        (0x2E80, 0x2EFF),   # CJK Radicals Supplement
+        (0x3000, 0x303F),   # CJK Symbols and Punctuation
+        (0x3040, 0x309F),   # Hiragana
+        (0x30A0, 0x30FF),   # Katakana
+        (0x3130, 0x318F),   # Hangul Compatibility Jamo
+        (0x3190, 0x319F),   # Kanbun
+        (0x31C0, 0x31EF),   # CJK Strokes
+        (0x3200, 0x32FF),   # Enclosed CJK Letters and Months
+        (0x3300, 0x33FF),   # CJK Compatibility
+        (0x3400, 0x4DBF),   # CJK Unified Ideographs Extension A
+        (0x4E00, 0x9FFF),   # CJK Unified Ideographs
+        (0xA960, 0xA97F),   # Hangul Jamo Extended-A
+        (0xAC00, 0xD7AF),   # Hangul Syllables
+        (0xD7B0, 0xD7FF),   # Hangul Jamo Extended-B
+        (0xF900, 0xFAFF),   # CJK Compatibility Ideographs
+        (0xFE30, 0xFE4F),   # CJK Compatibility Forms
+        (0xFF00, 0xFFEF),   # Halfwidth and Fullwidth Forms
+        (0x20000, 0x2FFFF), # Plane 2
+        (0x30000, 0x3FFFF), # Plane 3
+    ]
+    char_ranges = []
+    for val in _breakable_char_ranges:
+        try:
+            # Preferred form: both end points written as literal characters.
+            low = unichr(val[0])
+            high = unichr(val[1])
+            char_ranges.append(u'%s-%s' % (low, high))
+        except ValueError:
+            # Narrow build, `re` cannot use characters >= 0x10000
+            # Fallback: write the end points as backslash-U escape text.
+            # NOTE(review): Python 2's `re` documents no `\U` escape, so this
+            # text may not be read as the intended range -- verify on a
+            # narrow build.
+            char_ranges.append(u'\\U%08x-\\U%08x' % (val[0], val[1]))
+    return u'[%s]' % u''.join(char_ranges)
+
+# Character-class source text, built once at import time.
+_breakable_pattern = _get_breakable_pattern()
+# Compiled form of that class; matches a single character from the ranges.
+_breakable_re = re.compile(_breakable_pattern)
+# Matches a single word character, with `\w` evaluated under re.UNICODE.
+_alnum_re = re.compile(r'\w', re.UNICODE)
+
+
 class AutoWikify(Component):
     """ Automatically create links for all known Wiki pages, even those that
     do not have CamelCase names. """
     ### IWikiSyntaxProvider methods
 
     def get_wiki_syntax(self):
-
-        pages_re = r'!?\b(?P<autowiki>' + \
-            '|'.join([re.escape(page) for page in self.pages]) + r')\b'
-
+        """Yield one (pattern, formatter) pair that links known page names.
+
+        Nothing is yielded when `_get_pages_re` returns no pattern.
+        """
         def page_formatter(formatter, ns, match):
+            # Wrap the matched page name in a link to that wiki page.
             page = match.group('autowiki')
             return tag.a(page, href=formatter.href.wiki(page), class_='wiki')
 
-        yield (pages_re, page_formatter)
+        pages_re = self._get_pages_re(self.pages)
+        # Skip registration when no pattern could be built from self.pages.
+        if pages_re:
+            yield (pages_re, page_formatter)
 
     def get_link_resolvers(self):
+        """Return an empty list: this component supplies no link resolvers."""
         return []
     def _update_compiled_rules(self):
+        """Reset `_compiled_rules` on this environment's WikiParser to None."""
         # Force an update of cached WikiParser.rules
         WikiParser(self.env)._compiled_rules = None
+
+    def _get_pages_re(self, pages):
+        """Build the regex source that matches any name in `pages`.
+
+        Names are sorted into four buckets according to whether their first
+        and last characters are word characters outside the breakable class;
+        each bucket is wrapped in its own boundary assertions.  The page name
+        is captured as group ``autowiki``.
+
+        Returns the pattern string, or None when `pages` yields no names.
+        """
+        def is_boundary(c):
+            # Truthy when `c` starts with a word character that is not in the
+            # breakable class.  Both regexes use `match`, so only the start of
+            # `c` is examined.
+            return _alnum_re.match(c) and not _breakable_re.match(c)
+        groups = [list() for i in xrange(4)]
+        for page in pages:
+            # Bucket index: +2 when the first character fails is_boundary,
+            # +1 when the last one does.  The whole name is passed for the
+            # first test because `match` only looks at its start.
+            # NOTE(review): an empty page name would raise IndexError at
+            # page[-1]; presumably names are never empty -- confirm.
+            idx = (not is_boundary(page)) * 2 + (not is_boundary(page[-1])) * 1
+            groups[idx].append(re.escape(page))
+
+        # One template per bucket, in index order: 0 guards both ends,
+        # 1 only the start, 2 only the end, 3 neither.  A guard accepts either
+        # `\b` or an adjacent character from the breakable class.
+        fmts = [r'(?:\b|(?<=%(break)s))(?:%(names)s)(?=\b|%(break)s)',
+                r'(?:\b|(?<=%(break)s))(?:%(names)s)',
+                r'(?:%(names)s)(?=\b|%(break)s)',
+                r'(?:%(names)s)']
+        # Render only the buckets that received at least one name.
+        groups = [fmts[idx] % {'names': '|'.join(names),
+                               'break': _breakable_pattern}
+                  for idx, names in enumerate(groups) if names]
+        if groups:
+            # The pattern also consumes an optional leading `!`.
+            return '!?(?P<autowiki>%s)' % '|'.join(groups)
+        else:
+            return None

File autowikifyplugin/trunk/tracautowikify/tests/__init__.py

View file
  • Ignore whitespace
+# -*- coding: utf-8 -*-
+
+import unittest
+
+from tracautowikify.tests import autowikify
+
+
+def suite():
+    """Return a TestSuite wrapping the suite from the `autowikify` module."""
+    suite = unittest.TestSuite()
+    suite.addTest(autowikify.suite())
+    return suite
+
+
+# Run the aggregated suite when this module is executed directly.
+if __name__ == '__main__':
+    unittest.main(defaultTest='suite')

File autowikifyplugin/trunk/tracautowikify/tests/autowikify.py

View file
  • Ignore whitespace
+# -*- coding: utf-8 -*-
+
+import unittest
+
+from genshi.core import Markup
+
+from trac.mimeview.api import Context
+from trac.test import EnvironmentStub, Mock, MockPerm
+from trac.web.href import Href
+from trac.wiki.formatter import format_to_oneliner
+from trac.wiki.model import WikiPage
+
+from tracautowikify.autowikify import AutoWikify
+
+
+class AutoWikifyTestCase(unittest.TestCase):
+    """Rendering tests for AutoWikify with ASCII and CJK wiki page names."""
+
+    def setUp(self):
+        """Create a stub environment, a mock request and four wiki pages."""
+        self.env = EnvironmentStub(enable=[AutoWikify])
+        self.req = Mock(
+            authname='anonymous', perm=MockPerm(), tz=None, args={},
+            href=Href('/'), abs_href=Href('http://www.example.com/'))
+        self.autowikify = AutoWikify(self.env)
+
+        # Each page is saved with its own name as its text.
+        for name in (u'autowikify', u'あいうName', u'Nameあいう',
+                     u'かきくけこ'):
+            page = WikiPage(self.env, name)
+            page.text = name
+            page.save('admin', '', '::1')
+        # `name` still holds the last page name from the loop above.
+        self.context = Context.from_request(self.req, WikiPage(self.env, name))
+
+    def tearDown(self):
+        """Reset the stub environment's database."""
+        self.env.reset_db()
+
+    def format_to_oneliner(self, wikidom):
+        """Render `wikidom` with Trac's one-liner formatter in the test context."""
+        return format_to_oneliner(self.env, self.context, wikidom)
+
+    def test_format(self):
+        """Expect a bare page name to be linked and a `!`-prefixed one not."""
+        self.assertEqual(
+            Markup(
+                u'autowikify - This plugin is '
+                u'<a class="wiki" href="/wiki/autowikify">autowikify</a>'
+                u'.'
+            ),
+            self.format_to_oneliner(u'!autowikify - This plugin is autowikify.'))
+
+    def test_format_cjk_name(self):
+        """Expect CJK-containing names to be linked when embedded in other text.
+
+        Covers names adjacent to ASCII letters, to another page name, and to
+        a repetition of themselves.
+        """
+        self.assertEqual(
+            Markup(
+                u'Wiki'
+                u'<a class="wiki" href="/wiki/%E3%81%82%E3%81%84%E3%81%86Name">あいうName</a>'
+                u'ABC'
+                u'<a class="wiki" href="/wiki/Name%E3%81%82%E3%81%84%E3%81%86">Nameあいう</a>'
+                u'A'
+                u'<a class="wiki" href="/wiki/%E3%81%8B%E3%81%8D%E3%81%8F%E3%81%91%E3%81%93">かきくけこ</a>'
+                u'abc'
+            ),
+            self.format_to_oneliner(
+                u'WikiあいうNameABCNameあいうAかきくけこabc'))
+        self.assertEqual(
+            Markup(
+                u'<a class="wiki" href="/wiki/%E3%81%82%E3%81%84%E3%81%86Name">あいうName</a>'
+                u'<a class="wiki" href="/wiki/%E3%81%8B%E3%81%8D%E3%81%8F%E3%81%91%E3%81%93">かきくけこ</a>'
+            ),
+            self.format_to_oneliner(
+                u'あいうNameかきくけこ'))
+        self.assertEqual(
+            Markup(
+                u'<a class="wiki" href="/wiki/%E3%81%8B%E3%81%8D%E3%81%8F%E3%81%91%E3%81%93">かきくけこ</a>'
+                u'<a class="wiki" href="/wiki/%E3%81%8B%E3%81%8D%E3%81%8F%E3%81%91%E3%81%93">かきくけこ</a>'
+            ),
+            self.format_to_oneliner(
+                u'かきくけこかきくけこ'))
+
+    def test_format_cjk_name_ucs4(self):
+        """Expect page names separated by a plane-2 character to be linked."""
+        self.assertEqual(
+            Markup(
+                u'𠀋'
+                u'<a class="wiki" href="/wiki/%E3%81%82%E3%81%84%E3%81%86Name">あいうName</a>'
+                u'𠀋'
+                u'<a class="wiki" href="/wiki/Name%E3%81%82%E3%81%84%E3%81%86">Nameあいう</a>'
+                u'𠀋'
+                u'<a class="wiki" href="/wiki/%E3%81%8B%E3%81%8D%E3%81%8F%E3%81%91%E3%81%93">かきくけこ</a>'
+                u'𠀋'
+            ),
+            self.format_to_oneliner(
+                u'𠀋あいうName𠀋Nameあいう𠀋かきくけこ𠀋'))
+
+
+def suite():
+    """Return a TestSuite of the `test*` methods of AutoWikifyTestCase."""
+    suite = unittest.TestSuite()
+    suite.addTest(unittest.makeSuite(AutoWikifyTestCase, 'test'))
+    return suite