Christian Boos avatar Christian Boos committed 2edf2a2

0.11.7dev: more straightforward regular expression for detecting CamelCase, taking directly into account the full list of upper and lower case unicode characters instead of playing tricks with negative look-behinds...

The previous support for Unicode (#230) used a regexp that could lead to pathological run time in the presence of Unicode words that didn't contain any alphanumeric characters.

The run time is now a tad slower, and WikiPageNames now also accept names like Page/Sub (which can be seen as a good thing), but the pathological behavior is gone.

Fixes #9025.

Comments (0)

Files changed (2)

     # here adapted to exclude terminal "." and ":" characters
     PAGE_SPLIT_RE = re.compile(r"([a-z])([A-Z])(?=[a-z])")
+    Lu = ''.join([unichr(c) for c in range(0, 0x10000) if unichr(c).isupper()])
+    Ll = ''.join([unichr(c) for c in range(0, 0x10000) if unichr(c).islower()])
     def format_page_name(self, page, split=False):
         if split or self.split_page_names:
         return page
     def get_wiki_syntax(self):
-        from trac.wiki.formatter import Formatter
-        lower = r'(?<![A-Z0-9_])' # No Upper case when looking behind
-        upper = r'(?<![a-z0-9_])' # No Lower case when looking behind
         wiki_page_name = (
-            r"\w%s(?:\w%s)+(?:\w%s(?:\w%s)*[\w/]%s)+" % # wiki words
-            (upper, lower, upper, lower, lower) +
-            r"(?:@\d+)?" # optional version
-            r"(?:#%s)?" % self.XML_NAME + # optional fragment id
-            r"(?=:(?:\Z|\s)|[^:a-zA-Z]|\s|\Z)" # what should follow it
-            )
+            r"(?:[%(upper)s](?:[%(lower)s])+/?){2,}" # wiki words
+            r"(?:@\d+)?"                             # optional version
+            r"(?:#%(xml)s)?"                         # optional fragment id
+            r"(?=:(?:\Z|\s)|[^:%(upper)s%(lower)s]|\s|\Z)"
+            # what should follow it
+            % {'upper': self.Lu, 'lower': self.Ll, 'xml': self.XML_NAME})
         # Regular WikiPageNames
         def wikipagename_link(formatter, match, fullmatch):
-            if not _check_unicode_camelcase(match):
-                return match
             return self._format_link(formatter, 'wiki', match,
                                      self.ignore_missing_pages, match)
         def wikipagename_with_label_link(formatter, match, fullmatch):
             page = fullmatch.group('wiki_page')
             label = fullmatch.group('wiki_label')
-            if not _check_unicode_camelcase(page):
-                return label
             return self._format_link(formatter, 'wiki', page, label.strip(),
                                      self.ignore_missing_pages, match)
         yield (r"!?\[(?P<wiki_page>%s)\s+(?P<wiki_label>%s|[^\]]+)\]"
         'Wiki Start'
         return self.format_page_name(
-def _check_unicode_camelcase(pagename):
-    """A camelcase word must have at least 2 humps (well...)
-    >>> _check_unicode_camelcase(u"\xc9l\xe9phant")
-    False
-    >>> _check_unicode_camelcase(u"\xc9l\xe9Phant")
-    True
-    >>> _check_unicode_camelcase(u"\xe9l\xe9Phant")
-    False
-    >>> _check_unicode_camelcase(u"\xc9l\xe9PhanT")
-    False
-    """
-    if not pagename[0].isupper():
-        return False
-    pagename = pagename.split('@', 1)[0].split('#', 1)[0]
-    if not pagename[-1].islower():
-        return False
-    humps = 0
-    for i in xrange(1, len(pagename)):
-        if pagename[i-1].isupper():
-            if pagename[i].islower():
-                humps += 1
-            else:
-                return False
-    return humps > 1


 ============================== WikiPageNames counter examples (paths)
 /absolute/path/is/NotWiki and relative/path/is/NotWiki
 /ThisIsNotWikiEither and /ThisIs/NotWikiEither but ThisIs/SubWiki
+and now This/Also.
 /absolute/path/is/NotWiki and relative/path/is/NotWiki
 /ThisIsNotWikiEither and /ThisIs/NotWikiEither but <a class="missing wiki" href="/wiki/ThisIs/SubWiki" rel="nofollow">ThisIs/SubWiki?</a>
+and now <a class="missing wiki" href="/wiki/This/Also" rel="nofollow">This/Also?</a>.
 ============================== WikiPageNames counter examples (numbers)
 Småbokstaver should not produce a link
 neither should AbAbÅ nor AbAbÅÅb
+============================== not a WikiPageNames at all (#9025 regression)
 ============================== MoinMoin style forced links
 This is a ["Wiki"] page link.
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.