Sergey Maranchuk avatar Sergey Maranchuk committed eb8c630

Added regexps for validating URLs and email addresses; fixed XSS :|

Comments (0)

Files changed (1)

bbmarkup/__init__.py

 
 __all__ = ('BBCODE_RULES', 'bbcode')
 
+# Regexp for URL validation, taken from Django's URLField, extended to accept ftp:// and to allow surrounding whitespace
+URL_RE = r'\s*((ftp|https?)://(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,6}\.?|'\
+    'localhost|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})(?::\d+)?(?:/?|[/?]\S+))\s*'
+
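+# Regexp for email validation, taken from Django, extended to allow surrounding whitespace.
+# NOTE(review): only the first literal below is a raw string; in the non-raw continuation `\\[`
+# collapses to an escaped literal `[`, and `\001-011` looks like a typo for `\001-\011` -- confirm.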
+EMAIL_RE = r"""\s*(([-!#$%&'*+/=?^_`{}|~0-9A-Z]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z]+)*|^"""\
+        """([\001-\010\013\014\016-\037!#-\[\]-\177]|\\[\001-011\013\014\016-\177])*")"""\
+        """@(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,6}\.?)\s*"""
+
+
 def code_parser(matchobj):
     """
     Escaping bbcode and html tags between [code] tags.
 
 BBCODE_RULES = [
         {'pattern': r'\[code\](.*?)\[/code\]', 'repl': code_parser, 'sortkey': 100},
-        {'pattern': r'\[url\](.*?)\[/url\]', 'repl': r'<a href="\1">\1</a>'},
-        {'pattern': r'\[url=(.*?)\](.*?)\[/url\]', 'repl': r'<a href="\1">\2</a>'},
-        {'pattern': r'\[link\](.*?)\[/link\]', 'repl': r'<a href="\1">\1</a>'},
-        {'pattern': r'\[link=(.*?)\](.*?)\[/link\]', 'repl': r'<a href="\1">\2</a>'},
-        {'pattern': r'\[email\](.*?)\[/email\]', 'repl': r'<a href="mailto:\1">\1</a>'},
-        {'pattern': r'\[email=(.*?)\](.*?)\[/email\]', 'repl': r'<a href="mailto:\1">\2</a>'},
-        {'pattern': r'\[img\](.*?)\[/img\]', 'repl': r'<img src="\1">'},
-        {'pattern': r'\[img=(.*?)\](.*?)\[/img\]', 'repl': r'<img src="\1" alt="\2">'},
+        {'pattern': r'\[url\]%s\[/url\]' % URL_RE, 'repl': r'<a href="\1">\1</a>'},
+        {'pattern': r'\[url=%s\](.*?)\[/url\]' % URL_RE, 'repl': r'<a href="\1">\3</a>'},
+        {'pattern': r'\[link\]%s\[/link\]' % URL_RE, 'repl': r'<a href="\1">\1</a>'},
+        {'pattern': r'\[link=%s\](.*?)\[/link\]' % URL_RE, 'repl': r'<a href="\1">\3</a>'},
+        {'pattern': r'\[email\]%s\[/email\]' % EMAIL_RE, 'repl': r'<a href="mailto:\1">\1</a>'},
+        {'pattern': r'\[email=%s\](.*?)\[/email\]' % EMAIL_RE, 'repl': r'<a href="mailto:\1">\5</a>'},
+        {'pattern': r'\[img\]%s\[/img\]' % URL_RE, 'repl': r'<img src="\1">'},
+        {'pattern': r'\[img=%s\](.*?)\[/img\]' % URL_RE, 'repl': r'<img src="\1" alt="\3">'},
         {'pattern': r'\[color=([a-zA-Z]*|\#?[0-9a-fA-F]{6})\](.*?)\[/color\]', 'repl': r'<span style="color:\1">\2</span>'},
         {'pattern': r'\[b\](.*?)\[/b\]', 'repl': r'<strong>\1</strong>'},
         {'pattern': r'\[i\](.*?)\[/i\]', 'repl': r'<em>\1</em>'},
 
 BBCODE_RULES_COMPILED = []
 for bbset in (getattr(settings, 'BBMARKUP_CUSTOM_RULES', []) or BBCODE_RULES):
-    bbset['pattern'] = re.compile(bbset['pattern'], re.DOTALL)
+    bbset['pattern'] = re.compile(bbset['pattern'], re.DOTALL | re.IGNORECASE)
     bbset.setdefault('sortkey', 0)
     bbset.setdefault('nested', 0)
     BBCODE_RULES_COMPILED.append(bbset)
     u'<span style="color:#FAaF12">Lorem</span>'
     >>> bbcode('[color=#FAaF121]Lorem[/color]')
     u'[color=#FAaF121]Lorem[/color]'
-    
+    >>> bbcode('[url]http://slav0nic.org.ua[/url]]')
+    u'<a href="http://slav0nic.org.ua">http://slav0nic.org.ua</a>]'
+    >>> bbcode('[url]http://slav0nic.org.ua[/url]')
+    u'<a href="http://slav0nic.org.ua">http://slav0nic.org.ua</a>'
+    >>> bbcode('[url]  ftp://slav0nic.org.ua/test  [/url]')
+    u'<a href="ftp://slav0nic.org.ua/test">ftp://slav0nic.org.ua/test</a>'
+    >>> bbcode('[url]  http://slav0nic.org.ua:80/test/foo.py?s=bar#foo1[/url]')
+    u'<a href="http://slav0nic.org.ua:80/test/foo.py?s=bar#foo1">http://slav0nic.org.ua:80/test/foo.py?s=bar#foo1</a>'
+    >>> bbcode('[link=http://test.com/] test [/link]')
+    u'<a href="http://test.com/"> test </a>'
+    >>> bbcode('[url= http://test.com/  ] test [/url]')
+    u'<a href="http://test.com/"> test </a>'
+    >>> bbcode('[url= http://test.com/][/url]')
+    u'<a href="http://test.com/"></a>'
+    >>> bbcode('[img]  https://slav0nic.org.ua:80/test/logo.png [/img]')
+    u'<img src="https://slav0nic.org.ua:80/test/logo.png">'
+    >>> bbcode('[img]javascript:alert("XSS");[/img]')
+    u'[img]javascript:alert("XSS");[/img]'
+    >>> bbcode('''[email]blabla@test.com" onmouseover="alert('Hacked');[/email]''')
+    u'[email]blabla@test.com&quot; onmouseover=&quot;alert(&#39;Hacked&#39;);[/email]'
+    >>> bbcode('[email]  blabla@test.com   [/email]')
+    u'<a href="mailto:blabla@test.com">blabla@test.com</a>'
+    >>> bbcode('[email]blabla@test.com[/email]')
+    u'<a href="mailto:blabla@test.com">blabla@test.com</a>'
+    >>> bbcode('[email=  blabla@test.com  ] Blablasha :][/email]')
+    u'<a href="mailto:blabla@test.com"> Blablasha :]</a>'
     """
 
     value = escape(value)
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.