Anonymous avatar Anonymous committed 384665d

Make JavaScript optional and now render HTML in the page.
fix wikilinks

Comments (0)

Files changed (13)

couchit/contrib/markdown.py

 #!/usr/bin/env python
+# -*- coding: utf-8 -*-
 """
 Python-Markdown
 ===============

couchit/contrib/markdown_extensions/codehilite.py

-#!/usr/bin/python
-
+# -*- coding: utf-8 -*-
 """
 CodeHilite Extension for Python-Markdown
 =======================================

couchit/contrib/markdown_extensions/footnotes.py

+# -*- coding: utf-8 -*-
 """
 ========================= FOOTNOTES =================================
 

couchit/contrib/markdown_extensions/wikilinks.py

-#!/usr/bin/env python
-
+# -*- coding: utf-8 -*-
 '''
 WikiLinks Extension for Python-Markdown
 ======================================
 * [Markdown 2.0+](http://www.freewisdom.org/projects/python-markdown/)
 '''
 
+import codecs
+import re
+from werkzeug.utils import url_quote
 from couchit.contrib import markdown
+from couchit.utils import CouchitUnicodeDecodeError, force_unicode, smart_str, utf8
+
+re_page = re.compile(r'[^\w^\s^-]', re.U)
 
 class WikiLinkExtension (markdown.Extension) :
     def __init__(self, configs):
         if m.group(2).strip():
             base_url, end_url, html_class = self._getMeta()
             label = m.group(2).strip()
-            url = '%s%s%s'% (base_url, label.replace(' ', '_'), end_url)
+            label = re_page.sub("", label)
+            url = '%s%s%s'% (base_url, label, end_url)
             a = markdown.etree.Element('a')
             a.text = markdown.AtomicString(label)
             a.set('href', url)
                 a.set('class', html_class)
         else:
             a = ''
+        
         return a
 
     def _getMeta(self):

couchit/models.py

     claimed = BooleanField(default=False)
     created = DateTimeField()
     updated = DateTimeField()
+    allow_javascript = BooleanField(default=False)
     default_theme = BooleanField(default=True)
     theme = DictField(Schema.build(
         background_color = TextField(default='E7E7E7'),

couchit/template.py

 from couchit import settings
 from couchit.http import BCResponse, BCRequest
 from couchit.contrib import markdown
-
- 
-from couchit.utils import local, datetime_tojson, datetimestr_topython
+from couchit.utils import local, datetime_tojson, datetimestr_topython,force_unicode, smart_str
+from couchit.utils.html import sanitize_html
 
 template_env = Environment(loader=FileSystemLoader(settings.TEMPLATES_PATH))
 template_env.charset = 'utf-8'
 template_env.globals['DEBUG'] = settings.DEBUG
 template_env.filters['rfc3339'] = datetime_tojson
 
-def convert_markdown(value):
+re_script = re.compile("\"\'][\s]*javascript:(.*)[\"\']/g")
+
+def convert_markdown(value, javascript=False):
     if local.site_url:
         base_url = local.site_url + '/'
     else:
                                         ('html_class', ''),
                                         ('end_url', '') ]}
     )
+    _parsed_value = sanitize_html(md.convert(value), javascript=javascript)
+    _parsed_value = force_unicode(_parsed_value)
+    return _parsed_value
     
-    return md.convert(value)
 template_env.filters['markdown'] = convert_markdown
 
 
-
-
 def format_datetime(value):
     """Render ``value`` as a human-readable date and time.

     ``value`` is first converted with ``datetimestr_topython`` and the
     result is formatted like ``Mon Jan 05 2009 at 14:30``.
     """
     display_format = "%a %b %d %Y at %H:%M"
     return datetimestr_topython(value).strftime(display_format)

couchit/utils/__init__.py

     def subclass_exception(name, parent, module):
         return type(name, (parent,), {'__module__': module})
 
+class CouchitUnicodeDecodeError(Exception):
+    """Raised when a byte string cannot be decoded to unicode."""
+
+
     
 def slugify(value):
     import unicodedata
         return s.encode('utf-8', 'strict')
     else:
         return s
+        
+def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
+    """
+    Return a unicode version of 's'.
+
+    Byte strings are decoded with 'encoding' and the 'errors' policy.
+    Objects that are not strings are converted with unicode(s) when they
+    define __unicode__, otherwise with unicode(str(s), encoding, errors).
+    Unicode strings are returned unchanged.
+
+    If strings_only is True, don't convert None, numbers (int, long, float)
+    or datetime/date/time objects; they are returned as they are.
+
+    Raises CouchitUnicodeDecodeError when decoding fails.
+    """
+    # NOTE(review): relies on the `types` and `datetime` modules being
+    # imported at module level -- not visible in this hunk, confirm.
+    if strings_only and isinstance(s, (types.NoneType, int, long, datetime.datetime, datetime.date, datetime.time, float)):
+        return s
+    try:
+        if not isinstance(s, basestring,):
+            if hasattr(s, '__unicode__'):
+                s = unicode(s)
+            else:
+                try:
+                    s = unicode(str(s), encoding, errors)
+                except UnicodeEncodeError:
+                    if not isinstance(s, Exception):
+                        raise
+                    # If we get to here, the caller has passed in an Exception
+                    # subclass populated with non-ASCII data without special
+                    # handling to display as a string. We need to handle this
+                    # without raising a further exception. We do an
+                    # approximation to what the Exception's standard str()
+                    # output should be.
+                    s = ' '.join([force_unicode(arg, encoding, strings_only,
+                            errors) for arg in s])
+        elif not isinstance(s, unicode):
+            # Note: We use .decode() here, instead of unicode(s, encoding,
+            # errors), so that if s is a SafeString, it ends up being a
+            # SafeUnicode at the end.
+            s = s.decode(encoding, errors)
+    except UnicodeDecodeError, e:
+        # Wrap the low-level error, keeping the offending value as the
+        # first argument followed by the original exception arguments.
+        raise CouchitUnicodeDecodeError(s, *e.args)
+    return s
+
+def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'):
+    """
+    Returns a bytestring version of 's', encoded as specified in 'encoding'.
+
+    If strings_only is True, don't convert (some) non-string-like objects.
+    """
+    if strings_only and isinstance(s, (types.NoneType, int)):
+        return s
+   
+    if not isinstance(s, basestring):
+        try:
+            return str(s)
+        except UnicodeEncodeError:
+            if isinstance(s, Exception):
+                # An Exception subclass containing non-ASCII data that doesn't
+                # know how to print itself properly. We shouldn't raise a
+                # further exception.
+                return ' '.join([smart_str(arg, encoding, strings_only,
+                        errors) for arg in s])
+            return unicode(s).encode(encoding, errors)
+    elif isinstance(s, unicode):
+        return s.encode(encoding, errors)
+    elif s and encoding != 'utf-8':
+        return s.decode('utf-8', errors).encode(encoding, errors)
+    else:
+        return s
 
 def get_tag_name_list(tag_names):
     """

couchit/utils/html.py

+# -*- coding: utf-8 -
+# Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
+#
+# from FeedParser <http://code.google.com/p/feedparser> 
+# with some adaptation for couchit to remove or not javascript
+#
+import re
+import sys
+import sgmllib
+
+_debug = 0
+
+class _BaseHTMLProcessor(sgmllib.SGMLParser):
+    """SGML parser that re-serializes the markup it is fed.
+
+    Every handler appends a reconstructed piece of markup (start tag, end
+    tag, text, entity, comment, ...) to ``self.pieces``; ``output()`` joins
+    the pieces back into one string.  Subclasses override the handlers to
+    filter what ends up in the result.
+    """
+    # Elements written in self-closing form (``<br />``); no separate end
+    # tag is ever emitted for them.
+    elements_no_end_tag = ['area', 'base', 'basefont', 'br', 'col', 'frame', 'hr',
+      'img', 'input', 'isindex', 'link', 'meta', 'param']
+    
+    def __init__(self, encoding):
+        # encoding: codec name used to encode unicode input and to decode
+        # attribute names/values.
+        self.encoding = encoding
+        if _debug: sys.stderr.write('entering BaseHTMLProcessor, encoding=%s\n' % self.encoding)
+        sgmllib.SGMLParser.__init__(self)
+        
+    def reset(self):
+        # Start with an empty output buffer, then reset the parser state.
+        self.pieces = []
+        sgmllib.SGMLParser.reset(self)
+
+    def _shorttag_replace(self, match):
+        # re.sub callback used by feed(): rewrite ``<tag/>`` as ``<tag />``
+        # for elements without an end tag, and as ``<tag></tag>`` otherwise.
+        tag = match.group(1)
+        if tag in self.elements_no_end_tag:
+            return '<' + tag + ' />'
+        else:
+            return '<' + tag + '></' + tag + '>'
+        
+    def feed(self, data):
+        # Normalize the input before parsing: expand attribute-less short
+        # tags, turn the numeric references for ' and " into the literal
+        # characters, and encode unicode input to bytes using self.encoding.
+        data = re.sub(r'<([^<\s]+?)\s*/>', self._shorttag_replace, data) 
+        data = data.replace('&#39;', "'")
+        data = data.replace('&#34;', '"')
+        if self.encoding and type(data) == type(u''):
+            data = data.encode(self.encoding)
+        sgmllib.SGMLParser.feed(self, data)
+
+    def normalize_attrs(self, attrs):
+        # utility method to be called by descendants
+        # Lower-cases every attribute name, and also the values of the
+        # 'rel' and 'type' attributes.
+        attrs = [(k.lower(), v) for k, v in attrs]
+        attrs = [(k, k in ('rel', 'type') and v.lower() or v) for k, v in attrs]
+        return attrs
+
+    def unknown_starttag(self, tag, attrs):
+        # called for each start tag
+        # attrs is a list of (attr, value) tuples
+        # e.g. for <pre class='screen'>, tag='pre', attrs=[('class', 'screen')]
+        if _debug: sys.stderr.write('_BaseHTMLProcessor, unknown_starttag, tag=%s\n' % tag)
+        uattrs = []
+        # thanks to Kevin Marks for this breathtaking hack to deal with (valid) high-bit attribute values in UTF-8 feeds
+        for key, value in attrs:
+            if type(value) != type(u''):
+                value = unicode(value, self.encoding)
+            uattrs.append((unicode(key, self.encoding), value))
+        # Rebuild the attribute string as unicode, then encode it back to
+        # bytes.  NOTE(review): values are interpolated as-is, without
+        # escaping any double quote they may contain.
+        strattrs = u''.join([u' %s="%s"' % (key, value) for key, value in uattrs]).encode(self.encoding)
+        if tag in self.elements_no_end_tag:
+            self.pieces.append('<%(tag)s%(strattrs)s />' % locals())
+        else:
+            self.pieces.append('<%(tag)s%(strattrs)s>' % locals())
+
+    def unknown_endtag(self, tag):
+        # called for each end tag, e.g. for </pre>, tag will be 'pre'
+        # Reconstruct the original end tag.
+        # Elements without an end tag were already emitted self-closed by
+        # unknown_starttag, so nothing is written for them here.
+        if tag not in self.elements_no_end_tag:
+            self.pieces.append("</%(tag)s>" % locals())
+
+    def handle_charref(self, ref):
+        # called for each character reference, e.g. for '&#160;', ref will be '160'
+        # Reconstruct the original character reference.
+        self.pieces.append('&#%(ref)s;' % locals())
+        
+    def handle_entityref(self, ref):
+        # called for each entity reference, e.g. for '&copy;', ref will be 'copy'
+        # Reconstruct the original entity reference.
+        self.pieces.append('&%(ref)s;' % locals())
+
+    def handle_data(self, text):
+        # called for each block of plain text, i.e. outside of any tag and
+        # not containing any character or entity references
+        # Store the original text verbatim.
+        if _debug: sys.stderr.write('_BaseHTMLProcessor, handle_text, text=%s\n' % text)
+        self.pieces.append(text)
+        
+    def handle_comment(self, text):
+        # called for each HTML comment, e.g. <!-- insert Javascript code here -->
+        # Reconstruct the original comment.
+        self.pieces.append('<!--%(text)s-->' % locals())
+        
+    def handle_pi(self, text):
+        # called for each processing instruction, e.g. <?instruction>
+        # Reconstruct original processing instruction.
+        self.pieces.append('<?%(text)s>' % locals())
+
+    def handle_decl(self, text):
+        # called for the DOCTYPE, if present, e.g.
+        # <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+        #     "http://www.w3.org/TR/html4/loose.dtd">
+        # Reconstruct original DOCTYPE
+        self.pieces.append('<!%(text)s>' % locals())
+        
+    # Matches a declaration name followed by optional whitespace.
+    _new_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9:]*\s*').match
+    def _scan_name(self, i, declstartpos):
+        # Scan a declaration name starting at offset i of the raw buffer.
+        # Returns (lower-cased name, end offset), or (None, -1) when the
+        # buffer ends before the name is complete or when no name matches.
+        # NOTE(review): presumably overrides the stock parser's _scan_name
+        # so that a malformed declaration does not raise -- confirm.
+        rawdata = self.rawdata
+        n = len(rawdata)
+        if i == n:
+            return None, -1
+        m = self._new_declname_match(rawdata, i)
+        if m:
+            s = m.group()
+            name = s.strip()
+            if (i + len(s)) == n:
+                return None, -1  # end of buffer
+            return name.lower(), m.end()
+        else:
+            # No valid name: hand the whole raw buffer to handle_data.
+            self.handle_data(rawdata)
+#            self.updatepos(declstartpos, i)
+            return None, -1
+
+    def output(self):
+        '''Return processed HTML as a single string'''
+        return ''.join([str(p) for p in self.pieces])
+
+
+class _HTMLSanitizer(_BaseHTMLProcessor):
+    acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'b', 'big',
+      'blockquote', 'br', 'button', 'caption', 'center', 'cite', 'code', 'col',
+      'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt', 'em', 'fieldset',
+      'font', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input',
+      'ins', 'kbd', 'label', 'legend', 'li', 'map', 'menu', 'ol', 'optgroup',
+      'option', 'p', 'pre', 'q', 's', 'samp', 'select', 'small', 'span', 'strike',
+      'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'tfoot', 'th',
+      'thead', 'tr', 'tt', 'u', 'ul', 'var']
+
+    acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
+      'action', 'align', 'alt', 'axis', 'border', 'cellpadding', 'cellspacing',
+      'char', 'charoff', 'charset', 'checked', 'cite', 'class', 'clear', 'cols',
+      'colspan', 'color', 'compact', 'coords', 'datetime', 'dir', 'disabled',
+      'enctype', 'for', 'frame', 'headers', 'height', 'href', 'hreflang', 'hspace',
+      'id', 'ismap', 'label', 'lang', 'longdesc', 'maxlength', 'media', 'method',
+      'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'prompt', 'readonly',
+      'rel', 'rev', 'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size',
+      'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title', 'type',
+      'usemap', 'valign', 'value', 'vspace', 'width']
+
+    unacceptable_elements_with_end_tag = ['script', 'applet']
+    
+    def __init__(self, encoding, javascript=False):
+        _BaseHTMLProcessor.__init__(self, encoding)
+        
+        if javascript:
+            self.unacceptable_elements_with_end_tag = ['applet']
+            self.acceptable_elements.append('script')
+        else: # reset
+            unacceptable_elements_with_end_tag = ['script', 'applet']
+            if 'script' in self.acceptable_elements:
+                del self.acceptable_elements[self.acceptable_elements.index('script')]
+
+    def reset(self):
+        _BaseHTMLProcessor.reset(self)
+        self.unacceptablestack = 0
+        
+    def unknown_starttag(self, tag, attrs):
+        if not tag in self.acceptable_elements:
+            if tag in self.unacceptable_elements_with_end_tag:
+                self.unacceptablestack += 1
+            return
+        attrs = self.normalize_attrs(attrs)
+        attrs = [(key, value) for key, value in attrs if key in self.acceptable_attributes]
+        _BaseHTMLProcessor.unknown_starttag(self, tag, attrs)
+        
+    def unknown_endtag(self, tag):
+        if not tag in self.acceptable_elements:
+            if tag in self.unacceptable_elements_with_end_tag:
+                self.unacceptablestack -= 1
+            return
+        _BaseHTMLProcessor.unknown_endtag(self, tag)
+
+    def handle_pi(self, text):
+        pass
+
+    def handle_decl(self, text):
+        pass
+
+    def handle_data(self, text):
+        if not self.unacceptablestack:
+            _BaseHTMLProcessor.handle_data(self, text)
+
+def sanitize_html(htmlSource, encoding='utf-8', javascript=False):
+    p = _HTMLSanitizer(encoding=encoding, javascript=javascript)
+    p.feed(htmlSource)
+    data = p.output()
+    
+    data = data.strip().replace('\r\n', '\n')
+    return data
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from datetime import datetime
 import random
 import re
 from time import asctime, gmtime, time
             site=request.site.id,
             title=pagename.replace("_", " ")
         )
-    
+
     # get all pages
     pages = all_pages(local.db, request.site.id)
     
-    
-   
-    return render_response('page/show.html', page=page, pages=pages, 
+    response = render_response('page/show.html', page=page, pages=pages, 
         lexers=LEXERS_CHOICE, redirect_from=redirect_from)
+        
+    need_update = request.session.get('need_update', False)
+    if need_update:
+        response.headers['Cache-Control'] = 'public'
+        response.headers['Expires'] = asctime(gmtime(time() - 3600))
+        del request.session['need_update']
+    return response
 
 @can_edit
 def edit_page(request, pagename=None):
 def site_settings(request):
     if request.is_xhr and request.method == "POST":
         data = json.loads(request.data)
+        allow_javascript = data.get('allow_javascript', False) and True or False
+        if request.site.allow_javascript != allow_javascript:
+            request.session['need_update'] = True
+        
         site = get_site(local.db, request.site.cname)
         site.title = data.get('title', site.title)
         site.subtitle = data.get('subtitle', site.subtitle)
         site.email = data.get('email', site.email)
         site.privacy = data.get('privacy', site.privacy)
+        site.allow_javascript = allow_javascript
         site.store(local.db)
         request.site = site
         return send_json({ 'ok': True })

static/css/src/layout.css

     margin: 0 auto 0 auto;
     width: 550px;
     text-align: center;
+}
+
+.tableSettings th {
+    width: 120px;
+    vertical-align: middle;
 }

static/js/src/application.js

                'title': $('site_title').getValue(),
                'subtitle': $('site_subtitle').getValue(),
                'email': $('email').getValue(),
-               'privacy': $$('input:checked[type="radio"][name="privacy"]').pluck('value')[0]
+               'privacy': $$('input:checked[type="radio"][name="privacy"]').pluck('value')[0],
+               'allow_javascript': $('allow_javascript').getValue()
            }
        } catch(e) {
            data = {

templates/page/show.html

             </ul>
         </header>
         {% endif %}
-        {{ page.content|markdown }}
+        {{ page.content|markdown(site.allow_javascript) }}
         <footer>
             
         </footer>

templates/site/settings.html

 <section id="psettings">
     <form name="fsettings" id="fsettings" action="{{ url_for('site_settings') }}" method="post">
         <h2>Settings</h2>
-        <table class="">
+        <table class="tableSettings">
             <tr>
-                <th>Title</th>
+                <th>Title:</th>
                 <td><input type="text" name="site_title" id="site_title" value="{{ site.title }}" /></td>
                 <td>Appears at the top of every page</td>
             </tr>
             <tr>
-                <th>Subtitle</th>
+                <th>Subtitle:</th>
                 <td><input type="text" name="site_subtitle" id="site_subtitle" value="{{ site.subtitle }}" /></td>
                 <td>Appears below the title</td>
             </tr>
             <tr>
-                <th>Site address</th>
+                <th>Site address:</th>
                 <td>{% if site_address %}<strong>{{ site_address }}</strong> (<a href="{{ site_url }}/site/change-site-address">change</a>){% else %}<a href="{{ site_url }}/site/change-site-address">Give your site an address</a>{% endif %}</td>
                 <td>{% if not site_address %}Give your website an address that's easy to remembe{% endif %}</td>
             </tr>
             {% if site.claimed %}
                 <tr>
-                    <th>Password</th>
+                    <th>Password:</th>
                     <td><a href="{{ site_url }}/site/change-password">Change password</a></td>
                     <td></td>
                 </tr>
                     </table>
                 </td>
             </tr>
+            <tr>
+                <th>Allow javascript:</th>
+                <td><input type="checkbox" name="allow_javascript" id="allow_javascript" value="1"{% if site.allow_javascript %} checked="checked"{% endif %} /></td>
+                <td>Allow users to include javascript in pages</td>
             {% endif %}
         </table>
     </form>
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.