# moin-2.0 / MoinMoin / wikiutil.py

# Copyright: 2000-2004 Juergen Hermann <jh@web.de>
# Copyright: 2004 by Florian Festi
# Copyright: 2006 by Mikko Virkkil
# Copyright: 2005-2010 MoinMoin:ThomasWaldmann
# Copyright: 2007 MoinMoin:ReimarBauer
# Copyright: 2008 MoinMoin:ChristopherDenter
# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.

"""
    MoinMoin - Wiki Utility Functions
"""


from __future__ import absolute_import, division

import os
import re
import time

from MoinMoin import log
logging = log.getLogger(__name__)

from flask import current_app as app
from flask import g as flaskg
from flask import request

from MoinMoin import config
from MoinMoin.config import IS_SYSITEM

from MoinMoin.i18n import _, L_, N_
from MoinMoin.util import pysupport, lock
from MoinMoin.util.mimetype import MimeType
from MoinMoin.storage.error import NoSuchItemError, NoSuchRevisionError

import werkzeug

# constants for page names
# A relative item name starting with PARENT_PREFIX refers to something one
# level above the current context; the prefix may be repeated to climb
# several levels (see AbsItemName / RelItemName below).
PARENT_PREFIX = "../"
PARENT_PREFIX_LEN = len(PARENT_PREFIX)
# A relative item name starting with CHILD_PREFIX refers to a sub-item of the
# current context.
CHILD_PREFIX = "/"
CHILD_PREFIX_LEN = len(CHILD_PREFIX)

#############################################################################
### Data validation / cleanup
#############################################################################

# TODO: use similar code in a flatland validator
def clean_input(text, max_len=201):
    """ Sanitize a short piece of user-supplied text.

        Empty input and input longer than max_len are rejected wholesale by
        returning an empty unicode string. Anything else is run through
        config.clean_input_translation_map (intended to replace CR, LF, TAB
        by whitespace and to delete control chars).

        :param text: unicode text to clean (if we get str, we decode)
        :param max_len: longest accepted input; compared against len(text)
                        before any decoding happens
        :rtype: unicode
        :returns: cleaned text, or u'' for empty / oversized input
    """
    # our input fields allow at most 200 chars, but spammers send us more
    if not 0 < len(text) <= max_len:
        return u''
    if isinstance(text, str):
        # translate() with this map can ONLY process unicode, so a str is
        # decoded with the usual coding first
        text = text.decode(config.charset)
    return text.translate(config.clean_input_translation_map)


# TODO: use similar code in a flatland validator
def normalize_pagename(name, cfg):
    """ Normalize page name

    Keeps page names free of invisible characters and odd whitespace that
    could confuse users or be abused. The name is treated as a '/'-separated
    path and every component is cleaned on its own; empty and whitespace-only
    components are dropped.

    Components that are group item names (see isGroupItem) are restricted
    further to alphanumerics and whitespace, so they can be used inside acl
    lines.

    :param name: page name, unicode
    :param cfg: not used by this function
    :rtype: unicode
    :returns: decoded and sanitized page name
    """
    # remove invalid characters from the whole name before splitting it
    stripped = config.page_invalid_chars_regex.sub(u'', name)

    components = []
    for segment in stripped.split(u'/'):
        # skip empty or whitespace-only path components
        if segment and not segment.isspace():
            if isGroupItem(segment):
                # group names: keep only alphanumerics and white space
                segment = u''.join(ch for ch in segment
                                   if ch.isalnum() or ch.isspace())
            # split() without arguments splits on any unicode whitespace, so
            # re-joining collapses each run into exactly one plain space
            components.append(u' '.join(segment.split()))

    # put the cleaned components back together
    return u'/'.join(components)


#############################################################################
### Item types / Item names
#############################################################################

def isSystemItem(itemname):
    """ Is this a system page?

    Looks the item up in flaskg.storage and reads the IS_SYSITEM entry of the
    revision returned by get_revision(-1) (presumably the latest one).

    :param itemname: the item name
    :rtype: bool
    :returns: the IS_SYSITEM value of that revision; False if the item, the
              revision or the IS_SYSITEM entry does not exist
    """
    try:
        revision = flaskg.storage.get_item(itemname).get_revision(-1)
        return revision[IS_SYSITEM]
    except (NoSuchItemError, NoSuchRevisionError, KeyError):
        # anything missing means "not a system item"
        return False


def isGroupItem(itemname):
    """ Check whether an item name is the name of a group item.

    :param itemname: the item name
    :rtype: bool
    :returns: True if the configured group regex
              (app.cfg.cache.item_group_regexact) finds a match in itemname
    """
    group_regex = app.cfg.cache.item_group_regexact
    found = group_regex.search(itemname)
    return found is not None


def AbsItemName(context, itemname):
    """
    Resolve a (possibly) relative item name against the item it appears on.

    Names starting with PARENT_PREFIX climb up from context, names starting
    with CHILD_PREFIX are appended below context, all other names are
    returned unchanged.

    :param context: name of the item where "itemname" appears on
    :param itemname: the (possibly relative) item name
    :rtype: unicode
    :returns: the absolute item name
    """
    if itemname.startswith(PARENT_PREFIX):
        # each leading parent prefix removes the last component of context;
        # once context is used up, remaining prefixes stay in the name
        while context and itemname.startswith(PARENT_PREFIX):
            context = context.rpartition('/')[0]
            itemname = itemname[PARENT_PREFIX_LEN:]
        # join only the non-empty parts to avoid leading / trailing slashes
        return '/'.join(part for part in (context, itemname) if part)

    if itemname.startswith(CHILD_PREFIX):
        child = itemname[CHILD_PREFIX_LEN:]
        if not context:
            return child
        return context + '/' + child

    return itemname

def RelItemName(context, itemname):
    """
    Express an absolute item name relative to a context item.

    :param context: name of the item where "itemname" appears on
    :param itemname: the absolute item name
    :rtype: unicode
    :returns: the relative item name
    """
    if context == '':
        # the empty context is a "virtual root": every item is its child
        return CHILD_PREFIX + itemname

    if itemname.startswith(context + CHILD_PREFIX):
        # direct descendant: keep everything after the context name
        return itemname[len(context):]

    # some kind of sister/aunt, e.g. context A/B/C/D/E and item A/B/C/F
    ctx_parts = context.split('/')
    item_parts = itemname.split('/')

    # count the leading components both names have in common (A, B, C)
    shared = 0
    limit = min(len(ctx_parts), len(item_parts))
    while shared < limit and ctx_parts[shared] == item_parts[shared]:
        shared += 1

    # climb once per remaining context component (D, E), then descend
    # into what is left of the item name (F)
    levels_up = len(ctx_parts) - shared
    return PARENT_PREFIX * levels_up + '/'.join(item_parts[shared:])


def ParentItemName(itemname):
    """
    Compute the name of the parent item.

    :param itemname: the absolute item name (unicode)
    :rtype: unicode
    :returns: everything before the last '/' of itemname; an empty string for
              toplevel items (no '/', or only a leading '/') and for an
              empty itemname
    """
    if not itemname:
        return u''
    last_slash = itemname.rfind('/')
    # a slash at position 0 leaves no parent name in front of it
    return itemname[:last_slash] if last_slash > 0 else u''


#############################################################################
### Misc
#############################################################################

def drawing2fname(drawing):
    """
    Make sure a drawing name carries a known drawing file extension.

    :param drawing: drawing name, with or without extension
    :returns: drawing unchanged if its extension is one of the known drawing
              extensions, otherwise drawing with '.tdraw' appended
    """
    # NOTE(review): this (re)assigns an attribute on the shared config module
    # on every call; kept as is because other code may read it.
    config.drawing_extensions = ['.tdraw', '.adraw',
                                 '.svg',
                                 '.png', '.jpg', '.jpeg', '.gif',
                                ]
    extension = os.path.splitext(drawing)[1]
    # note: testing for an empty extension is not enough - something like
    # drawing:foo.bar has to be expanded to foo.bar.tdraw
    if extension in config.drawing_extensions:
        return drawing
    # for backwards compatibility, twikidraw is the default:
    return drawing + '.tdraw'


def getUnicodeIndexGroup(name):
    """
    Return a group letter for `name`, which must be a non-empty unicode
    string.

    Currently supported: Hangul Syllables (U+AC00 - U+D7AF), which are grouped
    by their leading consonant: the result is the first syllable of the
    588-syllable block the character belongs to. Every other character is its
    own group, in upper case.

    :param name: a string
    :rtype: string
    :returns: group letter
    :raises IndexError: if name is empty
    """
    c = name[0]
    if u'\uAC00' <= c <= u'\uD7AF': # Hangul Syllables
        # 588 (= 21 * 28) consecutive syllables share one leading consonant.
        # Floor division is essential here: this module enables true division
        # (from __future__ import division), so "/" would yield a float that
        # is neither rounded down to the block start nor accepted by unichr().
        block = (ord(c) - 0xac00) // 588
        return unichr(0xac00 + block * 588)
    # we put lower and upper case words into the same index group
    return c.upper()


def is_URL(arg, schemas=config.url_schemas):
    """ Tell whether arg is a URL using one of the given schemas.

        Note: generic URLs have very few requirements, basically only the
        mandatory ':' between the schema and the rest. Both parts could be
        anything, but only the schemas listed in `schemas` (by default
        config.url_schemas) are accepted, which makes this less ambiguous.

        :param arg: the string to check
        :param schemas: iterable of accepted schema names (without the ':')
        :rtype: bool
        :returns: True if arg starts with one of the schemas followed by ':'
    """
    # cheap rejection first: without a ':' there can not be a schema
    if ':' not in arg:
        return False
    return any(arg.startswith(schema + ':') for schema in schemas)


def containsConflictMarker(text):
    """ Returns true if there is a conflict marker in the text.

    :param text: the text to search
    :rtype: bool
    :returns: True if the start of an edit conflict warning occurs in text
    """
    conflict_marker = "/!\\ '''Edit conflict"
    return conflict_marker in text

def anchor_name_from_text(text):
    """
    Derive an anchor name from arbitrary text.

    The text is url-quoted (utf-7, ':' kept as is), then '%' is replaced by
    '.' and '+' by '_'. If the result does not begin with a letter, an 'A'
    is put in front of it. The intent is to produce valid HTML IDs matching:
    [A-Za-z][A-Za-z0-9:_.-]*

    Note: this transformation is meant to be an identity transformation for
    input that already is a valid ID/name.

    :param text: the text to derive the anchor name from
    :returns: the anchor name
    """
    anchor = werkzeug.url_quote_plus(text, charset='utf-7', safe=':')
    for quoted_char, id_char in (('%', '.'), ('+', '_')):
        anchor = anchor.replace(quoted_char, id_char)
    if anchor[:1].isalpha():
        return anchor
    # an empty result or a non-letter first char needs a letter in front
    return 'A%s' % anchor

def split_anchor(pagename):
    """
    Separate a pagename with an (optional) anchor into the real pagename and
    the anchor. The split happens at the LAST '#'; without any '#', the
    anchor is an empty string.

    Note: if pagename contains a # (as part of the pagename, not as anchor),
          you can use a trick to make it work nevertheless: just append a
          # at the end:
          "C##" gives "C#" and ""
          "Problem #1#" gives "Problem #1" and ""

    Note: the result is a 2-element list when a '#' was found, but a
          2-element tuple when there was none.

    TODO: We shouldn't deal with composite pagename#anchor strings, but keep
          it separate.
          Current approach: [[pagename#anchor|label|attr=val,&qarg=qval]]
          Future approach:  [[pagename|label|attr=val,&qarg=qval,#anchor]]
          The future approach will avoid problems when there is a # in the
          pagename part (and no anchor). Also, we need to append #anchor
          at the END of the generated URL (AFTER the query string).

    :param pagename: pagename, optionally followed by '#' and an anchor
    :returns: the pagename and the anchor, as described above
    """
    name, separator, anchor = pagename.rpartition('#')
    if separator:
        return [name, anchor]
    return pagename, ""


def get_hostname(addr):
    """
    Do a reverse DNS lookup for an IP address.

    The lookup only happens if app.cfg.log_reverse_dns_lookups is set.

    :param addr: IP address to look up (str)
    :returns: host dns name (unicode) or
              None (if lookup is disallowed or failed)
    """
    if not app.cfg.log_reverse_dns_lookups:
        return None

    import socket
    try:
        hostname = socket.gethostbyaddr(addr)[0]
        return unicode(hostname, config.charset)
    except (socket.error, UnicodeError):
        # failed lookup or undecodable name: report "no hostname"
        return None


def file_headers(filename=None, content_type=None, content_length=None):
    """
    Compute http headers for sending a file

    :param filename: filename for autodetecting content_type (unicode, default: None)
    :param content_type: content-type header value (str, default: autodetect from filename)
    :param content_length: for content-length header (int, default:None)
    :returns: list of (header name, header value) tuples: always a
              Content-Type entry, plus a Content-Length entry if
              content_length was given
    """
    mt = None
    if filename:
        # make sure we just have a simple filename (without path)
        filename = os.path.basename(filename)
        mt = MimeType(filename=filename)

    if content_type is not None:
        # NOTE(review): the resulting object is not used afterwards; the call
        # is kept in case constructing it matters (e.g. for validation).
        mt = MimeType(mimestr=content_type)
    elif mt is not None:
        # nothing given explicitly: autodetect from the filename
        content_type = mt.content_type()
    else:
        # neither a content type nor a filename: use the generic binary type
        content_type = 'application/octet-stream'

    headers = [('Content-Type', content_type)]
    if content_length is not None:
        headers.append(('Content-Length', str(content_length)))
    return headers
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.