Source

wikify / wikify.py

"""
wikify - micro-framework for text wikification

goals - easy to extend and debug

operation (flat algorithm)
 for each region
   - find region in processed text
   - process region matched
   - exclude processed text from further processing

example - replace all wiki:something with HTML links
 [ ] wrap text into list with single item
 [ ] split text into three parts using regexp `wiki:\w+`
 [ ] copy 1st part (not-matched) into the resulting list
 [ ] replace matched part with link, insert (processed)
     into the resulting list
 [ ] process (the-rest) until text list doesn't change

 [ ] repeat the above for the rest of rules, skipping
     (processed) parts

 [ ] reassemble text from the list


notes - (flat algorithm) doesn't process nested markup,
        for example *`bold preformatted text`*
"""

__author__ = "anatoly techtonik <techtonik@gmail.com>"
__license__ = "MIT | Public Domain"
__version__ = "0.1"

# --- define rules ---

# rule is a function that takes text and returns either
# None (not matched) or a list of three text items:
# [ not-matched, processed, the-rest ]

import re

# compiled once at import time; matches http:// or https:// followed by
# a run of non-whitespace characters (raw string keeps \S a regex escape)
_LINK_RE = re.compile(r'https?://\S+')

def rule_linkify(text):
  """ replace the first text link found with a link placeholder

  Searches `text` for the first http:// or https:// URL, i.e. the
  scheme followed by any run of non-whitespace characters.

  Returns None when `text` contains no such URL, otherwise a 3-tuple
  (not-matched, processed, the-rest):
    not-matched - text before the URL
    processed   - the literal string "[link]" standing in for the URL
    the-rest    - text after the URL (may contain further URLs)
  """
  # [ ] test with commas and other URL escaped symbols
  match = _LINK_RE.search(text)
  if match is None:
    return None
  return (text[:match.start()], "[link]", text[match.end():])

# demo: show the three-part result for a sample string
# (parenthesized single-argument form prints the same on Python 2 and 3)
print(rule_linkify("a web site http://google.com"))

# [ ] sets of common rules
# [ ] configurable replacements
# [ ] indented prints after every step