# Source

# babwikiserver / wikiserver.py

#!/usr/bin/env python
# encoding: utf-8

"""A simple wikiserver in python which uses Mercurial as backend for revision data. 

It uses the pymarkdown_minisite module for parsing markdown content to static sites and 
provides an interface to edit the pages. 

Usage: 
	- wikiserver.py [options] [path to the site folder]
	  start the server

Options: 
	None yet. 

Programming notes: 
	- If we add options, we have to rewrite the argument parsing to first strip all 
options before we check for a site folder path, because the site folder isn't required. 
"""

### Imports

# The basic HTTP Server from Python. 
from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
# And markdown for parsing the sites
from markdown import Markdown
# Also we need to be able to serve files relative to a source dir. 
from os.path import join, isfile, isdir, exists
# Also get the filename (basename) and the parent directory (dirname)
from os.path import basename, dirname, curdir, abspath, normpath
# and creating new dirs
from os import makedirs
# And the arguments this script was called with
from sys import argv
# Also we need to be able to undo urlencoding of webforms
from urllib import unquote_plus as unquote
from urllib import quote
# And open unicode files
from codecs import open as codecs_open
# And process POST requests
import cgi

# And we need to be able to call parse_and_list_markdown_files.py
from subprocess import call


### Config ###

# The last command line argument names the site folder. Without any argument
# we fall back to "<directory this script got called from>/wiki".
# normpath makes sure the result doesn't end in a slash.
BASE_PATH = normpath(argv[-1] if argv[1:] else join(abspath(curdir), "wiki"))

#: The relative path of the static dir (inside the base dir). serve_html reads
#: the pages it serves from there.
STATIC_PATH = "static"

#: The name of this wiki. Currently only referenced by the commented-out
#: parse_and_list_markdown_files.py call in save_file.
SITE_NAME = "wikiserver"
#: The backlink for this site (also only referenced by that commented-out call).
SITE_BACKLINK = "http://draketo.de"
#: The username to use for wiki-commits.
SITE_USERNAME = "wiki@terra.sol"

#: File name of the edit form template. create_edit_html opens it by this
#: relative name, so it is looked up in the current working directory.
TEMPLATE_EDIT_FORM = "edit_form.html"
#: Placeholders inside the templates. Every occurrence gets replaced with the
#: corresponding value by create_html / create_edit_html / save_file.
TEMPLATE_DATA = "<!--DATA-->"
TEMPLATE_TITLE = "<!--TITLE-->"
TEMPLATE_COMMIT_MESSAGE = "<!--COMMIT_MESSAGE-->"
TEMPLATE_PREVIEW = "<!--PREVIEW-->"
TEMPLATE_FILE_PATH = "<!--EDIT_FILE_PATH-->"
TEMPLATE_ERROR = "<!--ERROR-->"
#: The page save_file returns after saving; TEMPLATE_FILE_PATH gets replaced
#: with the path of the saved page.
TEMPLATE_SAVED = """<html><head><title>Successfully saved</title></head><body><h1>Successfully saved <a href="<!--EDIT_FILE_PATH-->"><!--EDIT_FILE_PATH--></a></h1></body></html>"""


#: The text offered in the edit form when there is no content for a page yet.
TEMPLATE_NEW_FILE_TEXT = """# Title

please take care when editing, and remember to include a Change description. """

#: The signifier that the URL is an edit URL - do_GET treats every request
#: path which starts with this as an edit request.
EDIT_URL_SEGMENT = "/edit?"

#: Encoding of the source files. We assume utf-8
FILE_ENCODING = "utf-8"

#: The call for committing via mercurial - excepting the commit message.
#: "-m" comes last, so the caller appends the message and then the file.
#: -R selects the repository, -X excludes the static dir from the commit,
#: -u sets the committer and -A adds/removes files automatically.
HG_CALL = ["hg", "-R", BASE_PATH, "ci", "-X", join(BASE_PATH, STATIC_PATH), "-u", SITE_USERNAME, "-A", "-m"]

### Functions and classes ###

# First a help function - return the docstring without the programming notes :)
def help():
    """Return the module docstring without its last paragraph.

    Paragraphs are separated by blank lines and the last one holds the
    programming notes, which are of no interest to users.

    Returns the remaining paragraphs as one string (not as a list, so
    printing it shows readable text). Returns an empty string if the module
    has no docstring, e.g. when python runs with -OO.
    """
    paragraphs = (__doc__ or "").split("\n\n")
    return "\n\n".join(paragraphs[:-1])

# Answer a --help request right away and quit.
if "--help" in argv:
    print(help())
    exit()

# One module-wide Markdown converter, so we don't have to build a new parser
# for every request. Use it as
#     html = markdown.convert(text)
markdown = Markdown(extensions=['toc'])

# Now we need a function to serve the plain html files
def serve_html(path="/"):
    """Return a static page with an edit link added.

    path is the request path. Leading slashes get stripped and the rest is
    looked up inside BASE_PATH/STATIC_PATH. If it points to a directory, the
    index.html of that directory is served.

    The edit link is inserted before "</body>" if the page contains exactly
    one "</body>"; otherwise every "</body>" is dropped and the link (with a
    closing "</body>") is appended to the page.

    Returns the page encoded as ascii, with all other characters written as
    xml character references. If a path segment is "..", a plain error
    message is returned instead.

    Raises IOError if the file can't be opened.
    """
    # reject paths containing a "..".
    if ".." in path.split("/"):
        return "Bad request (path contained '..')"
    # make absolute paths relative (remove all leading slashes)
    path = path.lstrip("/")
    # Make the path locally absolute
    local_path = join(BASE_PATH, STATIC_PATH, path)
    # If the path points to a directory, we want the index page. Adjust both
    # the file we read and the relative path used for the edit link.
    if isdir(local_path):
        path = join(path, "index.html")
        local_path = join(local_path, "index.html")
    with codecs_open(local_path, encoding=FILE_ENCODING) as f:
        data = f.read()
    # insert an edit link
    # NOTE(review): path is put into the link without any escaping.
    editlink = "<a href='/edit?file=" + path + "'>edit</a></body>"
    parts = data.split("</body>")
    if len(parts) == 2:
        data = editlink.join(parts)
    else:
        data = "".join(parts) + editlink
    return data.encode('ascii', 'xmlcharrefreplace')

def create_html(data, title):
    """Create the HTML data for a given file.

    Reads "template.html" (relative to the current working directory),
    replaces every TEMPLATE_DATA marker with data and after that every
    TEMPLATE_TITLE marker with title.

    The template is decoded with FILE_ENCODING, so a template with non-ascii
    characters can be combined with unicode page content. Byte strings given
    as data or title are decoded the same way.

    Returns the finished page as text.
    """
    with codecs_open("template.html", encoding=FILE_ENCODING) as f:
        html = f.read()
    # The order matters: a title marker inside data gets replaced, too.
    for marker, value in ((TEMPLATE_DATA, data), (TEMPLATE_TITLE, title)):
        if isinstance(value, bytes):
            value = value.decode(FILE_ENCODING)
        html = html.replace(marker, value)
    return html

def create_edit_html(path, data, commit_message, title, error):
    """Create the basic HTML data for the edit form.

    Reads the form template TEMPLATE_EDIT_FORM and fills in the markers, in
    this order: TEMPLATE_DATA (data), TEMPLATE_TITLE (title),
    TEMPLATE_COMMIT_MESSAGE (commit_message), TEMPLATE_FILE_PATH (path),
    TEMPLATE_ERROR (error) and finally TEMPLATE_PREVIEW with data rendered
    as markdown.

    All values may be byte strings (they are decoded as utf-8) or already
    decoded text. The values are inserted without any escaping.

    Returns the filled form as text.
    """
    def as_text(value):
        # Byte strings can't be joined with the decoded template once they
        # contain non-ascii characters, so decode every one of them.
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    # first read the form
    with codecs_open(TEMPLATE_EDIT_FORM, encoding=FILE_ENCODING) as f:
        html = f.read()
    data = as_text(data)
    # insert the values
    replacements = (
        (TEMPLATE_DATA, data),
        (TEMPLATE_TITLE, as_text(title)),
        (TEMPLATE_COMMIT_MESSAGE, as_text(commit_message)),
        (TEMPLATE_FILE_PATH, as_text(path)),
        (TEMPLATE_ERROR, as_text(error)),
    )
    for marker, value in replacements:
        html = html.replace(marker, value)
    # Also insert a preview. The converter is shared between requests, so
    # clear the state left over from the previous document first.
    markdown.reset()
    return html.replace(TEMPLATE_PREVIEW, markdown.convert(data))

# To make all this interesting, we now add a function to edit the files from the web-interface.
def edit_file(request_data):
    """Edit a markdown-file in an edit form.

    request_data is an urlencoded query string. It has to contain "file"
    (the path of the page, relative to BASE_PATH). If it contains
    "button_save", the page gets saved via save_file, else the edit form
    with a preview is created via preview_file. All remaining values are
    passed on to these functions.

    If the path points to a directory or contains no ".", the index.txt or
    index.mdwn inside it is edited; if neither exists, a page with a link
    for creating index.txt is returned.

    Returns the resulting page encoded as ascii with xml character
    references, or a plain message if no file was given or the file path
    contains a ".." segment.
    """
    ## Parse the form data
    segments = cgi.parse_qs(request_data)

    print(segments)
    # Remove the path from the segments - and make sure it exists!
    if "file" not in segments:
        return "Error: No file given. We can't edit fog :) "
    path = segments.pop("file")[0]

    # Reject paths containing a "..". This has to be checked on the decoded
    # path: in the raw request the first segment is glued to "file=" and the
    # slashes can be hidden as %2F.
    if ".." in path.replace("\\", "/").split("/"):
        return "Bad request (path contained '..')"

    # Now replace path with the path to the file.
    # make absolute paths relative (remove all leading slashes)
    path = path.lstrip("/")
    # Make the path locally absolute
    p = join(BASE_PATH, path)
    # If the path points to a directory, we want the index page.
    if isdir(p) or "." not in path:
        # it either ends in .txt or .mdwn
        for index_name in ("index.txt", "index.mdwn"):
            candidate = join(p, index_name)
            if isfile(candidate):
                p = candidate
                break
        else:
            return "<html><head><title></title><body>Sorry, there's no index file here. Do you want to <a href='/edit?file="+join(path, "index.txt")+"&text=" + quote(TEMPLATE_NEW_FILE_TEXT) + "'>create it?</a>"

    # If we have "button_save" in the segments, the user wanted to save, else we show a preview.
    if "button_save" in segments:
        page = save_file(p, segments)
    else:
        page = preview_file(p, segments)
    return page.encode('ascii', 'xmlcharrefreplace')

    
def preview_file(path, segments, error=""):
    """Load, parse and show the data.

    path is the path of the page including BASE_PATH. segments holds the
    form values as lists; missing "text" and "commit_message" entries are
    added to it (the dict is changed in place).

    If no text was submitted, it is read from the stored source: for a path
    ending in .html from the matching .txt or .mdwn file, for a path ending
    in .txt or .mdwn from that file itself. If there is no such file, the
    standard text for new files is used.

    Returns the edit form created by create_edit_html; error is shown in it.
    """
    ## Get the data
    # If we have no data, get the data from the given file.
    if "text" not in segments:
        if path.endswith(".html"):
            stem = path[:-len(".html")]
            candidates = [stem + ".txt", stem + ".mdwn"]
        elif path.endswith((".txt", ".mdwn")):
            # The path already names the source, as for directory indexes.
            candidates = [path]
        else:
            candidates = []
        for source in candidates:
            if isfile(source):
                with codecs_open(source, encoding=FILE_ENCODING) as f:
                    segments["text"] = [f.read().encode('ascii', 'xmlcharrefreplace')]
                break
    if "text" not in segments:  # now we know that we have no file -> standard content
        segments["text"] = [TEMPLATE_NEW_FILE_TEXT]

    # Add noncritical missing values
    if "commit_message" not in segments:
        segments["commit_message"] = [""]

    # compile the html data, showing the path without the leading BASE_PATH
    relative_path = path[len(BASE_PATH) + 1:]
    return create_edit_html(
        path=relative_path,
        title="editing " + relative_path,
        commit_message=segments["commit_message"][0],
        data=segments["text"][0],
        error=error)

def save_file(path, segments):
    """Save and commit (saves the file with the new contents and commits, requires a changelog message and the data).

    Also rebuilds the static html file of the saved page.

    path is the path of the page including BASE_PATH; a path ending in .html
    is stored as .txt source. segments holds the form values as lists; only
    the first "text" and "commit_message" value is used.

    Returns the "successfully saved" page, a plain message if no text was
    given, or the edit form with an error if the commit message is missing,
    blank or still the unfilled template marker.
    """
    if "text" not in segments:
        return "Can't save: No text given."
    commit_message = segments.get("commit_message", [""])[0]
    if not commit_message.strip() or commit_message == TEMPLATE_COMMIT_MESSAGE:
        return preview_file(path, segments, error="Error: Saving requires a real commit message")

    # Decode before touching the file: a failing decode must not leave a
    # truncated source file behind.
    data = segments["text"][0]
    if isinstance(data, bytes):
        data = data.decode("utf-8")

    # Remember that we need to save the file as txt
    if path.endswith(".html"):
        plainpath = path[:-len(".html")] + ".txt"
    else:
        plainpath = path

    # if the given directories don't exist yet, create them.
    directory = dirname(path)
    if directory and not exists(directory):
        makedirs(directory)
    with codecs_open(plainpath, encoding=FILE_ENCODING, mode="w") as f:
        f.write(data)

    # Rewrite the path to a local path with html ending
    path = path[len(BASE_PATH) + 1:]
    if path.endswith(".txt"):
        path = path[:-4] + ".html"
    if path.endswith(".mdwn"):
        path = path[:-5] + ".html"
    # Now parse the changed file.
    html_path = join(BASE_PATH, STATIC_PATH, path)
    directory = dirname(html_path)
    if not isdir(directory):
        makedirs(directory)
    if path.endswith(".html"):
        # The converter is shared between requests, so clear the state left
        # over from the previous document first.
        markdown.reset()
        html = create_html(markdown.convert(data), title=path)
        # Write bytes: writing text with non-ascii characters to a plain
        # file object fails.
        if not isinstance(html, bytes):
            html = html.encode(FILE_ENCODING)
        with open(html_path, "wb") as f:
            f.write(html)
        print(html_path)
    # And commit via Mercurial. plainpath already starts with BASE_PATH, and
    # only the first commit message is passed, so additional form values
    # can't turn into further hg arguments. The result of hg is not checked.
    call(HG_CALL + [commit_message, plainpath])

    if isinstance(path, bytes):
        path = path.decode("utf-8")
    # make the path absolute
    return TEMPLATE_SAVED.replace(TEMPLATE_FILE_PATH, "/" + path)

# A basic HTTPHandler.
class HTTPHtmlHandler(BaseHTTPRequestHandler):
    """Request handler of the wiki.

    GET requests serve static pages (via serve_html) and edit forms (via
    edit_file); POST requests carry submitted edit forms, which are handed
    to edit_file, too.
    """

    def _send_page(self, status, body, content_type="text/html"):
        """Send a complete response: status line, content type and body."""
        self.send_response(status)
        self.send_header("Content-Type", content_type)
        self.end_headers()
        self.wfile.write(body)

    def do_GET(self):
        """Answer a GET request.

        - paths with a ".." segment get a 400 response,
        - paths starting with EDIT_URL_SEGMENT get the edit form,
        - "/" gets index.txt from the static dir,
        - directories inside the static dir get their index.html,
        - everything else is served from the static dir; if the file can't
          be opened, the edit form for that page is shown instead.
        """
        if ".." in self.path.split("/"):
            self._send_page(400, "Bad request (path contained '..').", "text/plain")
            return
        is_edit = self.path.startswith(EDIT_URL_SEGMENT)
        # The type depends on the file ending - but an edit form is always
        # html, no matter which file gets edited.
        if is_edit:
            content_type = "text/html"
        elif self.path.endswith(".css"):
            content_type = "text/css"
        elif self.path.endswith(".ogg"):
            content_type = "audio/ogg"
        elif self.path.endswith(".ogv"):
            content_type = "video/ogg"
        else:
            content_type = "text/html"

        if is_edit:
            # strip the edit url segment, then call edit_file
            body = edit_file(self.path[len(EDIT_URL_SEGMENT):])
        elif self.path == "/":
            body = serve_html("index.txt")
        elif isdir(join(BASE_PATH, STATIC_PATH, self.path[1:])):
            print("blia " + self.path)
            body = serve_html(join(self.path, "index.html"))
        else:
            # Just serve the site.
            try:
                body = serve_html(self.path)
            except IOError:  # file does not exist
                if self.path.endswith(".html"):
                    target = "file=" + self.path[:-len(".html")] + ".txt"
                else:
                    target = "file=" + self.path
                body = edit_file(target)
                # We answer with the edit form, whatever the file ending is.
                content_type = "text/html"
        self._send_page(200, body, content_type)

    def do_POST(self):
        """Answer a POST request with the result of edit_file.

        The body has to be an urlencoded form. Multipart forms get a 415
        response, because edit_file can only parse urlencoded data. If
        processing fails, a 500 response with an error page is sent.
        """
        ctype, pdict = cgi.parse_header(self.headers.getheader('content-type') or "")
        if ctype == 'multipart/form-data':
            self._send_page(415, "Multipart forms are not supported. Please send the form urlencoded.", "text/plain")
            return
        try:
            content_length = int(self.headers.getheader('content-length') or 0)
            data = self.rfile.read(content_length)
            body = edit_file(data)
        except OSError:
            print("error processing POST request: OSError. Likely someone tries to use a file as directory. Don't :)")
            self._send_page(500, """<html><head><title>Error: Can't save to this path</title></head><body><h1>Can't save to that path</h1>The given filepath likely contains a directory name which is the same as the name of an existing file. This is not supported. Sorry for that - please choose a different filepath.</body></html>""")
        except Exception as err:
            # Log what went wrong and tell the client, instead of leaving the
            # request unanswered.
            print("error processing POST request: %r" % (err,))
            self._send_page(500, "Error processing the POST request.", "text/plain")
        else:
            self._send_page(200, body)

def main():
    """Prepare the wiki directory and run the server until ^C.

    If BASE_PATH doesn't exist yet, it is created and turned into a
    Mercurial repository. After that an empty index page is saved with the
    commit message "initial commit" (on every start). Problems with this
    preparation are reported, but the server is started nonetheless.

    The server listens on port 8081 on all interfaces.
    """
    try:
        if not exists(BASE_PATH):
            makedirs(BASE_PATH)
            call(["hg", "init", BASE_PATH])
        segments = {"text": ("", ), "commit_message": ["initial commit"]}
        save_file(join(BASE_PATH, "static", "index.html"), segments)
    # makedirs and call raise OSError, opening files raises IOError;
    # EnvironmentError covers both.
    except EnvironmentError as e:
        print("could not create the needed wiki directory: %s" % (e,))
    server = HTTPServer(('', 8081), HTTPHtmlHandler)
    try:
        print('started httpserver...')
        server.serve_forever()
    except KeyboardInterrupt:
        print('^C received, shutting down server')
    finally:
        # Release the socket, however the server got stopped.
        server.server_close()


if __name__ == "__main__":
    main()