contentbrowser / src / browser.py

# -*- coding: utf-8 -*-
import os

from werkzeug.utils import redirect
from werkzeug.routing import Map, Rule
from werkzeug.exceptions import HTTPException
from werkzeug.wrappers import Request, Response
from werkzeug.urls import url_quote_plus, url_unquote_plus
from jinja2 import Environment, FileSystemLoader

import requests

from whoosh.query import Or, Term
from whoosh.highlight import ContextFragmenter

from storage import Storage
from settings import PROXY_URL, CSS_THEME
from utils import get_hostname, highlights, is_valid_url, prepend_proxy_url


class Browser(object):
    """The WSGI application to browse contents from URIs.

    Each route declared in ``url_map`` names an endpoint that is served by
    the ``on_<endpoint>`` method of this class (see ``dispatch_request``).
    """

    def __init__(self):
        """Set up the storage, the Jinja environment and the URL routes."""
        # Storage configuration
        self.storage = Storage()

        # Jinja environment and filters; templates are loaded from the
        # `templates` directory located next to this module.
        template_path = os.path.join(os.path.dirname(__file__), 'templates')
        self.jinja_env = Environment(loader=FileSystemLoader(template_path),
                                     autoescape=True)
        self.jinja_env.filters['hostname'] = get_hostname
        self.jinja_env.filters['highlights'] = highlights
        self.jinja_env.filters['proxify'] = prepend_proxy_url

        # Local URLs: each endpoint maps to an `on_<endpoint>` view below.
        self.url_map = Map([
            Rule('/', endpoint='new_url'),
            Rule('/r', endpoint='render_content'),
            Rule('/s', endpoint='search_content'),
            Rule('/d', endpoint='delete_content'),
            Rule('/doc', endpoint='documentation'),
        ])

    def on_new_url(self, request):
        """View that displays forms to submit a URL or search contents.

        On POST, a submitted `url` field is validated and, when valid,
        the client is redirected to `/r`; a submitted `search` field
        redirects to `/s`. In every other case (GET, invalid URL, POST
        with neither field) the `new_url.html` form is rendered.
        """
        error = None
        url = ''
        if request.method == 'POST':
            if 'url' in request.form:
                # Deals with the submitted URL (validity + quoting)
                url = request.form['url']
                if not is_valid_url(url):
                    # Fall through to re-render the form with the rejected
                    # URL and the error message.
                    error = 'Please enter a valid URL'
                else:
                    quoted_url = url_quote_plus(url)
                    return redirect('/r?url=%s' % quoted_url)
            elif 'search' in request.form:
                search = request.form['search']
                return redirect('/s?terms=%s' % url_quote_plus(search))

        return self.render_template('new_url.html',
            url=url,
            error=error,
            theme=CSS_THEME,
            proxy_url=PROXY_URL
        )

    def on_render_content(self, request):
        """View that renders content from the `url` GET argument.

        Redirects to `/` when the argument is missing. SSL and connection
        failures raised while retrieving the content are reported to the
        template through `error`; title and content then stay empty.
        """
        error = None
        title, content = '', ''

        # Retrieves the url from GET arguments and unquote it
        quoted_url = request.args.get('url')
        if quoted_url is None:
            return redirect('/')
        # NOTE(review): Werkzeug normally URL-decodes `request.args` values
        # already, so this looks like a second unquoting pass -- confirm it
        # is intended (a literal `+` or `%xx` in the target URL would be
        # altered).
        url = url_unquote_plus(quoted_url)

        # Retrieves title and content from the given URL
        try:
            title, content = self.storage.retrieve_content(url)
        except requests.exceptions.SSLError:
            # Handled before ConnectionError: in requests, SSLError derives
            # from ConnectionError, so the order of these clauses matters.
            error = ('You tried to reach a web page with an invalid '
                     '(or self-signed) SSL certificate. This case is '
                     'not handled for now due to related security issues.')
        except requests.exceptions.ConnectionError:
            error = ('You tried to reach a web page without being '
                     'connected to the Internet. So sad.')

        return self.render_template('render_content.html',
            url=url,
            title=title,
            error=error,
            theme=CSS_THEME,
            content=content,
            quoted_url=quoted_url
        )

    def on_search_content(self, request):
        """View that returns URLs related to the `terms` GET argument.

        Redirects to `/` when the argument is missing.
        """
        # Retrieves terms from GET arguments and unquote it
        quoted_terms = request.args.get('terms')
        if quoted_terms is None:
            return redirect('/')
        terms = url_unquote_plus(quoted_terms)

        # Searches terms in Whoosh index. The whole `terms` string is used
        # as a single term, matched against either the content or the title.
        with self.storage.searcher as searcher:
            query = Or([Term("content", terms), Term("title", terms)])
            results = searcher.search(query)
            results.fragmenter = ContextFragmenter(maxchars=100, surround=150)

            # Rendered inside the `with` block, i.e. before the searcher
            # context exits (presumably the results need it to stay open).
            return self.render_template('render_results.html',
                results=results,
                theme=CSS_THEME
            )

    def on_delete_content(self, request):
        """View that deletes data related to the `url` GET argument.

        Always answers with a redirect to `/`, whether or not the argument
        was provided.
        """
        # Retrieves the url from GET arguments and unquote it
        quoted_url = request.args.get('url')
        if quoted_url is None:
            return redirect('/')
        url = url_unquote_plus(quoted_url)
        self.storage.delete_content(url)

        return redirect('/')

    def on_documentation(self, request):
        """View that displays documentation."""
        return self.render_template('documentation.html',
            theme=CSS_THEME
        )

    ## WERKZEUG INTERNALS (see http://werkzeug.pocoo.org/docs/tutorial/)

    def render_template(self, template_name, **context):
        """Render `template_name` with `context` into an HTML response."""
        t = self.jinja_env.get_template(template_name)
        return Response(t.render(context), mimetype='text/html')

    def dispatch_request(self, request):
        """Route `request` to the matching `on_<endpoint>` view.

        An `HTTPException` raised while matching the URL or by the view
        itself is returned instead of propagating; `wsgi_app` then calls
        it like any other response.
        """
        adapter = self.url_map.bind_to_environ(request.environ)
        try:
            endpoint, values = adapter.match()
            return getattr(self, 'on_' + endpoint)(request, **values)
        except HTTPException as e:
            # `as` form: valid on Python 2.6+ and Python 3, unlike the
            # comma form it replaces.
            return e

    def wsgi_app(self, environ, start_response):
        """WSGI entry point: build the request, dispatch it, send the response."""
        request = Request(environ)
        response = self.dispatch_request(request)
        return response(environ, start_response)

    def __call__(self, environ, start_response):
        """Make the instance itself a WSGI callable (delegates to `wsgi_app`)."""
        return self.wsgi_app(environ, start_response)


if __name__ == '__main__':
    # Running this module directly only prints a pointer to the launcher
    # script. The parenthesised form prints the same text on Python 2 and
    # is also valid on Python 3.
    print("BACKWARD INCOMPATIBILITY: You need to run `python src/launch.py` now.")
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.