Snippets

Ilnur Ibragimov habraproxy

Created by Ilnur Ibragimov last modified
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
import logging
import functools
import webbrowser

import lxml.html
import tornado
import tornado.web
from tornado import gen
from tornado.httpclient import AsyncHTTPClient

# Matches every standalone run of exactly six word characters.  The inline
# (?u) flag makes \w and \b Unicode-aware, so non-ASCII words count too.
# Written as a plain unicode literal with escaped backslashes instead of the
# Python 2-only ``ur''`` prefix, so the module also parses on Python 3; the
# compiled pattern text is unchanged.
RE = re.compile(u'(?u)(\\b\\w{6}\\b)')
# Module-wide logger; its level is configured in main().
logger = logging.getLogger('habraproxy')


def tm_adder(mo):
    """Replacement callback for ``re.sub``: append a trademark sign.

    :param mo: match object whose first group holds the captured word.
    :returns: the captured word followed by ``™``; when the first group is
        empty or did not participate, the whole match is returned unchanged.
    """
    word = mo.group(1)
    if not word:
        # Nothing captured: leave the matched text as it was.
        return mo.group(0)
    return u'%s™' % word


class ProxyHandler(tornado.web.RequestHandler):
    """Relay GET requests to the upstream site, decorating HTML on the way.

    Every request URI is fetched from ``site``; ``text/html`` responses are
    passed through :meth:`change` before being written back to the client.
    """

    def initialize(self, site=None, host=None, port=None):
        """Store the settings supplied in the URL spec.

        :param site: base URL of the upstream site; the request URI is
            appended to it and its occurrences in HTML are rewritten.
        :param host: host part of the proxy URL substituted for ``site``.
        :param port: port part of the proxy URL substituted for ``site``.
        """
        self.site = site
        self.host = host
        self.port = port

    @gen.coroutine
    def get(self, *args, **kwargs):
        """Fetch the requested URI upstream and relay status, type and body.

        Positional/keyword arguments from the URL pattern are accepted but
        not used.
        """
        logger.debug(self.request.uri)
        http_client = AsyncHTTPClient()
        try:
            response = yield http_client.fetch(
                "%s%s" % (self.site, self.request.uri))
        except tornado.httpclient.HTTPError as error:
            # fetch() raises for non-2xx answers.  When the upstream did
            # answer (404, 403, ...), relay that answer instead of turning
            # it into a 500 from the proxy.  Without a response object
            # there is nothing to relay, so let the error propagate.
            if error.response is None:
                raise
            response = error.response

        # Pass the reason explicitly so non-standard status codes are
        # accepted by set_status().
        self.set_status(response.code, response.reason)

        # Some responses carry no body at all; normalise to empty bytes.
        body = response.body or b''
        content_type = response.headers.get('Content-Type', '')

        if content_type:
            # An empty document cannot be parsed by lxml, so only rewrite
            # HTML that actually has content.
            if 'text/html' in content_type and body:
                body = self.change(body)
            self.set_header('Content-Type', content_type)

        # Writing even an empty chunk is rejected for body-less statuses,
        # so skip the write when there is nothing to send.
        if body:
            self.write(body)
        self.finish()

    def change(self, body):
        """Rewrite an HTML document for serving through the proxy.

        Occurrences of the upstream site URL are replaced with the proxy's
        own ``http://host:port`` URL, and every six-character word in
        visible text gets a trademark sign appended (via ``RE`` and
        ``tm_adder``).

        :param body: raw HTML document as received from upstream.
        :returns: the rewritten document serialised as UTF-8 bytes.
        """
        site = self.site
        proxy_url = 'http://%s:%s' % (self.host, self.port)
        # On Python 3 the fetched body is bytes while the settings are text;
        # bytes.replace() needs bytes arguments.  On Python 2 ``bytes`` is
        # ``str`` and this branch is skipped for ordinary str settings.
        if isinstance(body, bytes) and not isinstance(site, bytes):
            site = site.encode('utf-8')
            proxy_url = proxy_url.encode('utf-8')
        body = body.replace(site, proxy_url)

        tree = lxml.html.document_fromstring(body)

        for element in tree.iter():
            tag = element.tag
            # Comments and processing instructions expose a factory function
            # as their tag; their text, like the content of <script> and
            # <style>, is not visible page text and must stay untouched.
            skip_text = callable(tag) or tag.lower() in ('script', 'style')
            if element.text and not skip_text:
                element.text = RE.sub(tm_adder, element.text)
            # The tail is the text *after* the node's closing tag, i.e. part
            # of the parent's visible content, so it is always processed.
            if element.tail:
                element.tail = RE.sub(tm_adder, element.tail)

        body = lxml.html.tostring(tree.getroottree(), encoding='utf-8')
        return body


def open_browser(url):
    """Ask the default web browser to show *url* in a new tab.

    :param url: address to open.
    """
    # new=2 requests a new browser tab where the browser supports it.
    webbrowser.open(url, new=2)


def main():
    """Parse command-line options, start the proxy and run the IO loop.

    Options:
      --host                address the proxy binds to and advertises in
                            rewritten links (default 127.0.0.1)
      --port                TCP port to listen on (default 8232)
      --site                upstream site to proxy
      --dont-start-browser  do not open the proxy index page in a browser

    Blocks until the IO loop is stopped.
    """
    import argparse
    # Imported explicitly rather than relying on tornado.web importing the
    # ioloop submodule as a side effect.
    from tornado.ioloop import IOLoop

    parser = argparse.ArgumentParser()
    parser.add_argument('--host', default='127.0.0.1')
    parser.add_argument('--port', default=8232, type=int)
    parser.add_argument('--site', default='https://habrahabr.ru')
    parser.add_argument('--dont-start-browser', dest='dont_start_browser',
                        action='store_true', default=False)
    args = parser.parse_args()

    logging.basicConfig()
    logger.setLevel(logging.DEBUG)

    handler_settings = dict(site=args.site, host=args.host, port=args.port)
    application = tornado.web.Application([
        (r'.*', ProxyHandler, handler_settings),
    ], debug=True)
    # Bind to the requested host only; without an address the server would
    # listen on every interface regardless of --host.
    application.listen(args.port, address=args.host)

    ioloop = IOLoop.instance()
    logger.info('Proxy started at %s:%s. Site: %s',
                args.host, args.port, args.site)
    if not args.dont_start_browser:
        index_url = 'http://%s:%s/' % (args.host, args.port)
        # Scheduled on the loop so the browser is launched once the loop
        # is running and the server can answer.
        ioloop.add_callback(functools.partial(open_browser, index_url))
    ioloop.start()


# Start the proxy only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

Comments (0)

HTTPS SSH

You can clone a snippet to your computer for local editing. Learn more.