Source

trustlink / trustlink / client.py

Full commit
from urllib import unquote_plus
from urlparse import urlsplit
import shutil
import phpserialize
import logging
import os.path
import anydbm
import json
import urllib

# Names of the reserved (non-URL) entries of the link database; presumably
# these are the service keys of the Trustlink feed -- confirm against the feed.
# NOTE(review): build_links() reads '__trustlink_start__' (single underscore
# before "start") while this tuple lists '__trustlink__start__'; similarly
# '__trustlink__after_text__' has a double underscore that
# '__trustlink_before_text__' lacks. Verify the spellings before relying on
# this tuple for key matching.
SPECIAL_KEYS = ('__test_tl_link__', '__trustlink_end__', '__trustlink__start__',
                '__trustlink_robots__', '__trustlink_delimiter__',
                '__trustlink__after_text__', '__trustlink_before_text__')


class TrustlinkError(Exception):
    """Error raised by the Trustlink client, e.g. for a missing link database."""


def smart_str(val):
    """Return ``val`` encoded as UTF-8 if it is a unicode string.

    Any other value (byte strings included) is handed back untouched.
    """
    return val.encode('utf-8') if isinstance(val, unicode) else val


class TrustlinkClient(object):
    """Look up the Trustlink links stored for one page of a site.

    Links are read from the dbm file ``trustlink.<host>.db`` inside
    ``data_dir``; every value in that file is a JSON document.
    """

    def __init__(self, trustlink_user, url, data_dir,
                 remote_ip=None, test=False,
                 static=False, multi_site=False,
                 verbose=False, charset='utf-8', force_show_code=False,
                 request_headers=None, request_params=None):
        """Prepare a client for the page at ``url``.

        Args:
            trustlink_user: Trustlink account identifier; also the secret
                compared against the ``HTTP_TRUSTLINK`` header and the
                ``trustlink_test`` request parameter.
            url: absolute URL of the current page (must contain a host).
            data_dir: directory holding the ``trustlink.<host>.db`` file.
            remote_ip: visitor address, checked against the
                ``__trustlink_robots__`` database entry.
            test: when true, the ``__test_tl_link__`` entry is returned
                instead of the links of the page.
            static: when true, only the path part of ``url`` is used as the
                lookup key instead of the whole URL.
            multi_site, verbose, charset: stored on the instance; not used
                by this class itself.
            force_show_code: when true, the start/end markers are emitted
                regardless of ``remote_ip``.
            request_headers: optional mapping; an ``HTTP_TRUSTLINK`` value
                equal to ``trustlink_user`` turns on test/robot/verbose mode.
            request_params: optional mapping; a ``trustlink_test`` value
                equal to ``trustlink_user`` turns on force_show_code/verbose.

        Raises:
            TrustlinkError: if ``url`` has no host component.
        """
        url_info = urlsplit(url)
        if not url_info.hostname:
            # Without a host the database file name cannot be derived; fail
            # with a clear message instead of an AttributeError on None.
            raise TrustlinkError('Cannot determine host from URL: %r' % (url,))

        host_prepared = url_info.hostname.lower()
        if host_prepared.startswith('www.'):
            host_prepared = host_prepared[4:]
        self.host = host_prepared

        self.static = static
        self.data_dir = data_dir
        self.remote_ip = remote_ip
        self.test = test

        url_prepared = url_info.path if self.static else url
        self.url = unquote_plus(url_prepared)

        self.multi_site = multi_site
        self.verbose = verbose
        self.charset = charset
        self.force_show_code = force_show_code
        # TODO: a '__trustlink_debug__' database flag that would force
        # verbose / force_show_code was sketched here but never implemented.

        self.trustlink_user = trustlink_user
        self.isrobot = False

        if request_headers and request_headers.get('HTTP_TRUSTLINK') == self.trustlink_user:
            self.test = True
            self.isrobot = True
            self.verbose = True

        if request_params and request_params.get('trustlink_test') == self.trustlink_user:
            self.force_show_code = True
            self.verbose = True

    def build_links(self):
        """Return the links and wrapper markup for the current page.

        Returns:
            dict with the keys ``'prepend'`` and ``'append'`` (strings to put
            before/after the link block) and ``'links'`` (a list of dicts
            with ``'anchor'``, ``'text'``, ``'url'`` and ``'host'``).

        Raises:
            TrustlinkError: if the link database file does not exist.
        """
        db = self.init_database_handler()
        try:
            return self._build_links(db)
        finally:
            # Always release the dbm handle, even if a lookup fails.
            db.close()

    def _build_links(self, db):
        """Assemble the build_links() result from an open database."""
        prepend = ''
        append = ''

        start = self._load_value(db, '__trustlink_start__')
        end = self._load_value(db, '__trustlink_end__')
        # The robots entry is only consulted when there is a marker to emit.
        show_wrapper = bool(start or end) and self._show_wrapper(db)
        if start and show_wrapper:
            prepend += start
        if end and show_wrapper:
            append += end

        if self.test:
            # The loop below indexes each link by key, so the test link must
            # stay a mapping and must not be flattened into a list of values.
            test_link = self._load_value(db, '__test_tl_link__', flatten=False)
            links = [test_link] if isinstance(test_link, dict) else []
        else:
            links = list(self._load_value(db, smart_str(self.url), []))

        items = [self._make_item(link) for link in links]

        if self.test:
            prepend = '<noindex>' + prepend
            append = append + '</noindex>'

        return {
            'prepend': prepend,
            'links': items,
            'append': append,
        }

    def _show_wrapper(self, db):
        """Tell whether the start/end markers should be emitted."""
        if self.force_show_code:
            return True
        if not self.remote_ip:
            return False
        return self.remote_ip in self._load_value(db, '__trustlink_robots__', [])

    @staticmethod
    def _make_item(link):
        """Convert one stored link mapping into the dict handed to callers."""
        item = {
            'anchor': link['anchor'],
            'text': link['text'],
            # Prefer the punycode form of the URL when the entry carries one.
            'url': link.get('punicode_url') or link['url'],
        }
        # A URL without a network location has no hostname; use '' for it.
        host = urlsplit(item['url']).hostname or ''
        if host.startswith('www.'):
            host = host[4:]
        item['host'] = host
        return item

    @staticmethod
    def _load_value(db, key, default=None, flatten=True):
        """Read and JSON-decode ``db[key]``; return ``default`` if absent.

        With ``flatten`` true a decoded JSON object is reduced to the list of
        its values, otherwise it is returned as a dict.
        """
        try:
            # Item access works on every dbm backend; not all of them
            # provide a dict-style .get().
            raw = db[key]
        except KeyError:
            return default
        value = json.loads(raw)
        if flatten and isinstance(value, dict):
            return list(value.values())
        return value

    def init_database_handler(self):
        """Open the link database of ``self.host`` and return the handle.

        Raises:
            TrustlinkError: if the database file does not exist.
        """
        path = os.path.join(self.data_dir, 'trustlink.%s.db' % self.host)
        if not os.path.exists(path):
            raise TrustlinkError('Link database does not exist at %s' % path)
        return anydbm.open(path)


class TrustlinkDownloader(object):
    """Fetch the link feed of one host and store it as a dbm file."""

    def __init__(self, trustlink_user, host, data_dir):
        """Remember the account, the normalised host and the target directory.

        Args:
            trustlink_user: Trustlink account identifier used in the feed URL.
            host: site host name; lower-cased and stripped of a leading
                ``www.`` so the file name matches what TrustlinkClient opens.
            data_dir: directory in which ``trustlink.<host>.db`` is written.
        """
        # TrustlinkClient lower-cases the host before building the database
        # file name, so the same normalisation is applied here.
        host = host.lower()
        if host.startswith('www.'):
            host = host[4:]
        self.host = host
        self.trustlink_user = trustlink_user
        self.data_dir = data_dir

    def download(self):
        """Download the feed and (re)write ``trustlink.<host>.db``.

        The data is first written to ``trustlink.<host>.tmp`` and then copied
        over the final file.

        Raises:
            TrustlinkError: if the service answers with a ``FATAL ERROR:``
                message.
        """
        path = os.path.join(self.data_dir, 'trustlink.%s.db' % self.host)
        tmp_path = os.path.join(self.data_dir, 'trustlink.%s.tmp' % self.host)

        tl_url = 'http://db.trustlink.ru/%s/%s/UTF-8' % (self.trustlink_user,
                                                         self.host)
        logging.debug('Downloading %s', tl_url)
        data = self.network_request(tl_url)
        if data.startswith('FATAL ERROR:'):
            # TrustlinkError subclasses Exception, so callers that catch
            # Exception keep working.
            raise TrustlinkError('Trustlink fatal error: %s' % data)
        logging.debug('Downloaded %d bytes', len(data))

        mapping = phpserialize.loads(data)
        logging.debug('Found %d items in unserialized data', len(mapping))

        logging.debug('Writing data dbm file: %s', tmp_path)
        db = anydbm.open(tmp_path, 'n')
        try:
            for key, value in mapping.items():
                db[key] = json.dumps(value)
        finally:
            # Close the handle even when a write fails.
            db.close()

        # NOTE(review): shutil.copy is not atomic; a reader opening the
        # database while the copy is running may see a partial file.
        logging.debug('Copying %s to %s', tmp_path, path)
        shutil.copy(tmp_path, path)

    def network_request(self, url):
        """Return the body of the HTTP response for ``url``."""
        response = urllib.urlopen(url)
        try:
            return response.read()
        finally:
            response.close()