# Source

# trustlink / trustlink.py

import anydbm
import logging
import os
import shutil
import urllib
from urllib import unquote_plus
from urlparse import urlsplit

import phpserialize

class TrustlinkClient(object):
    """Per-request Trustlink client.

    The constructor normalizes the requested URL into ``host`` and ``url``
    and derives the ``test`` / ``isrobot`` / ``verbose`` /
    ``force_show_code`` flags from the request headers and GET parameters.
    ``build_links`` turns the link records of the current page into a plain
    dict that a template can render.

    NOTE(review): ``build_links`` reads ``self.db`` and calls
    ``self.load_page_links()``; neither is defined in this class.
    Presumably a subclass or code not shown here provides them -- confirm.
    """

    def __init__(self, trustlink_user, url, static=False, multi_site=False,
                 verbose=False, charset='utf-8', force_show_code=False,
                 headers=None, get=None):
        """Store the settings and compute the request-dependent flags.

        :param trustlink_user: Trustlink user id; also the secret value that
            the ``Trustlink`` header / ``trustlink_test`` GET parameter are
            compared against.
        :param url: URL of the requested page. It must contain a hostname,
            otherwise ``urlsplit(url).hostname`` is ``None`` and an
            ``AttributeError`` is raised.
        :param static: when true only the path component of ``url`` is kept
            in ``self.url``; otherwise the whole URL is kept.
        :param multi_site: stored as-is on the instance.
        :param verbose: initial verbosity flag; may be forced on below.
        :param charset: stored as-is on the instance.
        :param force_show_code: initial flag; may be forced on below.
        :param headers: optional mapping of request headers (needs ``get``).
        :param get: optional mapping of GET parameters (needs ``get``).
        """
        url_info = urlsplit(url)
        self.static = static
        self.host = self._strip_www(url_info.hostname.lower())

        # Static mode keeps only the path; either form is stored with
        # percent-escapes and '+' decoded.
        url_prepared = url_info.path if self.static else url
        self.url = unquote_plus(url_prepared)

        self.multi_site = multi_site
        self.verbose = verbose
        self.charset = charset
        self.force_show_code = force_show_code
        # TODO: the original author left a disabled, '???'-marked check here:
        # a truthy '__trustlink_debug__' entry in the link data was meant to
        # force both self.verbose and self.force_show_code to True.

        self.trustlink_user = trustlink_user
        self.isrobot = False
        self.test = False

        # A 'Trustlink' header equal to the user id switches on test mode.
        if headers and headers.get('Trustlink') == self.trustlink_user:
            self.test = True
            self.isrobot = True
            self.verbose = True

        # A 'trustlink_test' GET parameter equal to the user id forces the
        # code to be shown.
        if get and get.get('trustlink_test') == self.trustlink_user:
            self.force_show_code = True
            self.verbose = True

    @staticmethod
    def _strip_www(host):
        """Return *host* without a leading ``www.`` prefix."""
        if host.startswith('www.'):
            return host[4:]
        return host

    def build_links(self):
        """Build the renderable link data for the current page.

        :returns: dict with three keys: ``prepend`` (the
            ``__trustlink_start__`` value from ``self.db`` or ``''``),
            ``items`` (one dict per link with ``anchor``, ``text``, ``url``
            and ``host``) and ``append`` (the ``__trustlink_end__`` value
            from ``self.db`` or ``''``).
        :raises KeyError: if a link record lacks ``anchor``, ``text`` or
            ``url``.
        """
        links = self.load_page_links()

        prepend = ''
        start = self.db.get('__trustlink_start__', None)
        if start:
            # TODO: check remote_ip & self.db['__trustlink_robots__']
            prepend += start

        items = []
        for link in links or ():
            # Prefer the punycode form of the URL when the record has a
            # non-empty one; the key may be absent altogether.
            url = link.get('punicode_url') or link['url']
            # The host is parsed from the URL string (the record itself is
            # not a URL); a URL without a hostname yields an empty host.
            host = urlsplit(link['url']).hostname or ''
            items.append({
                'anchor': link['anchor'],
                'text': link['text'],
                'url': url,
                'host': self._strip_www(host),
            })

        append = ''
        # Key spelled like the other '__trustlink_*__' markers.
        end = self.db.get('__trustlink_end__', None)
        if end:
            # TODO: check remote_ip & self.db['__trustlink_robots__']
            append += end

        # TODO: port from the PHP snippet -- when self.test is set and
        # self.isrobot is not, the rendered result should be wrapped in
        # <noindex>...</noindex>.
        return {'prepend': prepend, 'items': items, 'append': append}


class TrustlinkDownloader(object):
    """Fetches the Trustlink database of one host into a local dbm file."""

    def __init__(self, trustlink_user, host, data_dir):
        """Remember what to download and where to store it.

        :param trustlink_user: Trustlink user id, used in the download URL.
        :param host: site host name; a leading ``www.`` is dropped.
        :param data_dir: directory in which the ``trustlink.<host>.db`` and
            ``trustlink.<host>.tmp`` files are created.
        """
        if host.startswith('www.'):
            self.host = host[4:]
        else:
            self.host = host
        self.trustlink_user = trustlink_user
        # Misspelled name the attribute used to be stored under; kept so any
        # code reading it keeps working.
        self.truslink_user = trustlink_user
        self.data_dir = data_dir

    def download(self):
        """Download the serialized link data and write it to the dbm file.

        The data is first written to ``trustlink.<host>.tmp`` inside
        ``data_dir`` and then copied over ``trustlink.<host>.db``.

        :raises Exception: if the server response starts with
            ``FATAL ERROR:``.
        """
        path = os.path.join(self.data_dir, 'trustlink.%s.db' % self.host)
        tmp_path = os.path.join(self.data_dir,
                                'trustlink.%s.tmp' % self.host)

        tl_url = 'http://db.trustlink.ru/%s/%s/UTF-8' % (self.trustlink_user,
                                                         self.host)
        logging.debug('Downloading %s', tl_url)
        response = urllib.urlopen(tl_url)
        try:
            data = response.read()
        finally:
            response.close()
        if data.startswith('FATAL ERROR:'):
            raise Exception('Trustlink fatal error: %s' % data)
        logging.debug('Downloaded %d bytes', len(data))

        mapping = phpserialize.loads(data)
        logging.debug('Found %d items in unserialized data', len(mapping))

        # The temp file is opened only after a successful download and parse,
        # so a failed run leaves no half-created file or open handle behind.
        logging.debug('Writing data dbm file: %s', tmp_path)
        db = anydbm.open(tmp_path, 'n')
        try:
            # NOTE(review): dbm backends store strings only; if the
            # unserialized values can be nested structures they would need
            # to be serialized first -- confirm the shape of the data.
            for key, value in mapping.items():
                db[key] = value
        finally:
            db.close()

        logging.debug('Copying %s to %s', tmp_path, path)
        shutil.copy(tmp_path, path)