Source

VPlayer / search.py

# -*- coding: utf-8 -*-

import re
import urllib
import urllib2
from time import sleep
from math import ceil
from threading import Thread

from PyQt4 import QtCore

from track import Track
from logger import log
from htmldecoder import decode_entities
from vplayer import libxml2dom

class Search(QtCore.QThread):
    """Worker thread that runs an audio search against vkontakte.ru.

    The thread reports progress through old-style PyQt signals:

    * ``status`` -- emitted with ``'nologin'`` (no stored cookies),
      ``'begin'`` (a request is starting) or ``'end'`` (a request finished,
      successfully or not);
    * ``results`` -- emitted from ``run`` with a ``(page, tracks)`` tuple.
    """

    # Class-level defaults.  They are kept so that subclasses which do not
    # call Search.__init__ still find these attributes.  ``results`` must
    # never be mutated in place -- always rebind it on the instance,
    # otherwise every instance would share (and pollute) this one list.
    target = str()
    results = []
    track_count = 0
    page_count = 0
    last_page = -1

    # Offset step between result pages: page N is requested at offset
    # N * PAGE_SIZE.
    PAGE_SIZE = 100

    # Extracts the arguments of the ``operate(...)`` call found in the
    # ``onclick`` attribute of a result entry.
    _OPERATE_RX = re.compile(
        r"return operate\((?P<audioid>\d+),(?P<server>\d+),(?P<user>\d+),"
        r"'(?P<filename>\w+)',(?P<length>\d+)\)")

    def __init__(self, target, act = 0):
        """Prepare a search for *target*.

        target -- unicode search query.
        act    -- 0 to run a fresh search, otherwise the page number that
                  ``run`` should fetch through ``search_more``.

        When no ``cookies`` value is stored in the application settings the
        instance's ``run`` is replaced by a stub that only emits the
        ``'nologin'`` status, and no request state is set up.
        """
        QtCore.QThread.__init__(self)
        self.results = []
        settings = QtCore.QSettings()
        cookies = unicode(settings.value('cookies',
                                         QtCore.QVariant('')).toString())
        if not cookies:
            self.run = lambda: self.emit(QtCore.SIGNAL('status'), 'nologin')
            return
        self.url = 'http://vkontakte.ru/gsearch.php'
        self.headers = {'User-agent': ('Mozilla/5.0 (X11; U; Linux; '
                                       'en-US) AppleWebKit/527+ (KHTML, like Gecko, '
                                       'Safari/419.3) Arora/0.6'),
                        'Cookie': cookies}
        # The query is sent cp1251-encoded; characters outside that code
        # page become XML character references.
        self.target = target.encode('cp1251', 'xmlcharrefreplace')
        self.act = act

    def run(self):
        """Thread body: run the requested search and emit ``results``."""
        act = self.act
        if act == 0:
            res = self.search(self.target)
            self.emit(QtCore.SIGNAL('results'), (0, res))
        else:
            res = self.search_more(act)
            self.emit(QtCore.SIGNAL('results'), (act, res))

    def search(self, target):
        """Fetch and parse the first result page for *target*.

        target -- already encoded query string.  When it differs from the
                  current ``self.target`` the accumulated state is reset.

        Returns the list of parsed tracks (also stored in ``self.results``),
        or None when the HTTP request failed.  ``'begin'`` is emitted before
        the request and ``'end'`` once it is over, even if parsing raises.
        """
        self.emit(QtCore.SIGNAL('status'), 'begin')
        if target != self.target:
            self.target = target
            self.results = []
            self.track_count = 0
            self.page_count = 0
            self.last_page = -1
        post = urllib.urlencode({ 'section': 'audio', 'q': self.target })
        pagetext = self._fetch(post)
        if pagetext is None:
            self.emit(QtCore.SIGNAL('status'), 'end')
            return None
        try:
            self.results = self._parse_page(pagetext)
            # Page 0 has been fetched; search_more() continues from here.
            self.last_page = 0
        finally:
            # Always close the 'begin'/'end' pair so listeners are not left
            # waiting if parsing fails unexpectedly.
            self.emit(QtCore.SIGNAL('status'), 'end')
        return self.results

    def search_more(self, page = None):
        """Fetch and parse a further result page for the current target.

        page -- zero-based page number; when omitted, the page following
                ``self.last_page`` is requested.

        Returns the tracks found on that page (an empty list on any
        failure) and appends them to ``self.results``.  On success
        ``self.last_page`` is set to *page*.
        """
        if not self.target:
            self.emit(QtCore.SIGNAL('status'), 'end')
            return []
        if page is None:
            page = self.last_page + 1
        self.emit(QtCore.SIGNAL('status'), 'begin')
        post = urllib.urlencode({ 'section': 'audio', 'q': self.target,
                                  'offset': page * self.PAGE_SIZE })
        results = []
        pagetext = self._fetch(post)
        if pagetext is not None:
            try:
                results = self._parse_page(pagetext)
            except Exception:
                # Best effort: a page that cannot be parsed yields no tracks.
                log.error('Could not parse search results page')
                results = []
            else:
                self.last_page = page
                # Rebind instead of ``+=`` so a list shared through the
                # class attribute is never extended in place.
                self.results = self.results + results
        self.emit(QtCore.SIGNAL('status'), 'end')
        return results

    def _fetch(self, post):
        """POST *post* to the search URL; return the body or None on error."""
        req = urllib2.Request(self.url, post, self.headers)
        try:
            urlreq = urllib2.urlopen(req)
            try:
                return urlreq.read()
            finally:
                urlreq.close()
        except Exception:
            # Network failures are expected here and must not kill the
            # thread; unlike a bare ``except`` this still lets SystemExit
            # and KeyboardInterrupt through.
            log.error('Search request failed')
            return None

    def _parse_page(self, text):
        """Extract Track objects from the HTML of a result page.

        Returns an empty list when the page has no ``searchResults``
        element.  Entries whose markup does not have the expected shape are
        skipped.
        """
        log.debug('parsing page...')
        dom = libxml2dom.parseString(text, html=1)
        doc = dom.childNodes[0]
        table = doc.getElementById('searchResults')
        try:
            entries = table.childNodes[0].childNodes[0].childNodes
        except (AttributeError, IndexError):
            # No result container (or an empty one) on this page.
            return []
        tracks = []
        for node in entries:
            if node.name != 'div':
                continue
            track = self._parse_track(node)
            if track is not None:
                tracks.append(track)
        return tracks

    def _parse_track(self, node):
        """Build a Track from one result ``div``; return None if malformed."""
        # NOTE(review): the child indices below mirror the markup this code
        # was written against -- confirm against the current page layout.
        try:
            info = node.childNodes[1].childNodes[0].childNodes[0]
            names = info.childNodes[2].childNodes[0]
            artist = names.childNodes[1].childNodes[0].data
            title = names.childNodes[3].childNodes[0].data
            operate = info.childNodes[0].childNodes[1].getAttribute('onclick')
        except (AttributeError, IndexError):
            log.debug('Skipping search result with unexpected markup')
            return None
        match = self._OPERATE_RX.search(operate or '')
        if match is None:
            log.debug('Skipping search result without playback data')
            return None
        data = match.groupdict()
        link = 'http://cs' + data['server'] + '.vkontakte.ru/u' + data['user'] \
            + '/audio/' + data['filename'] + '.mp3'
        return Track(artist = artist, title = title,
                     length = int(data['length']), url = link)


class SearchTrack(Search):
    """Search thread that looks for one specific track.

    The query sent to the site is ``"<artist> - <title>"``; from the
    results only entries whose artist and title both equal the requested
    ones (ignoring case) are considered, and the longest of them is
    reported.
    """

    def __init__(self, artist, title):
        """Prepare a search for *title* by *artist*.

        Both arguments may be unicode or UTF-8 encoded byte strings.  The
        cookie/header setup (including the ``'nologin'`` fallback when no
        cookies are stored) is delegated to ``Search.__init__``.
        """
        req = '%s - %s' % (artist, title)
        Search.__init__(self, SearchTrack._to_unicode(req))
        self.artist = artist
        self.title = title
        self.req = req

    @staticmethod
    def _to_unicode(value):
        """Return *value* as unicode, decoding byte strings as UTF-8."""
        if isinstance(value, unicode):
            return value
        return value.decode('utf-8', 'replace')

    @staticmethod
    def _fold(value):
        """Return the upper-cased unicode form of *value* for comparisons."""
        return SearchTrack._to_unicode(value).upper()

    def run(self):
        """Thread body: emit ``results`` only when a matching track exists."""
        res = self.do_search()
        if res:
            self.emit(QtCore.SIGNAL('results'), (0, res))

    def do_search(self):
        """Search for the requested track and pick the best match.

        Returns a one-element list holding the longest track whose artist
        and title match the request (the first one wins on equal length),
        or None when the search yielded nothing or nothing matched.
        """
        query = self._to_unicode(self.req).encode('utf-8')
        found = self.search(query)
        if not found:
            log.debug('Could not find track %s ' % query)
            return
        # Compare in unicode: comparing UTF-8 bytes with unicode text never
        # matches non-ASCII names and does not fold their case.
        wanted = (self._fold(self.artist), self._fold(self.title))
        best = None
        for track in found:
            if (self._fold(track.artist), self._fold(track.title)) != wanted:
                continue
            # Every match is the same artist/title, so duplicates collapse
            # to a single entry: keep the strictly longest one.
            if best is None or best.length < track.length:
                best = track
        res = []
        if best is not None:
            res.append(best)
        log.debug('Found %d tracks' % len(res))
        if res:
            return res


class SearchTrackWithBitrate(SearchTrack):
    def do_search_maxbitrate(self):
        tracks = self.do_search()
        res = []
        for track in tracks:
            addr = httplib.urlsplit(track.url)
            conn = httplib.HTTPConnection(addr.netloc)
            try:
                conn.request("GET", addr.path)
            except:
                continue
            r1 = conn.getresponse()
            if r1.status != 200:
                log.error('Could not get track')
                raise DownloadError()
            length = r1.length