Source

dA downloader / da_download.py

import os
import logging
import Queue as queue
import shutil
import threading
from requests import session
from lxml import html

log = logging.getLogger(__name__)

class DABrowser(object):
    user_agent = ('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 '
                  '(KHTML, like Gecko) Chrome/24.0.1312.56 Safari/537.17')
    max_tasks = 8

    def __init__(self, path='.'):
        self.session = session()
        self.session.headers.update({'User-Agent': self.user_agent})
        self.path = path
        self.img_urls = queue.Queue()
        self.deactivate = threading.Event()

    def login(self, user=None, pwd=None):
        url = 'https://www.deviantart.com/users/login'
        response = self.session.get(url)
        params = {
            'ref': 'https://www.deviantart.com/',
            'username': user if user else raw_input('user: '),
            'password': pwd if pwd else raw_input('password: '),
            'remember_me': 0,
        }
        response = self.session.post(url, data=params)
        tree = html.fromstring(response.text)
        return tree.get_element_by_id('oh-menu-deviant', None) is not None

    def logout(self):
        response = self.session.post('https://www.deviantart.com/users/logout')
        tree = html.fromstring(response.text)
        return tree.get_element_by_id('oh-menu-deviant', None) is None

    def parse_profile(self, url):
        pass

    def parse_group(self, url):
        pass

    def parse_gallery(self, url):
        offset = 0
        total = 0
        page = 1
        while total == 0 or offset < total:
            response = self.session.get(url, params={'offset': offset})
            tree = html.fromstring(response.text)
            gallery = tree.find_class('stream')
            if not gallery:
                log.info('Incorrect page format')
                return
            gallery = gallery[0]
            part = int(gallery.attrib['gmi-count_per_page'])
            total = int(gallery.attrib.get('gmi-total', 0))
            log.info('Page %d [%d/%d]', page, offset, total)
            item_nr = 0
            for item in gallery.iterfind(".//div[@userid]"):
                log.info(item)
                item_nr += 1
                item = item.find_class('thumb')
                if not item:
                    log.info('Item not available')
                    continue
                item = item[0]
                img_url = item.attrib.get('data-super-full-img') or item.attrib.get('data-super-img')
                if img_url:
                    log.info('Queuing %s', item.attrib['title'])
                    self.img_urls.put(img_url)
                else:
                    self.parse_page(item.attrib['href'], item.attrib['title'])
            if item_nr == 0:
                break
            offset += part
            page += 1

    def parse_page(self, url, name='download'):
        response = self.session.get(url)
        tree = html.fromstring(response.text)
        item = tree.get_element_by_id('download-button', None)
        if item is not None:
            log.info('Queuing %s', name)
            self.img_urls.put(item.attrib['href'])
        else:
            log.info('Item locked')

    def start_downloading(self):
        self.deactivate.clear()
        log.info('Starting %d threads', self.max_tasks)
        for i in range(self.max_tasks):
            t = threading.Thread(target=self.download_task)
            t.daemon = True
            t.start()

    def stop_downloading(self):
        self.deactivate.set()

    def wait_for_end(self):
        self.img_urls.join()

    def download_task(self):
        while not self.deactivate.is_set():
            try:
                self.download_image(self.img_urls.get())
            finally:
                self.img_urls.task_done()

    def download_image(self, url, path=None):
        if path is None:
            path = self.path
        path = os.path.join(path, os.path.split(url))
        if os.path.exists(path):
            log.info('Skipping %s', url)
        else:
            log.info('Downloading %s', url)
            response = self.session.get(url, stream=True)
            if response.status_code == 200:
                with open(path, 'wb') as f:
                    shutil.copyfileobj(response.raw, f)
                    return True


def main():
    logging.basicConfig(level=logging.INFO)
    br = DABrowser('download')
    if br.login():
        log.info('Login successful')
    #br.start_downloading()
    #br.parse_gallery('http://bluex-pl.deviantart.com/favourites/48404227')
    #br.parse_gallery('http://bluex-pl.deviantart.com/favourites/38926098')
    #br.parse_gallery('http://shadowsinking.deviantart.com/gallery/?catpath=/')
    br.parse_gallery('http://browse.deviantart.com/?q=dragon')
    #br.wait_for_end()
    log.info('end')

if __name__ == '__main__':
    main()