Commits

Szymon Wróblewski committed 48746e3

initial commit

  • Participants

Comments (0)

Files changed (2)

File .hgignore

+syntax: glob
+.*
+*.pyc
+*.egg-info
+dist
+build
+MANIFEST

File da_download.py

+import os
+import logging
+import Queue as queue
+import shutil
+import threading
+from requests import session
+from lxml import html
+
+log = logging.getLogger(__name__)
+
+class DABrowser(object):
+    user_agent = ('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 '
+                  '(KHTML, like Gecko) Chrome/24.0.1312.56 Safari/537.17')
+    max_tasks = 8
+
+    def __init__(self, path='.'):
+        self.session = session()
+        self.session.headers.update({'User-Agent': self.user_agent})
+        self.path = path
+        self.img_urls = queue.Queue()
+        self.deactivate = threading.Event()
+
+    def login(self, user=None, pwd=None):
+        url = 'https://www.deviantart.com/users/login'
+        response = self.session.get(url)
+        params = {
+            'ref': 'https://www.deviantart.com/',
+            'username': user if user else raw_input('user: '),
+            'password': pwd if pwd else raw_input('password: '),
+            'remember_me': 0,
+        }
+        response = self.session.post(url, data=params)
+        tree = html.fromstring(response.text)
+        return tree.get_element_by_id('oh-menu-deviant', None) is not None
+
+    def logout(self):
+        response = self.session.post('https://www.deviantart.com/users/logout')
+        tree = html.fromstring(response.text)
+        return tree.get_element_by_id('oh-menu-deviant', None) is None
+
+    def parse_profile(self, url):
+        pass
+
+    def parse_group(self, url):
+        pass
+
+    def parse_gallery(self, url):
+        offset = 0
+        total = 0
+        page = 1
+        while total == 0 or offset < total:
+            response = self.session.get(url, params={'offset': offset})
+            tree = html.fromstring(response.text)
+            gallery = tree.find_class('stream')
+            if not gallery:
+                log.info('Incorrect page format')
+                return
+            gallery = gallery[0]
+            part = int(gallery.attrib['gmi-count_per_page'])
+            total = int(gallery.attrib.get('gmi-total', 0))
+            log.info('Page %d [%d/%d]', page, offset, total)
+            item_nr = 0
+            for item in gallery.iterfind(".//div[@userid]"):
+                log.info(item)
+                item_nr += 1
+                item = item.find_class('thumb')
+                if not item:
+                    log.info('Item not available')
+                    continue
+                item = item[0]
+                img_url = item.attrib.get('data-super-full-img') or item.attrib.get('data-super-img')
+                if img_url:
+                    log.info('Queuing %s', item.attrib['title'])
+                    self.img_urls.put(img_url)
+                else:
+                    self.parse_page(item.attrib['href'], item.attrib['title'])
+            if item_nr == 0:
+                break
+            offset += part
+            page += 1
+
+    def parse_page(self, url, name='download'):
+        response = self.session.get(url)
+        tree = html.fromstring(response.text)
+        item = tree.get_element_by_id('download-button', None)
+        if item is not None:
+            log.info('Queuing %s', name)
+            self.img_urls.put(item.attrib['href'])
+        else:
+            log.info('Item locked')
+
+    def start_downloading(self):
+        self.deactivate.clear()
+        log.info('Starting %d threads', self.max_tasks)
+        for i in range(self.max_tasks):
+            t = threading.Thread(target=self.download_task)
+            t.daemon = True
+            t.start()
+
+    def stop_downloading(self):
+        self.deactivate.set()
+
+    def wait_for_end(self):
+        self.img_urls.join()
+
+    def download_task(self):
+        while not self.deactivate.is_set():
+            try:
+                self.download_image(self.img_urls.get())
+            finally:
+                self.img_urls.task_done()
+
+    def download_image(self, url, path=None):
+        if path is None:
+            path = self.path
+        path = os.path.join(path, os.path.split(url))
+        if os.path.exists(path):
+            log.info('Skipping %s', url)
+        else:
+            log.info('Downloading %s', url)
+            response = self.session.get(url, stream=True)
+            if response.status_code == 200:
+                with open(path, 'wb') as f:
+                    shutil.copyfileobj(response.raw, f)
+                    return True
+
+
+def main():
+    logging.basicConfig(level=logging.INFO)
+    br = DABrowser('download')
+    if br.login():
+        log.info('Login successful')
+    #br.start_downloading()
+    #br.parse_gallery('http://bluex-pl.deviantart.com/favourites/48404227')
+    #br.parse_gallery('http://bluex-pl.deviantart.com/favourites/38926098')
+    #br.parse_gallery('http://shadowsinking.deviantart.com/gallery/?catpath=/')
+    br.parse_gallery('http://browse.deviantart.com/?q=dragon')
+    #br.wait_for_end()
+    log.info('end')
+
+# Run main() only when this file is executed as a script, not on import.
+if __name__ == '__main__':
+    main()