Commits

Vasilij Schneidermann committed cbe5b4a

prepared packaging

  • Participants
  • Parent commits ac386af

Comments (0)

Files changed (1)

File fug/__init__.py

+#!/usr/bin/env python3
+
+import argparse
+import logging
+import os
+import sys
+from urllib.parse import urlparse
+
+import lxml.html
+import requests
+
+
# Plugin directory, resolved relative to the current working directory
# (not the package directory) -- NOTE(review): this only works when fug
# is started from the project root; confirm intended after packaging.
RELATIVE_PLUGIN_DIRECTORY = 'plugins'
PARSER_DESCRIPTION = "a simple webcomic downloader"
PARSER_EPILOG = "example usage: fug qc images"
# Template for the --list output: header plus newline-joined plugin names.
PLUGIN_LISTER = 'Plugins:\n{}'
# NOTE(review): appears unused in this file -- verify against callers.
PLUGIN_MISSING = '{}'
DIRECTORY_MISSING = "The following directory wasn't found: {}"
# Progress line: "#<offset>: <page url> -> <image url>".
STATUS = '#{}: {} -> {}'
# Verbose breakdown of how a filename splits into stem and extension.
STATUS_VERBOSE = '{} = {} + \'.\' + {}'
# Download line: "<image url> -> <formatted filename>".
STATUS2 = '{} -> {}'
# Interactive prompt shown when a download request fails.
EXCEPTION_ERROR = "(S)kip, (R)etry, (Q)uit? "
+
+
# due to trickery with argparse and the parser arguments being read from data,
# this helper class for a custom argparse action must be defined here, right
# before it is referenced in PARSER_ARGUMENTS
class ListPlugins(argparse.Action):
    """Custom argparse action: print the available plugins and exit."""

    def __call__(self, parser, namespace, values, option_string=None):
        def is_plugin(name):
            # a plugin is any regular *.py file, excluding the package
            # internals base.py and __init__.py
            if name in ('base.py', '__init__.py'):
                return False
            if os.path.splitext(name)[1] != '.py':
                return False
            return os.path.isfile(os.path.join(RELATIVE_PLUGIN_DIRECTORY,
                                               name))

        entries = sorted('- {}'.format(name)
                         for name in os.listdir(RELATIVE_PLUGIN_DIRECTORY)
                         if is_plugin(name))
        sys.exit(PLUGIN_LISTER.format('\n'.join(entries)))
+
+
# (flag/name, add_argument keyword options) pairs; get_args() unpacks these
# into parser.add_argument calls.
PARSER_ARGUMENTS = [
    ('plugin', {'help': "webcomic plugin to use"}),
    # --list triggers the ListPlugins action above; nargs=0 makes the
    # custom Action behave like a plain flag
    ('--list', {'action': ListPlugins, 'nargs': 0,
                'help': "list available plugins"}),
    ('-o', {'action': 'store', 'nargs': '?', 'default': os.getcwd(),
            'dest': 'directory', 'help': "output directory (default: ./)"}),
    ('-v', {'action': 'store_true', 'default': False, 'dest': 'verbose',
            'help': "turn verbose mode on"}),
    ('-d', {'action': 'store', 'dest': 'dump_file', 'help':
            "dump urls to specified file instead of downloading images"}),
    ('-f', {'action': 'store', 'dest': 'filename_format',
            'help': "override filename format"}),
    ('-s', {'action': 'store', 'dest': 'start_url', 'help':
            "override start url"}),
    ('-e', {'action': 'store', 'dest': 'end_url', 'help': "override end url"}),
    ('-i', {'action': 'store', 'default': 1, 'type': int, 'dest': 'offset',
            'help': "override offset (default: 1), "
            "use with -s and -e to resume"})]
+
+
def main():
    """Entry point: parse arguments, build the TODO list, then either
    dump the image urls to a file or download everything."""
    args = get_args()
    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(format='%(message)s', level=log_level)
    # requests needs to be silenced in the presence of logging
    logging.getLogger("requests").setLevel(logging.WARNING)

    module_name = "{}.{}".format(RELATIVE_PLUGIN_DIRECTORY, args.plugin)
    # fromlist forces __import__ to return the plugin submodule itself
    plugin = __import__(module_name,
                        fromlist=[RELATIVE_PLUGIN_DIRECTORY]).plugin()
    # for now the script won't create directories by itself
    if not os.path.exists(args.directory):
        sys.exit(DIRECTORY_MISSING.format(args.directory))

    logging.info("Creating TODO list...")
    todo_list = fetch_general_info(plugin, args)
    logging.info("Done!")
    if args.dump_file:
        logging.info("Dumping progress...")
        with open(args.dump_file, 'w') as dump_file:
            dump_file.write('\n'.join(entry['image_url']
                                      for entry in todo_list))
    else:
        logging.info("Processing TODO list...")
        process_info(plugin, todo_list, args)
    logging.info("Done!")
+
+
def get_args():
    """Builds the argument parser from PARSER_ARGUMENTS and parses
    sys.argv."""
    parser = argparse.ArgumentParser(description=PARSER_DESCRIPTION,
                                     epilog=PARSER_EPILOG)
    # populate the parser from the declarative argument table
    for name, options in PARSER_ARGUMENTS:
        parser.add_argument(name, **options)
    return parser.parse_args()
+
+
def fetch_general_info(plugin, args):
    """Scrapes all relevant information into a TODO list.

    Walks the comic from the start url to the end url (args overrides
    take precedence over the plugin's own urls) and collects one dict
    per image with the keys: i, current_url, image_url, full_filename,
    filename, ext.
    """
    info = []
    start = args.start_url or plugin.fetch_first_url()
    end = args.end_url or plugin.fetch_last_url()
    i = args.offset

    current_url = start
    while True:
        for image_url in plugin.fetch_image_urls(current_url):
            full_filename = os.path.split(urlparse(image_url).path)[-1]
            # splitext is robust against filenames with extra dots
            # (e.g. "page.1.png") or no extension at all, unlike the
            # previous full_filename.split('.') which raised ValueError
            filename, ext = os.path.splitext(full_filename)
            ext = ext.lstrip('.')
            row = {'i': i, 'current_url': current_url,
                   'image_url': image_url, 'full_filename': full_filename,
                   'filename': filename, 'ext': ext}
            logging.info(STATUS.format(i, current_url, image_url))
            logging.debug(STATUS_VERBOSE.format(full_filename, filename, ext))
            info.append(row)
            i += 1
        if current_url == end:
            break
        current_url = plugin.fetch_next_url(current_url)
    return info
+
+
def process_info(plugin, info, args):
    """Processes and downloads all given content.

    Formats each row's target filename (args.filename_format overrides
    the plugin's FILENAME_FORMAT) and hands the download off to the
    plugin.
    """
    # previously full_filename was also read from the row and then
    # immediately overwritten -- that dead assignment is gone
    filename_format = args.filename_format or plugin.FILENAME_FORMAT
    for row in info:
        image_url = row['image_url']
        # the row dict carries every key the format string may reference
        full_filename = filename_format.format(**row)
        logging.info(STATUS2.format(image_url, full_filename))
        plugin.fetch_image(image_url, args.directory, full_filename)
+
+
def fetch_content(url):
    """General helper with basic error handling.

    Retries the request until it succeeds, or until the user chooses to
    skip or quit at the interactive prompt.
    """
    while True:
        response = requests.get(url)
        try:
            # raises for any non-200 status code
            response.raise_for_status()
        except (requests.exceptions.ConnectionError,
                requests.exceptions.RequestException) as e:
            logging.error(e)
            choice = input(EXCEPTION_ERROR).upper()
            if choice == 'Q':
                sys.exit("Quitting.")
            if choice == 'S':
                break
            # any other answer means retry
        else:
            break
    return response.content
+
+
def fetch_dom(url):
    """Fetches the dom of a website for lxml trickery"""
    # reuse lxml's module-level default html parser instance
    return lxml.html.document_fromstring(fetch_content(url),
                                         parser=lxml.html.html_parser)
+
+
def save_file(url, path, full_filename):
    """Fetches a file and writes it into path under full_filename"""
    target = os.path.join(path, full_filename)
    with open(target, 'wb') as image_file:
        image_file.write(fetch_content(url))
+
+
def select_url(url, selector, entry=0):
    """Selects an url with a CSS selector and index"""
    dom = fetch_dom(url)
    matches = dom.cssselect(selector)
    return matches[entry].get('href')
+
+
def select_img(url, selector, entry=0):
    """Selects an image url with a CSS selector and index"""
    dom = fetch_dom(url)
    matches = dom.cssselect(selector)
    return matches[entry].get('src')
+
# script entry point; a Ctrl-C anywhere aborts with a friendly message
# instead of a traceback
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("Quitting...")