# Source: calibre_utils/src/mekk/calibre/scripts/add_if_missing.py (full commit)
# -*- coding: utf-8 -*-

"""
Scans the given directory and adds to calibre all books which are not yet
present there. Duplicate checking is done solely by file content
comparison (file names may differ).  Useful for double-checking whether
all items of some directory were added to calibre, or only some of them.

Example:

    calibre_add_if_missing /home/jan/OldBooks

(and later remove OldBooks if everything is OK).

Can also be used to add individual files, for example:

    calibre_add_if_missing *.pdf *.djvu subdir/*.pdf

"""

import shutil
import re
import os.path
from collections import defaultdict
from mekk.calibre.calibre_util import \
    find_calibre_file_names, add_to_calibre
from mekk.calibre.disk_util import \
    find_disk_files, file_size, are_files_identical
# TODO: migrate to argparse some day
from optparse import OptionParser

def process_options():
    """
    Parse and validate the command line of calibre_add_if_missing.

    The command line is read by optparse (``parser.parse_args()`` without
    arguments). Recognized options:

    * ``-x`` / ``--tag``    - stored as ``options.tag``
    * ``-a`` / ``--author`` - stored as ``options.author``
    * ``-m`` / ``--move``   - stored as ``options.move``

    Returns the pair ``(options, args)``, where ``args`` is the non-empty
    list of positional parameters (file or directory names).

    Usage errors (no positional parameter at all, or a ``--move`` value
    which is not an existing directory) are reported with ``parser.error``,
    which terminates the script.
    """
    usage = "Usage: %prog [options] file-or-dir-1 file-or-dir-2 ..."
    parser = OptionParser(usage=usage)
    parser.add_option("-x", "--tag",
                      action="store", type="string", dest="tag",
                      help="Tag added files with given tag(s). Can be comma-separated.")
    parser.add_option("-a", "--author",
                      action="store", type="string", dest="author",
                      help="Force given author name.")
    parser.add_option("-m", "--move",
                      action="store", type="string", dest="move",
                      help="Move source files to given directory after adding them")
    (options, args) = parser.parse_args()

    if not args:
        parser.error("""No file or directory specified. Execute with:
    calibre_add_if_missing  /some/dire/ctory/name
or
    calibre_add_if_missing  file.name otherfile.name dir.name
""")

    # The move target must already exist; it is never created here.
    if options.move and not os.path.isdir(options.move):
        parser.error("Parameter given for --move ('%s') is not a directory!" % options.move)

    return (options, args)


# doc, rtf and txt files are notoriously bad at metadata extraction, better
# force filename into the title to know what is the book about.
# Group 1 is the base file name without the extension.
# TODO: make this behaviour an option
_TITLE_FROM_NAME_RE = re.compile(r"^(.*)\.(rtf|docx?|txt)$")


def _title_from_file_name(file_name):
    """
    Return the title to force for file_name (its base name without the
    extension) if it is a rtf/doc/docx/txt file, otherwise None.
    """
    m = _TITLE_FROM_NAME_RE.match(os.path.basename(file_name))
    return m.group(1) if m else None


def run():
    """
    Run calibre_add_if_missing script

    Collects the files named on the command line (directories are expanded
    with find_disk_files), compares each of them against the files reported
    by find_calibre_file_names and passes every file without an identical
    counterpart to add_to_calibre. If --move was given, each added file is
    afterwards moved to that directory. A one-line summary is printed at
    the end.
    """
    options, args = process_options()

    files_to_check = []
    for param in args:
        if os.path.isdir(param):
            files_to_check.extend(find_disk_files(param))
        else:
            files_to_check.append(param)

    # size -> set of files with that size. Only files of equal size can be
    # identical, so the size serves as a cheap pre-filter before comparing
    # file contents.
    known_by_calibre = defaultdict(set)
    for file_name in find_calibre_file_names():
        known_by_calibre[file_size(file_name)].add(file_name)

    added_count = 0
    skipped_count = 0
    for file_name in files_to_check:
        # .get() instead of indexing: a plain lookup on the defaultdict would
        # insert an empty set for every size not seen before.
        candidates = known_by_calibre.get(file_size(file_name), ())
        for c in candidates:
            if are_files_identical(file_name, c):
                print("Already present: %s (stored as %s)" % (file_name, c))
                skipped_count += 1
                break
        else:
            # No candidate matched (or there were none): the file is new.
            print("Not registered by calibre: %s" % file_name)

            add_to_calibre(file_name,
                           force_title=_title_from_file_name(file_name),
                           force_tags=options.tag,
                           force_author=options.author)
            if options.move:
                shutil.move(file_name, options.move)
            added_count += 1

    # Single-argument print(...) calls behave the same as print statement
    # on Python 2 and as print function on Python 3.
    print("")
    print("%d files already present, %d added" % (skipped_count, added_count))