# auto-extract-python / auto-extract.py

#!/usr/bin/env python
# Script:   Auto-Extract (Python edition)
# Author:   Mark Stahler <markstahler@gmail.com>
# Website:  http://bitbucket.org/markstahler/auto-unrar-python/, http://www.markstahler.ca
# Version:  1.02
# Released: January 24 2010
# License:  BSD
#
# Description:  Auto-Extract is a script designed to be run as a cron job (or scheduled task).
#   Its purpose is to recursively scan a directory for archive files and extract them. The script
#   is designed to be run at regular intervals and will create a file named .unrared in each
#   directory that contains an archive extracted by the script. This file is used to tell the
#   script, on subsequent scans, that the archive in the marked folder has previously been
#   extracted.
#
# Limitations:  Auto-Extract has been written to support one archive group per directory scanned
#   (for example, a movie in its own directory with files .rar, .r01, .r02, .nfo, etc.). This works
#   well with movies and other files that are packed and downloaded in individual directories.
#
#   Auto-Extract will overwrite previously extracted files if it does not find a .unrared file
#   present in the archive directory.
#
# Requirements:
# -Python 2.4 or newer
# -unrar in your path [Freeware command line version available from http://www.rarlab.com/]
#
# BUGS:
#   -Cannot tell if an archive has been fully downloaded
#
# TODO:
#   -Proper logging (ie. debug, info messages)
#   -Check for available disk space and estimate required
#   -Support for other archive types
#

import os
import pdb
import subprocess
import sys

class Unrar(object):
    """Recursively scan a download directory and extract the RAR archives in it.

    Instantiating the class runs the whole job: it reads the download
    directory from sys.argv[1], locates the unrar executable, walks the
    directory tree and extracts one archive per directory.  A directory whose
    archive was extracted successfully is marked with an empty marker file
    (mark_file_name) so that it is skipped on later runs.
    """

    def __init__(self):
        """Set up the configuration, validate the environment and run the scan."""
        # Instance configuration
        #------------------------
        self.mark_file_name = '.unrared'
        self.extensions_unrar = ['.rar', '.r01']        # List of extensions for auto-extract to look for
        self.extensions_unzip = ['.zip']
        self.supported_filetypes = []                   # Filled by extensions_list function
        self.extensions_list()

        # Sanity Checks
        #------------------
        # A download directory is mandatory; a missing one is a usage error
        if len(sys.argv) < 2:
            self.display_help(2)
        # Honour the -h/--help switch advertised in the usage text
        for arg in sys.argv[1:]:
            if arg in ('-h', '--help'):
                self.display_help()
        # Check that we can find unrar on this system
        self.unrar_check()
        # Check that the download directory parameter is actually a directory
        self.check_arguments()
        self.traverse_directories()

    def display_help(self, exit_code=0):
        """Display script command line usage help and exit.

        exit_code is the process exit status to terminate with (default 0).
        """
        print('usage: ' + sys.argv[0] + ' [options] [download_directory]')
        print('options:')
        print('     -h, --help      Display this help message')
        sys.exit(exit_code)

    def extensions_list(self):
        """Append every known archive extension to self.supported_filetypes."""
        self.supported_filetypes.extend(self.extensions_unrar)       # rar support
        self.supported_filetypes.extend(self.extensions_unzip)       # zip support (Not implemented yet)

    def unrar_exe(self):
        """Store the platform's unrar executable file name in self.unrar_name."""
        if sys.platform == 'win32':
            # The Windows command line build is named UnRAR.exe
            self.unrar_name = 'UnRAR.exe'
        else:
            self.unrar_name = 'unrar'

    def find_unrar(self):
        """Return the directory that contains the unrar executable.

        On Windows the 'unrar' folder under %PROGRAMFILES% is tried first;
        after that every entry of the PATH environment variable is tried in
        order.  Returns False when the executable is not found.
        """
        candidates = []
        if sys.platform == 'win32':
            # PROGRAMFILES may be unset, in which case only PATH is searched
            program_files = os.getenv('PROGRAMFILES')
            if program_files:
                candidates.append(os.path.join(program_files, 'unrar'))
        candidates.extend(os.getenv('PATH', '').split(os.pathsep))

        for directory in candidates:
            # isfile() is False for empty, missing, unreadable or non-directory
            # entries, so a broken PATH entry cannot abort the search
            if directory and os.path.isfile(os.path.join(directory, self.unrar_name)):
                print('Found ' + self.unrar_name + ' in ' + directory)
                return directory

        # unrar not found on this system
        return False

    def unrar_check(self):
        """Locate unrar and store its full path in self.unrar_exe.

        Sets self.unrar_name, self.unrar_path and self.unrar_exe.  Prints an
        error and exits with status 1 when unrar cannot be found.
        """
        # The attribute assigned at the end of this method shadows the
        # unrar_exe method on the instance, so look the method up on the class
        # to keep this check callable more than once.
        type(self).unrar_exe(self)
        self.unrar_path = self.find_unrar()
        if not self.unrar_path:
            print('Error: ' + self.unrar_name + ' not found in the system path \n')
            sys.exit(1)
        self.unrar_exe = os.path.join(self.unrar_path, self.unrar_name)

    def check_arguments(self):
        """Store the absolute download directory taken from sys.argv[1].

        Prints an error and exits with status 1 when the argument is not an
        existing directory.
        """
        if not os.path.isdir(sys.argv[1]):
            print('Error: ' + sys.argv[1] + ' is not a directory \n')
            sys.exit(1)
        self.download_dir = os.path.abspath(sys.argv[1])

    def traverse_directories(self):
        """Scan the download directory and all of its subdirectories."""
        for dirname, dirnames, filenames in os.walk(self.download_dir):
            self.scan_for_archives(dirname)

    def scan_for_archives(self, dir):
        """Extract the archive found in dir unless dir is already marked.

        Only one archive per directory is extracted.  Extensions are tried in
        the order of self.extensions_unrar (so '.rar' wins over '.r01'), and
        file names are visited in sorted order so the choice does not depend
        on the order os.listdir() happens to return.  Extension matching is
        case-insensitive.
        """
        # Collect (file name, matched extension) for every supported archive
        archives = []
        for filename in sorted(os.listdir(dir)):
            if not os.path.isfile(os.path.join(dir, filename)):
                continue
            lowered = filename.lower()
            for ext in self.supported_filetypes:
                if lowered.endswith(ext.lower()):
                    archives.append((filename, ext))
                    break
        if not archives:
            return

        # A marker file means the archive here was extracted on an earlier run
        if os.path.exists(os.path.join(dir, self.mark_file_name)):
            for filename, ext in archives:
                print('Skipping archive ' + filename)
            return

        for wanted_ext in self.extensions_unrar:
            for filename, ext in archives:
                if ext == wanted_ext:
                    print("Need to extract: " + filename)
                    self.start_unrar(dir, filename)
                    return

        # Only non-rar archives are present; unrar cannot extract them
        for filename, ext in archives:
            print('Skipping archive ' + filename + ' (' + ext + ' extraction is not implemented yet)')

    def start_unrar(self, dir, archive_name):
        """Extract archive_name, located in dir, into dir using unrar.

        The directory is marked as extracted only when unrar exits with
        status 0; on any other status an error is printed and the directory is
        left unmarked so the archive is retried on the next run.  Exits with
        status 1 when the unrar executable cannot be started.
        """
        cmd_args = [
            self.unrar_exe,                     # full path of the unrar executable
            'e',                                # e - extract
            '-y',                               # y - assume yes to all queries (overwrite)
            os.path.join(dir, archive_name),    # archive path
            dir,                                # destination
        ]

        try:
            # subprocess quotes arguments itself, so paths with spaces work
            exit_code = subprocess.call(cmd_args)
        except OSError:
            print('Error: ' + self.unrar_name + ' not found in the given path \n')
            sys.exit(1)

        if exit_code != 0:
            print('Error: ' + self.unrar_name + ' failed to extract ' + archive_name +
                  ' (exit code ' + str(exit_code) + ')')
            return

        # Successfully extracted archive, mark the dir with a hidden file
        self.mark_dir(dir)

    def mark_dir(self, dir):
        """Create the empty marker file in dir so it is not extracted again."""
        mark_file = os.path.join(dir, self.mark_file_name)
        f = open(mark_file, 'w')
        f.close()
        print(self.mark_file_name + ' file created')

# Script entry point: constructing Unrar performs the whole scan-and-extract
# run, so nothing else needs to be called when executed directly.
if __name__ == "__main__":
    obj = Unrar()