Source

iviewautodownload / python-iview / daily_scrape.py

Full commit

'''
To view a list of all shows, run:
    H:\iViewNapper\python-iview\iview-cli.py -i
Then add the desired show (as an fnmatch pattern) to the semicolon-separated
'showlist' option in the [shows] section of settings.ini.
'''

import fnmatch
import os
import sys
import time
import handbrake
import re
import urllib
import shutil
import ConfigParser

import iview.config
import iview.fetch
import iview.comm

# Name of the INI file holding all settings. It is opened below relative to
# the current working directory, at import time.
# NOTE(review): main() chdir()s to sys.path[0] before savelastrun() rewrites
# this file, so the file read here and the file written there only coincide
# when the script is started from its own directory -- confirm.
SETTINGS_FILE = 'settings.ini'

# Shared parser instance; every function in this module reads its options
# from it.
config = ConfigParser.ConfigParser()

with open(SETTINGS_FILE, 'r') as f:
    config.readfp(f)

# Characters that validfilename() replaces with '_'.
INVALID_FILENAME_CHARACTERS = r'\/:?*"<>|'
# Number of characters progressbar() sizes each bar (percentage included) to.
SCREEN_WIDTH = 79

# log() prints only messages whose level is <= LOGLEVEL.
LOGLEVEL = 9

# Output paths of the files download() fetched successfully; main() lists
# them at the end of every pass.
DOWNLOADED = []

def log(msg, level=5):
    if level <= LOGLEVEL:
        msg = str(msg)
        print msg

def fnmatchpatterns(name, patterns):
    '''Returns True if name matches at least one of the fnmatch patterns,
    False otherwise (including when patterns is empty).
    '''
    return any(fnmatch.fnmatch(name, candidate) for candidate in patterns)

def progressbar(pcg):
    '''
    Draws a one-line text progress bar on stdout.

    pcg is the completed fraction (0.0 for nothing, 1.0 for everything). The
    bar plus its trailing percentage text is SCREEN_WIDTH characters wide and
    ends with a carriage return rather than a newline, so the next call
    overwrites it in place.
    '''
    txtpcg = ' %3.1f%%' % (pcg*100.0)
    # Room left for the '=' / '>' / ' ' run between the two '|' delimiters.
    barwidth = SCREEN_WIDTH - 2 - len(txtpcg)
    done = int(barwidth*pcg)
    todo = barwidth - done - 1
    sys.stdout.write('|%s>%s|%s\r' % ('='*done, ' '*todo, txtpcg))
    # No newline is ever written, so a line-buffered stdout would hold the
    # bar back until something else flushed it; push it out now.
    sys.stdout.flush()

def validfilename(fn):
    '''Returns fn with every character listed in INVALID_FILENAME_CHARACTERS
    replaced by an underscore. Forward and back slashes are included.
    '''
    for badchar in INVALID_FILENAME_CHARACTERS:
        # replace() hands the string back unchanged when badchar is absent.
        fn = fn.replace(badchar, '_')
    return fn

def showfilename(show):
    '''
    Gives the file name (less extension) that a given show should have.

    Extracts season and episode info etc.
    '''
    title = show['title']
    match = re.match('(.*)Episode (\d+)(.*)', title)
    print '*************************'
    print 'show dict:', show
    if not match:
        print 'Unknown title pattern: %r' % (title)
        # Use old naming method
        fn = show['title']
    else:
        tmpseriesname, episodenum, episode = match.groups()
        episode = episode.strip()
        if episode:
            episode = ' - %s' % (episode)
        match = re.match(r'.*_\d{2}_(?P<series>\d{2})_(?P<episode>\d{2}(_.+)?)\..*', show['url'])
        if not match:
            print
            print show
            print
            print "NO MATCH for url:", show['url']
        seriesnum = match.group('series')
        episodenum = match.group('episode')
        fn = '%s - s%se%s%s' % (showfolder(show), seriesnum,
                episodenum, episode)
    fn = validfilename(fn)
    return fn

def getoutputpath(show):
    '''Given a show dict, return the full path the file should be saved to.
    '''
    fn = showfilename(show) + os.path.splitext(show['url'])[1]
    outputdir = os.path.join(config.get('paths', 'base_output_path'), showfolder(show))
    if not os.path.isdir(outputdir):
        os.mkdir(outputdir)
    ret = os.path.join(outputdir, fn)
    print "Using show name: %s" % (ret)
    return ret

def getshowart(show):
    '''
    Gets the thumbnail JPEG if it does not already exist and puts it into
    the metadata folder in the show folder.
    '''
    if not show['thumb'] or show['thumb'] == '(none)':
        # Don't bother doing anything if there is no thumbnail
        return
    outputdir = os.path.join(config.get('paths', 'base_output_path'), showfolder(show), 'metadata')
    artfn = os.path.join(outputdir, showfilename(show) + '.jpg')
    print "Art path: %r" % (artfn)
    if os.path.isfile(artfn):
        # Already got it, return
        return
    print "Getting show art: %r" % (artfn)
    # Make sure the metadata folder is there
    if not os.path.isdir(outputdir):
        os.mkdir(outputdir)
    urlf = urllib.urlopen(show['thumb'])
    outf = open(artfn, 'wb')
    outf.write(urlf.read())
    urlf.close()
    outf.close()

    if int(config.get('options', 'transcode')):
        # Copy to the TRANSCODED_OUTPUT_PATH
        outputdir2 = os.path.join(config.get('paths', 'transcoded_output_path'), showfolder(show), 'metadata')
        artfn2 = os.path.join(outputdir2, showfilename(show) + '.jpg')
        if not os.path.isdir(outputdir2):
            os.mkdir(outputdir2)
        shutil.copy(artfn, artfn2)

def showfolder(show):
    '''
    Returns the name of the folder a show should be put into.

    This is show['seriestitle'] cut off before any "Series <number>" part
    (and stripped of surrounding whitespace), run through validfilename().
    '''
    folder = show['seriestitle']
    found = re.match('(.*)Series \d+', folder)
    if found is not None:
        folder = found.group(1).strip()
    return validfilename(folder)

def transcode(show):
    '''Called when a show download is complete.

    Converts show['filename'] with handbrake.iphoneconvert() into a file of
    the same (sanitised) name inside the show's folder under
    transcoded_output_path, creating that folder first if it is missing.
    '''
    source = show['filename']

    destdir = os.path.join(config.get('paths', 'transcoded_output_path'), showfolder(show))
    destname = validfilename(os.path.basename(show['filename']))
    destpath = os.path.join(destdir, destname)

    log('Output path: %s' % (destpath))
    if not os.path.isdir(destdir):
        os.mkdir(destdir)

    handbrake.iphoneconvert(source, destpath)

def download(show):
    global DOWNLOADED
    #outputfn = show['title'] + os.path.splitext(show['url'])[1]
    outputfn = getoutputpath(show)
    print "Getting: %s" % (outputfn)

    # This assumes if we have the file at all - it is a complete download
    if os.path.isfile(outputfn):
        print "Already downloaded: %s" % (outputfn)
    else:
        print "Downloading: %s from %s" % (outputfn, show['url'])
        # Note: fetch.py has been modified to return a subprocess object
        p = iview.fetch.fetch_program(show['url'], dest_file=outputfn)
        show['filename'] = outputfn
        loopstate = 0
        f = open('sample', 'w')
        while 1:
            line = ''
            while 1:
                c = p.stdout.read(1)
                if not c or c == '\r':
                    break
                line += c
            if not line:
                break
            f.write(line)
            f.write('\n---------------\n')
            line = line.strip()
            if line == 'INFO: Metadata:':
                loopstate = 1
            elif loopstate == 1:
                parts = line.split()
                if len(parts) != 3:
                    loopstate = 2
                    continue
                show[parts[1]] = parts[2]
            elif loopstate == 2:
                match = re.search('\((\d+\.\d+)%\)', line)
                if match:
                    progressbar(float(match.group(1))/100)
        f.close()
        p.wait()
        success = (p.returncode == 0)
        if success:
            DOWNLOADED.append(show['filename'])
            print "Successfully downloaded %s" % (show['title'])
            if int(config.get('options', 'transcode')):
                transcode(show)
        else:
            print "Error downloading."
            if os.path.isfile(outputfn):
                print "Removing incomplete download"
                os.remove(outputfn)

def savelastrun():
    '''Saves the last time of a complete run.

    Stores the current time.time() value as the 'last_scrape' option of the
    'tracking' section and rewrites SETTINGS_FILE with the full
    configuration.
    '''
    # ConfigParser only fully supports string values (a float would break a
    # later config.get() on this option), so store the textual form. repr()
    # keeps the sub-second digits and is read back with float().
    config.set('tracking', 'last_scrape', repr(time.time()))
    with open(SETTINGS_FILE, 'w') as f:
        config.write(f)

def main():
    os.chdir(sys.path[0])

    # Sleep time for first run, check last run time
    sleeptime = 5 # Sleep for 5 seconds after startup to allow the PC to get a network connection
    lastscrape = float(config.get('tracking', 'last_scrape'))
    now = time.time()
    run_freq = int(config.get('options', 'run_freq'))
    if (now - lastscrape) < run_freq:
        sleeptime = lastscrape + run_freq - now
    print now, sleeptime, lastscrape
    while 1:
        nextruntime = time.asctime(time.localtime(time.time() + sleeptime))
        print "Sleeping %d seconds. Will run at: %s" % (sleeptime, nextruntime)
        time.sleep(sleeptime)

        # Get the show list we want to watch...
        srclist = config.get('shows', 'showlist')
        srclist = [show.strip() for show in srclist.split(';')]

        # Init the lib
        iview.comm.get_config()

        # See what we can find in the index...
        index = iview.comm.get_index()

        # Get the ID's of the shows we are after
        # Then update the list of episodes
        todownload = []
        for series in index:
            if fnmatchpatterns(series['title'], srclist):
                print 'Found series: %s' % (series['title'])
                seriesid = series['id']
                seriesshows = iview.comm.get_series_items(seriesid)
                for showinfo in seriesshows:
                    showinfo['seriesid'] = series['id']
                    showinfo['seriestitle'] = series['title']
                    todownload.append(showinfo)

        i = 1
        for show in todownload:
            print "Processing show %d of %d" % (i, len(todownload))
            download(show)
            getshowart(show)
            i += 1

        savelastrun()
        sleeptime = run_freq
        print
        if DOWNLOADED:
            print "This session have downloaded:"
            for show in DOWNLOADED:
                print "    %s" % (show)

        if int(config.get('options', 'run_once')):
            break

def test():
    '''Runs transcode() on a canned show dict, for manual testing.
    '''
    cannedshow = {
        'aacaot': '2.00',
        'audiochannels': '2.00',
        'audiocodecid': 'mp4a',
        'audiosamplerate': '44100.00',
        'avclevel': '30.00',
        'avcprofile': '77.00',
        'date': '2010-03-01 00:00:00',
        'description': "(Preview) Award-winning actor Catherine Tate returns as the Doctor's new companion, returning to her role as Donna Noble who featured in the 2006 Christmas special The Runaway Bride. Now reunited, the Doctor and Donna travel back to Pompeii in AD 79 on the eve of the infamous eruption where people areslowly turning to stone.\n",
        'duration': '139.18',
        'filename': 'Doctor Who Series 4.mp4',
        'height': '360.00',
        'home': 'http://shop.abc.net.au/browse/product.asp?productid=996433',
        'id': '9998147',
        'livestream': '',
        'moovposition': '9993357.00',
        'seriesid': '9998147',
        'seriestitle': 'Doctor Who Series 4',
        'thumb': 'http://shop.abc.net.au/multimediaitems/images/iview/drwho_4_04.jpg',
        'title': 'Doctor Who Series 4',
        'url': 'abcshop/drwho_4_04.mp4',
        'videocodecid': 'avc1',
        'videoframerate': '25.00',
        'width': '640.00',
    }
    transcode(cannedshow)


# Script entry point: start the scrape loop when run directly.
if __name__ == '__main__':
    main()
    # Call test() here instead to run transcode() on a canned show dict.