Commits

Anonymous committed 899dd07

Initial add of work to date on automatic scraper.

This does the basics of looking for shows, downloading (only if required), and converting using Handbrake.

Comments (0)

Files changed (4)

python-iview/Start iView-scrape.bat

+@echo off
+
+rem Change to the directory containing this script (/d also changes drive
+rem letter). The quotes keep a path containing "&" or similar characters
+rem from being split by the command interpreter.
+cd /d "%~dp0"
+
+rem This should point to your python install
+C:\Python27\python.exe daily-scrape.py
+
+rem Keep the console window open so the output can be read
+pause

python-iview/daily-scrape.py

+
+'''
+To view a list of all shows, run:
+    H:\iViewNapper\python-iview\iview-cli.py -i
+Then add the desired show (as a fnmatch) to shows.txt.
+'''
+
+import iview.config
+import iview.fetch
+import iview.comm
+import fnmatch
+import os
+import sys
+import time
+import handbrake
+import re
+
+# Messages whose level is above this value are suppressed by log().
+LOGLEVEL = 9
+
+# File listing the fnmatch patterns of the series to fetch (read by showlist()).
+SHOW_LIST_FILE = 'shows.txt'
+# Marker file; main() compares its modification time with the current time.
+LAST_SCRAPE_FILE = 'lastscrape.dat'
+# Minimum number of seconds between two scrapes (12 hours).
+RUN_FREQ = 12 * 60 *60
+# Width, in characters, used by progressbar() to size the bar.
+SCREEN_WIDTH = 79
+# Root directory for downloads; getoutputpath() adds one directory per series.
+BASE_OUTPUT_PATH = r'h:\iview'
+# Root directory for the transcoded copies written by postdownload().
+TRANSCODED_OUTPUT_PATH = r'h:\media\iphone'
+
+# Each of these characters is replaced with "_" by validfilename().
+INVALID_FILENAME_CHARACTERS = r'\/:?*"<>|'
+
+def log(msg, level=5):
+    if level <= LOGLEVEL:
+        msg = str(msg)
+        print msg
+
+def showlist():
+    '''Returns the names of the shows to look for.
+    '''
+    f = open(SHOW_LIST_FILE)
+    ret = []
+    for show in f:
+        show = show.strip()
+        if show:
+            if show.startswith('#'):
+                continue
+            ret.append(show)
+    return ret
+
+def fnmatchpatterns(name, patterns):
+    for pattern in patterns:
+        if fnmatch.fnmatch(name, pattern):
+            return True
+    return False
+
+def progressbar(pcg):
+    txtpcg = ' %3.1f%%' % (pcg*100.0)
+    done = int((SCREEN_WIDTH-2-len(txtpcg))*pcg)
+    todo = SCREEN_WIDTH-2-done-1-len(txtpcg)
+    sys.stdout.write('|%s>%s|%s\r' % ('='*done, ' '*todo, txtpcg))
+
+def validfilename(fn):
+    '''Replaces any invalid filname character with an _. Includes / and \.
+    '''
+    for c in INVALID_FILENAME_CHARACTERS:
+        if c in fn:
+            fn = fn.replace(c, '_')
+    return fn 
+
+def getoutputpath(show):
+    '''Given a show dict, return the path the final file should be saved to.
+    '''
+    fn = validfilename(show['title'] + os.path.splitext(show['url'])[1])
+    outputdir = os.path.join(BASE_OUTPUT_PATH, 
+                    validfilename(show['seriestitle']))
+    if not os.path.isdir(outputdir):
+        os.mkdir(outputdir)
+    ret = os.path.join(outputdir, fn)
+    return ret
+
+def postdownload(show):
+    '''Called when a show download is complete.
+    '''
+    srcpath = show['filename']
+    srcwidth = float(show['width'])
+    srcheight = float(show['height'])
+    
+    outputdir = os.path.join(TRANSCODED_OUTPUT_PATH, 
+            validfilename(show['seriestitle']))
+    outputpath = os.path.join(
+        outputdir,
+        validfilename(os.path.basename(show['filename']))
+        )
+
+    log('Output path: %s' % (outputpath))
+    if not os.path.isdir(outputdir):
+        os.mkdir(outputdir)
+
+    handbrake.iphoneconvert(srcpath, outputpath, srcwidth, srcheight)        
+
+def download(show):
+    #outputfn = show['title'] + os.path.splitext(show['url'])[1]
+    outputfn = getoutputpath(show)
+    print "Getting: %s" % (outputfn)
+
+    # This assumes if we have the file at all - it is a complete download
+    if os.path.isfile(outputfn):
+        print "Already downloaded: %s" % (outputfn)
+    else:
+        print "Downloading: %s from %s" % (outputfn, show['url'])
+        # Note: fetch.py has been modified to return a subprocess object
+        p = iview.fetch.fetch_program(show['url'], dest_file=outputfn)
+        show['filename'] = outputfn 
+        loopstate = 0
+        f = open('sample', 'w')
+        while 1:
+            line = ''
+            while 1:
+                c = p.stdout.read(1)
+                if not c or c == '\r':
+                    break
+                line += c
+            if not line:
+                break
+            f.write(line)
+            f.write('\n---------------\n')
+            line = line.strip()
+            if line == 'INFO: Metadata:':
+                loopstate = 1
+            elif loopstate == 1:
+                parts = line.split()
+                if len(parts) != 3:
+                    loopstate = 2
+                    continue
+                show[parts[1]] = parts[2]
+            elif loopstate == 2:
+                match = re.search('\((\d+\.\d+)%\)', line)
+                if match:
+                    progressbar(float(match.group(1))/100)
+        f.close()
+        p.wait()
+        success = (p.returncode == 0)
+        if success:
+            print "Successfully downloaded %s" % (show['title'])
+            postdownload(show)
+        else:
+            print "Error downloading."
+            if os.path.isfile(outputfn):
+                print "Removing incomplete download"
+                os.remove(outputfn)
+
+def main():
+    os.chdir(sys.path[0])
+    # Do we actually run, check last scrape time
+    lastscrape = 0
+    if os.path.isfile(LAST_SCRAPE_FILE):
+        lastscrape = os.stat(LAST_SCRAPE_FILE).st_mtime
+    if (time.time() - lastscrape) < RUN_FREQ:
+        print "Last run to recently, exiting..."
+        sys.exit()
+
+    # Init the lib
+    iview.comm.get_config()
+
+    # Get the show list we want to watch...
+    srclist = showlist()
+
+    # See what we can find in the index...
+    index = iview.comm.get_index()
+
+    # Get the ID's of the shows we are after
+    # Then update the list of episodes
+    todownload = []
+    for series in index:
+        if fnmatchpatterns(series['title'], srclist):
+            print 'Found series: %s' % (series['title'])
+            seriesid = series['id']
+            seriesshows = iview.comm.get_series_items(seriesid)
+            for showinfo in seriesshows:
+                showinfo['seriesid'] = series['id']
+                showinfo['seriestitle'] = series['title']
+                todownload.append(showinfo)
+
+    i = 1
+    for show in todownload:
+        print "Processing show %d of %d" % (i, len(todownload))
+        download(show)
+        i += 1
+
+    open(LAST_SCRAPE_FILE, 'w').write('last iView scrape')
+
+def test():
+    sampleshow = {'aacaot': '2.00',
+ 'audiochannels': '2.00',
+ 'audiocodecid': 'mp4a',
+ 'audiosamplerate': '44100.00',
+ 'avclevel': '30.00',
+ 'avcprofile': '77.00',
+ 'date': '2010-03-01 00:00:00',
+ 'description': "(Preview) Award-winning actor Catherine Tate returns as the Doctor's new companion, returning to her role as Donna Noble who featured in the 2006 Christmas special The Runaway Bride. Now reunited, the Doctor and Donna travel back to Pompeii in AD 79 on the eve of the infamous eruption where people areslowly turning to stone.\n",
+ 'duration': '139.18',
+ 'filename': 'Doctor Who Series 4.mp4',
+ 'height': '360.00',
+ 'home': 'http://shop.abc.net.au/browse/product.asp?productid=996433',
+ 'id': '9998147',
+ 'livestream': '',
+ 'moovposition': '9993357.00',
+ 'seriesid': '9998147',
+ 'seriestitle': 'Doctor Who Series 4',
+ 'thumb': 'http://shop.abc.net.au/multimediaitems/images/iview/drwho_4_04.jpg',
+ 'title': 'Doctor Who Series 4',
+ 'url': 'abcshop/drwho_4_04.mp4',
+ 'videocodecid': 'avc1',
+ 'videoframerate': '25.00',
+ 'width': '640.00'}
+    postdownload(sampleshow)
+ 
+
+# Run the scraper only when this file is executed as a script.
+if __name__ == '__main__':
+    main()
+    # Uncomment to run postdownload() on the sample show defined in test().
+    #test()

python-iview/handbrake.py

+
+import subprocess
+import fnmatch
+import os
+
+# Full path of the HandBrake command line executable run by iphoneconvert().
+HANDBRAKE = r"C:\Program Files\Handbrake\HandBrakeCLI.exe" 
+# Size limits used by iphoneconvert(): a source wider than WIDTH or taller
+# than HEIGHT is meant to be scaled down. Also the default source dimensions.
+WIDTH = 960.0
+HEIGHT = 640.0
+
+def setpriority(pid=None,priority=0):
+    """ Set The Priority of a Windows Process.  Priority is a value between 0-5 where
+        2 is normal priority.  Default sets the priority of the current
+        python process but can take any valid process ID. """
+    
+    import win32api,win32process,win32con
+        
+    priorityclasses = [win32process.IDLE_PRIORITY_CLASS,
+                       win32process.BELOW_NORMAL_PRIORITY_CLASS,
+                       win32process.NORMAL_PRIORITY_CLASS,
+                       win32process.ABOVE_NORMAL_PRIORITY_CLASS,
+                       win32process.HIGH_PRIORITY_CLASS,
+                       win32process.REALTIME_PRIORITY_CLASS]
+    if pid == None:
+        pid = win32api.GetCurrentProcessId()
+    handle = win32api.OpenProcess(win32con.PROCESS_ALL_ACCESS, True, pid)
+    win32process.SetPriorityClass(handle, priorityclasses[priority])
+
+
+def iphoneconvert(srcpath, outputpath, srcwidth=WIDTH, srcheight=HEIGHT):
+    '''Converts the passed in show at path to an iPhone video at outputpath
+    '''
+    # Force low priority
+    setpriority()
+    
+    widthratio = 1
+    heightratio = 1
+    if srcwidth > WIDTH:
+        widthratio = srcwidth/WIDTH
+    if srcheight > HEIGHT:
+        heightraido = srcheight/HEIGHT
+    if heightratio > widthratio:
+        ratio = heightratio
+    else:
+        ratio = widthratio
+
+    outputwidth = str(int(srcwidth / ratio))
+    outputheight = str(int(srcheight / ratio))
+
+    cmd = [HANDBRAKE, '-i', 
+            srcpath,
+            '-t', '1', '-c', '1', '-o', 
+            outputpath,
+            '-f', 'mp4',
+            '-e', 'x264', '-q', '20', '-a', '1', '-E', 'faac', '-6', 'dpl2',
+            '-R', '48', '-B', '128', '-D', '0.0', '-x', 
+            'cabac=0:ref=2:me=umh:bframes=0:subq=6:8x8dct=0:trellis=0:weightb=0',
+            '-v', '1']
+    print ratio
+    if ratio > 1:
+        print "Ratio >1, changing dimenions to: %s x %s" % (outputwidth, outputheight)
+        cmd += ['-X', outputwidth, '-l', outputheight]
+    print "Command:", ' '.join(cmd)
+    
+    p = subprocess.Popen(cmd)
+    p.wait()
+    if p.returncode != 0:
+        print 'Error encoding file: %s' % p.returncode
+    else:
+        print 'Encoded file OK.'
+
+def processdir(srcdir, outputdir, match='*'):
+    srcfns = []
+    for fn in os.listdir(srcdir):
+        if fnmatch.fnmatch(fn, match):
+            srcfns.append(os.path.join(srcdir, fn))
+    for fn in srcfns:
+        outfn = os.path.basename(fn)
+        outfn = os.path.splitext(outfn)[0] + '.mp4'
+        outfn = os.path.join(outputdir, outfn)
+        print fn, outfn
+        if os.path.isfile(outfn):
+            print 'File exists, skipping...'
+        else:
+            iphoneconvert(fn, outfn, 720, 576)
+
+# When run as a script, convert one hard coded directory of videos.
+if __name__ == '__main__':
+    processdir(r'H:\Media\Jade\Dora Undercover', r'H:\iPhone\Dora Undercover', '*')

python-iview/shows.txt

+# Sample matching of shows.
+# Uses the Python fnmatch module, which allows matching with "?" and "*" as on a DOS or shell command line.
+# Matching is NOT case sensitive on Windows (fnmatch follows the operating system's file name rules).
+
+# Lines starting with a # are ignored and all white space is stripped.
+
+# Kids shows
+Mister maker*
+mama*
+peppa pig*
+
+# Other shows
+Doctor Who*
+Torchwood*
+The Gruen*
+*Spicks*Specks*