Commits

sillyfrog committed 56b4b1a

Rename daily-scrape.py to daily_scrape.py (replace "-" with "_") so that the module can be imported and used with tests.
Update the files that reference it.

Comments (0)

Files changed (3)

python-iview/Start iView-scrape.bat

 cd /d %~dp0
 
 rem This should point to your python install
-C:\Python27\python.exe daily-scrape.py
+C:\Python27\python.exe daily_scrape.py
 
-pause
+pause

python-iview/daily-scrape.py

-
-'''
-To view a list of all shows, run:
-    H:\iViewNapper\python-iview\iview-cli.py -i
-Then add the desired show (as a fnmatch) to shows.txt.
-'''
-
-SHOW_LIST_FILE = 'shows.txt'
-LAST_SCRAPE_FILE = 'lastscrape.dat'
-BASE_OUTPUT_PATH = r'h:\iview'
-TRANSCODED_OUTPUT_PATH = r'h:\media\iphone'
-RUN_FREQ = 8 * 60 *60  # Frequency to run - every 8 hours. If RUN_ONCE is True, the most frequently it will run.
-RUN_ONCE = False    # If True, will only run once then exit
-
-# --------------- There is no config required below this line ---------------
-
-import iview.config
-import iview.fetch
-import iview.comm
-import fnmatch
-import os
-import sys
-import time
-import handbrake
-import re
-
-
-INVALID_FILENAME_CHARACTERS = r'\/:?*"<>|'
-SCREEN_WIDTH = 79
-
-LOGLEVEL = 9
-
-DOWNLOADED = []
-
-def log(msg, level=5):
-    if level <= LOGLEVEL:
-        msg = str(msg)
-        print msg
-
-def showlist():
-    '''Returns the names of the shows to look for.
-    '''
-    f = open(SHOW_LIST_FILE)
-    ret = []
-    for show in f:
-        show = show.strip()
-        if show:
-            if show.startswith('#'):
-                continue
-            ret.append(show)
-    return ret
-
-def fnmatchpatterns(name, patterns):
-    for pattern in patterns:
-        if fnmatch.fnmatch(name, pattern):
-            return True
-    return False
-
-def progressbar(pcg):
-    txtpcg = ' %3.1f%%' % (pcg*100.0)
-    done = int((SCREEN_WIDTH-2-len(txtpcg))*pcg)
-    todo = SCREEN_WIDTH-2-done-1-len(txtpcg)
-    sys.stdout.write('|%s>%s|%s\r' % ('='*done, ' '*todo, txtpcg))
-
-def validfilename(fn):
-    '''Replaces any invalid filname character with an _. Includes / and \.
-    '''
-    for c in INVALID_FILENAME_CHARACTERS:
-        if c in fn:
-            fn = fn.replace(c, '_')
-    return fn 
-
-def getoutputpath(show):
-    '''Given a show dict, return the path the final file should be saved to.
-    '''
-    title = show['title']
-    match = re.match('(.*) Episode (\d+) (.*)', title)
-    if not match:
-        print '*************************'
-        print 'Unknown title pattern: %r' % (title)
-        print 'show dict:', show
-        # Use old naming method
-        fn = show['title']
-    else:
-        seriesname, episodenum, episode = match.groups()
-        match = re.match(r'.*_\d{2}_(\d{2})_(\d{2})\..*', show['url'])
-        seriesnum, episodenum = match.groups()
-        fn = '%s - s%se%s - %s' % (show['seriestitle'], seriesnum, episodenum, episode)
-    fn = validfilename(fn + os.path.splitext(show['url'])[1])
-    outputdir = os.path.join(BASE_OUTPUT_PATH, 
-                    validfilename(show['seriestitle']))
-    if not os.path.isdir(outputdir):
-        os.mkdir(outputdir)
-    ret = os.path.join(outputdir, fn)
-    return ret
-
-def postdownload(show):
-    '''Called when a show download is complete.
-    '''
-    srcpath = show['filename']
-    
-    outputdir = os.path.join(TRANSCODED_OUTPUT_PATH, 
-            validfilename(show['seriestitle']))
-    outputpath = os.path.join(
-        outputdir,
-        validfilename(os.path.basename(show['filename']))
-        )
-
-    log('Output path: %s' % (outputpath))
-    if not os.path.isdir(outputdir):
-        os.mkdir(outputdir)
-
-    handbrake.iphoneconvert(srcpath, outputpath)
-
-def download(show):
-    global DOWNLOADED
-    #outputfn = show['title'] + os.path.splitext(show['url'])[1]
-    outputfn = getoutputpath(show)
-    print "Getting: %s" % (outputfn)
-
-    # This assumes if we have the file at all - it is a complete download
-    if os.path.isfile(outputfn):
-        print "Already downloaded: %s" % (outputfn)
-    else:
-        print "Downloading: %s from %s" % (outputfn, show['url'])
-        # Note: fetch.py has been modified to return a subprocess object
-        p = iview.fetch.fetch_program(show['url'], dest_file=outputfn)
-        show['filename'] = outputfn 
-        loopstate = 0
-        f = open('sample', 'w')
-        while 1:
-            line = ''
-            while 1:
-                c = p.stdout.read(1)
-                if not c or c == '\r':
-                    break
-                line += c
-            if not line:
-                break
-            f.write(line)
-            f.write('\n---------------\n')
-            line = line.strip()
-            if line == 'INFO: Metadata:':
-                loopstate = 1
-            elif loopstate == 1:
-                parts = line.split()
-                if len(parts) != 3:
-                    loopstate = 2
-                    continue
-                show[parts[1]] = parts[2]
-            elif loopstate == 2:
-                match = re.search('\((\d+\.\d+)%\)', line)
-                if match:
-                    progressbar(float(match.group(1))/100)
-        f.close()
-        p.wait()
-        success = (p.returncode == 0)
-        if success:
-            DOWNLOADED.append(show['title'])
-            print "Successfully downloaded %s" % (show['title'])
-            postdownload(show)
-        else:
-            print "Error downloading."
-            if os.path.isfile(outputfn):
-                print "Removing incomplete download"
-                os.remove(outputfn)
-
-def getlastrun():
-    '''Returns the time the program was last run as unix epoch
-    '''
-    lastrun = 0
-    if os.path.isfile(LAST_SCRAPE_FILE):
-        lastrun = os.stat(LAST_SCRAPE_FILE).st_mtime
-    return lastrun
-
-def savelastrun():
-    '''Saves the last time of a complete run.
-    '''
-    open(LAST_SCRAPE_FILE, 'w').write('last iView scrape')
-    
-def main():
-    os.chdir(sys.path[0])
-    
-    # Sleep time for first run, check last run time
-    sleeptime = 5 # Sleep for 5 seconds after startup to allow the PC to get a network connection
-    lastscrape = getlastrun()
-    now = time.time()
-    if (now - lastscrape) < RUN_FREQ:
-        sleeptime = lastscrape + RUN_FREQ - now
-    print now, sleeptime, lastscrape
-    while 1:
-        nextruntime = time.asctime(time.localtime(time.time() + sleeptime))
-        print "Sleeping %d seconds. Will run at: %s" % (sleeptime, nextruntime)
-        time.sleep(sleeptime)
-        
-        # Get the show list we want to watch...
-        srclist = showlist()
-
-        # Init the lib
-        iview.comm.get_config()
-        
-        # See what we can find in the index...
-        index = iview.comm.get_index()
-
-        # Get the ID's of the shows we are after
-        # Then update the list of episodes
-        todownload = []
-        for series in index:
-            if fnmatchpatterns(series['title'], srclist):
-                print 'Found series: %s' % (series['title'])
-                seriesid = series['id']
-                seriesshows = iview.comm.get_series_items(seriesid)
-                for showinfo in seriesshows:
-                    showinfo['seriesid'] = series['id']
-                    showinfo['seriestitle'] = series['title']
-                    todownload.append(showinfo)
-
-        i = 1
-        for show in todownload:
-            print "Processing show %d of %d" % (i, len(todownload))
-            download(show)
-            i += 1
-        
-        savelastrun()
-        sleeptime = RUN_FREQ
-        print
-        if DOWNLOADED:
-            print "This session have downloaded:"
-            for show in DOWNLOADED:
-                print "    %s" % (show)
-
-def test():
-    sampleshow = {'aacaot': '2.00',
- 'audiochannels': '2.00',
- 'audiocodecid': 'mp4a',
- 'audiosamplerate': '44100.00',
- 'avclevel': '30.00',
- 'avcprofile': '77.00',
- 'date': '2010-03-01 00:00:00',
- 'description': "(Preview) Award-winning actor Catherine Tate returns as the Doctor's new companion, returning to her role as Donna Noble who featured in the 2006 Christmas special The Runaway Bride. Now reunited, the Doctor and Donna travel back to Pompeii in AD 79 on the eve of the infamous eruption where people areslowly turning to stone.\n",
- 'duration': '139.18',
- 'filename': 'Doctor Who Series 4.mp4',
- 'height': '360.00',
- 'home': 'http://shop.abc.net.au/browse/product.asp?productid=996433',
- 'id': '9998147',
- 'livestream': '',
- 'moovposition': '9993357.00',
- 'seriesid': '9998147',
- 'seriestitle': 'Doctor Who Series 4',
- 'thumb': 'http://shop.abc.net.au/multimediaitems/images/iview/drwho_4_04.jpg',
- 'title': 'Doctor Who Series 4',
- 'url': 'abcshop/drwho_4_04.mp4',
- 'videocodecid': 'avc1',
- 'videoframerate': '25.00',
- 'width': '640.00'}
-    postdownload(sampleshow)
- 
-
-if __name__ == '__main__':
-    main()
-    #test()

python-iview/daily_scrape.py

+
+'''
+To view a list of all shows, run:
+    H:\iViewNapper\python-iview\iview-cli.py -i
+Then add the desired show (as a fnmatch) to shows.txt.
+'''
+
+SHOW_LIST_FILE = 'shows.txt'
+LAST_SCRAPE_FILE = 'lastscrape.dat'
+BASE_OUTPUT_PATH = r'h:\iview'
+TRANSCODED_OUTPUT_PATH = r'n:\media\iphone'
+RUN_FREQ = 8 * 60 *60  # Frequency to run - every 8 hours. If RUN_ONCE is True, the most frequently it will run.
+RUN_ONCE = False    # If True, will only run once then exit
+
+# --------------- There is no config required below this line ---------------
+
+import iview.config
+import iview.fetch
+import iview.comm
+import fnmatch
+import os
+import sys
+import time
+import handbrake
+import re
+import urllib
+import shutil
+
+INVALID_FILENAME_CHARACTERS = r'\/:?*"<>|'
+SCREEN_WIDTH = 79
+
+LOGLEVEL = 9
+
+DOWNLOADED = []
+
+def log(msg, level=5):
+    '''Prints msg (converted with str()) if level is at or below LOGLEVEL.
+    '''
+    if level <= LOGLEVEL:
+        msg = str(msg)
+        print msg
+
+def showlist():
+    '''Returns the names of the shows to look for.
+
+    Reads SHOW_LIST_FILE one line at a time. Each line is stripped of
+    surrounding whitespace; blank lines and lines starting with '#' are
+    skipped. The remaining lines are returned as a list, in file order.
+    The file is closed before returning, even if reading fails.
+    '''
+    ret = []
+    with open(SHOW_LIST_FILE) as f:
+        for show in f:
+            show = show.strip()
+            # Skip blank lines and comment lines
+            if not show or show.startswith('#'):
+                continue
+            ret.append(show)
+    return ret
+
+def fnmatchpatterns(name, patterns):
+    '''Returns True if name matches at least one of the fnmatch patterns,
+    False otherwise (including when patterns is empty).
+    '''
+    for pattern in patterns:
+        if fnmatch.fnmatch(name, pattern):
+            return True
+    return False
+
+def progressbar(pcg):
+    '''Writes a one-line text progress bar to stdout.
+
+    pcg is the fraction complete; it is multiplied by 100 for the
+    percentage shown after the bar. The line is sized from SCREEN_WIDTH
+    and ends with a carriage return instead of a newline, so the next
+    call overwrites it.
+    '''
+    txtpcg = ' %3.1f%%' % (pcg*100.0)
+    # Width available for '=' characters, scaled by the fraction complete
+    done = int((SCREEN_WIDTH-2-len(txtpcg))*pcg)
+    # Remaining width, less one column for the '>' marker
+    todo = SCREEN_WIDTH-2-done-1-len(txtpcg)
+    sys.stdout.write('|%s>%s|%s\r' % ('='*done, ' '*todo, txtpcg))
+
+def validfilename(fn):
+    '''Replaces any invalid filename character with an _. Includes / and \.
+
+    The characters replaced are those listed in INVALID_FILENAME_CHARACTERS.
+    Returns the cleaned-up name.
+    '''
+    for c in INVALID_FILENAME_CHARACTERS:
+        if c in fn:
+            fn = fn.replace(c, '_')
+    return fn 
+
+def showfilename(show):
+    '''
+    Gives the file name (less extension) that a given show should have.
+
+    Extracts season and episode info etc.
+
+    When show['title'] contains "Episode <number>" and show['url'] contains
+    "_NN_<series>_<episode>." (two digits each), the name is
+    "<showfolder> - s<series>e<episode>", followed by " - <text after the
+    episode number>" if there is any. If either pattern is missing, the
+    plain title is used instead. The result is passed through
+    validfilename().
+    '''
+    title = show['title']
+    titlematch = re.match('(.*)Episode (\d+)(.*)', title)
+    print '*************************'
+    print 'show dict:', show
+    urlmatch = None
+    if titlematch:
+        # The series and episode numbers come from the URL, not the title
+        urlmatch = re.match(r'.*_\d{2}_(\d{2})_(\d{2})\..*', show['url'])
+    if not titlematch:
+        print 'Unknown title pattern: %r' % (title)
+        # Use old naming method
+        fn = title
+    elif not urlmatch:
+        # Without this check a non-matching URL raised AttributeError on
+        # None.groups(); fall back to the old naming method instead.
+        print 'Unknown url pattern: %r' % (show['url'])
+        fn = title
+    else:
+        episode = titlematch.group(3).strip()
+        if episode:
+            episode = ' - %s' % (episode)
+        seriesnum, episodenum = urlmatch.groups()
+        fn = '%s - s%se%s%s' % (showfolder(show), seriesnum,
+                episodenum, episode)
+    fn = validfilename(fn)
+    return fn
+
+def getoutputpath(show):
+    '''Given a show dict, return the path the final file should be saved to.
+
+    The file name is showfilename(show) plus the extension of show['url'],
+    inside the folder BASE_OUTPUT_PATH/<showfolder(show)>. That folder is
+    created if it does not exist yet.
+    '''
+    fn = showfilename(show) + os.path.splitext(show['url'])[1]
+    outputdir = os.path.join(BASE_OUTPUT_PATH, showfolder(show))
+    # os.mkdir only creates the last path component, so BASE_OUTPUT_PATH
+    # itself has to exist already.
+    if not os.path.isdir(outputdir):
+        os.mkdir(outputdir)
+    ret = os.path.join(outputdir, fn)
+    print "Using show name: %s" % (ret)
+    return ret
+
+def getshowart(show):
+    '''
+    Gets the thumbnail JPEG if it does not already exist and puts it into
+    the metadata folder in the show folder.
+
+    Nothing is done when show['thumb'] is empty or '(none)', or when the
+    image file is already present under BASE_OUTPUT_PATH. Otherwise the
+    image is downloaded from show['thumb'], saved as
+    BASE_OUTPUT_PATH/<showfolder>/metadata/<showfilename>.jpg and then
+    copied to the same relative location under TRANSCODED_OUTPUT_PATH.
+    Missing folders (including parent folders) are created as needed.
+    '''
+    if not show['thumb'] or show['thumb'] == '(none)':
+        # Don't bother doing anything if there is no thumbnail
+        return
+    folder = showfolder(show)
+    artname = showfilename(show) + '.jpg'
+    outputdir = os.path.join(BASE_OUTPUT_PATH, folder, 'metadata')
+    artfn = os.path.join(outputdir, artname)
+    print "Art path: %r" % (artfn)
+    if os.path.isfile(artfn):
+        # Already got it, return
+        return
+    print "Getting show art: %r" % (artfn)
+
+    # Fetch the whole image before creating the output file, so a failed
+    # download does not leave an empty .jpg behind that would later be
+    # mistaken for a finished one.
+    urlf = urllib.urlopen(show['thumb'])
+    try:
+        data = urlf.read()
+    finally:
+        urlf.close()
+
+    # Make sure the metadata folder is there. makedirs also creates the
+    # show folder itself when it is missing; plain mkdir would fail.
+    if not os.path.isdir(outputdir):
+        os.makedirs(outputdir)
+    with open(artfn, 'wb') as outf:
+        outf.write(data)
+
+    # Copy to the TRANSCODED_OUTPUT_PATH
+    outputdir2 = os.path.join(TRANSCODED_OUTPUT_PATH, folder, 'metadata')
+    if not os.path.isdir(outputdir2):
+        os.makedirs(outputdir2)
+    shutil.copy(artfn, os.path.join(outputdir2, artname))
+
+def showfolder(show):
+    '''
+    Returns the name of the folder a show should be put into.
+
+    If show['seriestitle'] contains "Series <digits>", only the text before
+    it is kept (stripped of surrounding whitespace); anything after the
+    series number is dropped as well. Otherwise the whole series title is
+    used. The result is passed through validfilename().
+    '''
+    seriestitle = show['seriestitle']
+    seriesmatch = re.match('(.*)Series \d+', seriestitle)
+    if seriesmatch:
+        seriestitle = seriesmatch.group(1).strip()
+    return validfilename(seriestitle)
+
+def postdownload(show):
+    '''Called when a show download is complete.
+
+    Passes the downloaded file (show['filename']) to
+    handbrake.iphoneconvert(), with an output path that has the same base
+    name inside TRANSCODED_OUTPUT_PATH/<showfolder(show)>. That folder is
+    created if it does not exist.
+    '''
+    srcpath = show['filename']
+    
+    outputdir = os.path.join(TRANSCODED_OUTPUT_PATH, showfolder(show))
+    outputpath = os.path.join(
+        outputdir,
+        validfilename(os.path.basename(show['filename']))
+        )
+
+    log('Output path: %s' % (outputpath))
+    # os.mkdir only creates the last path component, so
+    # TRANSCODED_OUTPUT_PATH itself has to exist already.
+    if not os.path.isdir(outputdir):
+        os.mkdir(outputdir)
+
+    # NOTE(review): presumably this transcodes srcpath into outputpath;
+    # the handbrake module is not shown here - confirm.
+    handbrake.iphoneconvert(srcpath, outputpath)
+
+def download(show):
+    '''Downloads one show unless its output file already exists.
+
+    The output path comes from getoutputpath(show). The download is started
+    with iview.fetch.fetch_program() and its stdout is read until it ends.
+    Metadata values found in that output are stored in the show dict and
+    progress percentages are drawn with progressbar().
+
+    If the process exits with return code 0, the show title is appended to
+    DOWNLOADED and postdownload(show) is called. Otherwise any partial
+    output file is removed.
+    '''
+    global DOWNLOADED
+    #outputfn = show['title'] + os.path.splitext(show['url'])[1]
+    outputfn = getoutputpath(show)
+    print "Getting: %s" % (outputfn)
+
+    # This assumes if we have the file at all - it is a complete download
+    if os.path.isfile(outputfn):
+        print "Already downloaded: %s" % (outputfn)
+    else:
+        print "Downloading: %s from %s" % (outputfn, show['url'])
+        # Note: fetch.py has been modified to return a subprocess object
+        p = iview.fetch.fetch_program(show['url'], dest_file=outputfn)
+        show['filename'] = outputfn 
+        # loopstate: 0 = waiting for the 'INFO: Metadata:' line,
+        #            1 = reading metadata lines,
+        #            2 = reading progress lines
+        loopstate = 0
+        # Everything read from the process is also written to a file named
+        # 'sample' in the current directory.
+        # NOTE(review): this looks like leftover debugging output - confirm
+        # whether it is still wanted.
+        f = open('sample', 'w')
+        while 1:
+            # Read one character at a time because records end with '\r',
+            # not with a newline.
+            line = ''
+            while 1:
+                c = p.stdout.read(1)
+                if not c or c == '\r':
+                    break
+                line += c
+            # An empty record means end of output.
+            # NOTE(review): two '\r' characters in a row also produce an
+            # empty record and would end the loop early - confirm this
+            # cannot happen in the fetch output.
+            if not line:
+                break
+            f.write(line)
+            f.write('\n---------------\n')
+            line = line.strip()
+            if line == 'INFO: Metadata:':
+                loopstate = 1
+            elif loopstate == 1:
+                # A metadata line has exactly three whitespace-separated
+                # parts; the second is used as the key and the third as the
+                # value. The first line of any other shape ends the
+                # metadata section.
+                parts = line.split()
+                if len(parts) != 3:
+                    loopstate = 2
+                    continue
+                show[parts[1]] = parts[2]
+            elif loopstate == 2:
+                # Progress lines carry a percentage in parentheses,
+                # e.g. "(12.3%)"
+                match = re.search('\((\d+\.\d+)%\)', line)
+                if match:
+                    progressbar(float(match.group(1))/100)
+        f.close()
+        p.wait()
+        success = (p.returncode == 0)
+        if success:
+            DOWNLOADED.append(show['title'])
+            print "Successfully downloaded %s" % (show['title'])
+            postdownload(show)
+        else:
+            print "Error downloading."
+            if os.path.isfile(outputfn):
+                print "Removing incomplete download"
+                os.remove(outputfn)
+
+def getlastrun():
+    '''Returns the time the program was last run as unix epoch
+
+    This is the modification time of LAST_SCRAPE_FILE, or 0 if that file
+    does not exist.
+    '''
+    lastrun = 0
+    if os.path.isfile(LAST_SCRAPE_FILE):
+        lastrun = os.stat(LAST_SCRAPE_FILE).st_mtime
+    return lastrun
+
+def savelastrun():
+    '''Saves the last time of a complete run.
+
+    Rewrites LAST_SCRAPE_FILE; getlastrun() reads the time back from the
+    file's modification time, so the text written does not matter. The
+    file is closed explicitly so the write is finished before returning.
+    '''
+    with open(LAST_SCRAPE_FILE, 'w') as f:
+        f.write('last iView scrape')
+    
+def main():
+    '''Runs the scrape loop.
+
+    Changes into the script's directory, then waits until RUN_FREQ seconds
+    have passed since the last completed scrape (or 5 seconds if a scrape
+    is already due). Each pass reads the wanted shows with showlist(),
+    finds the matching series in the iView index, and calls download() and
+    getshowart() for every episode of those series. It then records the
+    run with savelastrun() and lists what has been downloaded so far.
+
+    The loop repeats every RUN_FREQ seconds. If RUN_ONCE is True the
+    function returns after the first complete pass instead.
+    '''
+    os.chdir(sys.path[0])
+
+    # Sleep time for first run, check last run time
+    sleeptime = 5 # Sleep for 5 seconds after startup to allow the PC to get a network connection
+    lastscrape = getlastrun()
+    now = time.time()
+    if (now - lastscrape) < RUN_FREQ:
+        sleeptime = lastscrape + RUN_FREQ - now
+    print now, sleeptime, lastscrape
+    while 1:
+        nextruntime = time.asctime(time.localtime(time.time() + sleeptime))
+        print "Sleeping %d seconds. Will run at: %s" % (sleeptime, nextruntime)
+        time.sleep(sleeptime)
+
+        # Get the show list we want to watch...
+        srclist = showlist()
+
+        # Init the lib
+        iview.comm.get_config()
+
+        # See what we can find in the index...
+        index = iview.comm.get_index()
+
+        # Get the ID's of the shows we are after
+        # Then update the list of episodes
+        todownload = []
+        for series in index:
+            if fnmatchpatterns(series['title'], srclist):
+                print 'Found series: %s' % (series['title'])
+                seriesid = series['id']
+                seriesshows = iview.comm.get_series_items(seriesid)
+                for showinfo in seriesshows:
+                    showinfo['seriesid'] = series['id']
+                    showinfo['seriestitle'] = series['title']
+                    todownload.append(showinfo)
+
+        total = len(todownload)
+        for i, show in enumerate(todownload, 1):
+            print "Processing show %d of %d" % (i, total)
+            download(show)
+            getshowart(show)
+
+        savelastrun()
+        sleeptime = RUN_FREQ
+        print
+        if DOWNLOADED:
+            print "This session have downloaded:"
+            for show in DOWNLOADED:
+                print "    %s" % (show)
+
+        # RUN_ONCE was documented in the config section but never checked,
+        # so the loop could not exit. Honour it here.
+        if RUN_ONCE:
+            break
+
+def test():
+    '''Manual check: runs postdownload() on a hard-coded sample show dict.
+
+    Not called by default; the call at the bottom of the file is commented
+    out.
+    '''
+    sampleshow = {'aacaot': '2.00',
+ 'audiochannels': '2.00',
+ 'audiocodecid': 'mp4a',
+ 'audiosamplerate': '44100.00',
+ 'avclevel': '30.00',
+ 'avcprofile': '77.00',
+ 'date': '2010-03-01 00:00:00',
+ 'description': "(Preview) Award-winning actor Catherine Tate returns as the Doctor's new companion, returning to her role as Donna Noble who featured in the 2006 Christmas special The Runaway Bride. Now reunited, the Doctor and Donna travel back to Pompeii in AD 79 on the eve of the infamous eruption where people areslowly turning to stone.\n",
+ 'duration': '139.18',
+ 'filename': 'Doctor Who Series 4.mp4',
+ 'height': '360.00',
+ 'home': 'http://shop.abc.net.au/browse/product.asp?productid=996433',
+ 'id': '9998147',
+ 'livestream': '',
+ 'moovposition': '9993357.00',
+ 'seriesid': '9998147',
+ 'seriestitle': 'Doctor Who Series 4',
+ 'thumb': 'http://shop.abc.net.au/multimediaitems/images/iview/drwho_4_04.jpg',
+ 'title': 'Doctor Who Series 4',
+ 'url': 'abcshop/drwho_4_04.mp4',
+ 'videocodecid': 'avc1',
+ 'videoframerate': '25.00',
+ 'width': '640.00'}
+    postdownload(sampleshow)
+ 
+
+if __name__ == '__main__':
+    main()
+    #test()