Commits

Anonymous committed 0c0fd99

initial import into hg

  • Participants

Comments (0)

Files changed (2)

+syntax: glob
+
+*.o
+*.so
+*.a
+*.pyd
+*.pyc
+*.elc
+*~
+*.orig
+*.rej
+*.tmp
+MANIFEST
+tags
+cscope.*
+tvdb_api/**
+
+syntax: regexp
+
+.*\#.*\#$
+tvdb_bulk_update_config.*\.py

tvdb_bulk_update.py

+#! /bin/env python
+# tvdb-bulk-update.py
+
+import sys
+import os
+import shutil
+import re
+import subprocess
+import MySQLdb
+sys.path.append("tvdb_api")
+import tvdb_api
+
+
+# Poster handling modes.  One of these is the second element of every
+# POSTER_ACTIONS entry and selects the branch taken in save_poster():
+#   POSTER_NONE      - do nothing
+#   POSTER_COPY_FILE - copy an existing poster file into POSTER_DIR
+#   POSTER_LINK_FILE - reference an existing poster file in place
+#   POSTER_DOWNLOAD  - fetch the episode thumbnail, optionally composited
+#                      with an existing poster file
+POSTER_NONE      = 0
+POSTER_COPY_FILE = 1
+POSTER_LINK_FILE = 2
+POSTER_DOWNLOAD  = 3
+
+# DEBUG turns on the extra per-episode dump in save_episode_metadata().
+# DRY_RUN suppresses downloads, file copies, the composite command and
+# database updates.
+DEBUG   = False
+DRY_RUN = False
+
+
+# ========== USER OPTIONS ==================================
+
+# Connection settings handed to MySQLdb.connect() at startup.
+MYTH_HOST   = "localhost"
+MYTH_USER   = "mythtv"
+MYTH_PASSWD = "mythtv"
+MYTH_DB     = "mythconverg"
+
+# Filename prefixes used by get_working_set() to restrict which
+# videometadata rows are processed; an empty list applies no restriction.
+ROOTS = []
+
+# Regular expressions tried in order against each video filename by
+# parse_filename(); the first one that matches wins.  Each pattern must
+# define the named groups 'series', 'season' and 'episode'.  The strings
+# are compiled with re.compile() during startup.
+NAME_PARSERS = [
+    # .../Lost/season 2/[Lost - 2x]10 Everybody Hates Hugo.avi
+    r"/(?P<series>[^/]+)/(season|volume) (?P<season>[0-9]+)[^/]*/(sd/)?([^/]+? - )?([0-9]+x)?(?P<episode>[0-9]+)[^/]+$",
+    # .../Lost - 2x04 Everybody Hates Hugo.avi
+    r"/(?P<series>[^/]+) - (?P<season>[0-9]+)x(?P<episode>[0-9]+)[^/]+$",
+    # .../Lost S02E04 Everybody Hates Hugo.avi
+    r"/(?P<series>[^/]+)( +|.)[Ss](?P<season>[0-9]+)[Ee](?P<episode>[0-9]+)[^/]+$",
+    ]
+
+# [filename regex, title format] pairs.  save_episode_metadata() uses the
+# format of the first entry whose regex matches the filename and applies
+# it with the % operator to the episode data dict.
+TITLE_FORMATS = [
+    # standard default
+    [".", "%(seasonnumber_int)dx%(episodenumber_int)02d. %(episodename)s"],
+    ]
+    
+# Directory that copied, downloaded and composited posters are written to.
+POSTER_DIR = "/home/mythtv/.mythtv/MythVideo/tvdb"
+
+# [filename regex, POSTER_* mode, poster path format, ...] entries.
+# save_poster() uses the first entry whose regex matches the filename; the
+# path formats are %-formatted with the episode data dict and the first
+# one that exists on disk is used.
+POSTER_ACTIONS = [
+    [".", POSTER_DOWNLOAD, "%(filedir)s/folder.jpg", "%(filedir)s/../folder.jpg"],
+    ]
+
+# Argument vector run through subprocess.call() when both a thumbnail and
+# a poster are available.  Every element is %-formatted with the keys
+# 'thumb', 'poster' and 'out'.
+# NOTE(review): 'composite' is presumably the ImageMagick tool - confirm.
+IMG_COMPOSITE_CMD = ['composite'
+                     ,'-gravity', 'center'
+                     ,'-border', '2'
+                     ,'-bordercolor', 'gray30'
+                     ,"%(thumb)s", "%(poster)s", "%(out)s"]
+
+# allow options to be defined in a separate file
+# The file is looked up relative to the current working directory and is
+# executed in this module's namespace, so it can rebind any option above.
+# A missing file raises IOError, which is deliberately ignored.
+try:
+    execfile("tvdb_bulk_update_config.py")
+except IOError:
+    pass
+                     
+# ========== END OF USER OPTIONS ===========================
+
+
+# new Tvdb method to search for a series name
+def _search_series(self, series):
+    """
+    """
+    seriesSoup = self._getsoupsrc( self.config['url_getSeries'] % (series) )
+    allSeries = []
+    for series in seriesSoup.findAll('series'):
+        cur_name = series.find('seriesname').contents[0]
+        cur_name = self._cleanData(cur_name)
+        cur_sid = series.find('id').contents[0]
+        cur_firstaired = series.find('firstaired')
+        if cur_firstaired:
+            cur_firstaired = cur_firstaired.contents[0]
+        else:
+            cur_firstaired = "???"
+        allSeries.append({'sid':cur_sid, 'name':cur_name, 'firstaired':cur_firstaired} )
+    return allSeries
+tvdb_api.Tvdb.search_series = _search_series
+
+# new Tvdb method to search for a series and return it's sid
+def _series_by_sid(self, sid):
+    seriesid = 'sid:' + str(sid)
+    if not self.corrections.has_key(seriesid):
+        self._getShowData(sid)
+        self.corrections[seriesid] = sid
+    return self.shows[sid]
+tvdb_api.Tvdb.series_by_sid = _series_by_sid
+
+# new Tvdb method to download a banner image
+def _get_banner(self, banner):
+    import urllib
+    url = "%(base_url)s/banners/%%s" % self.config
+    url = url % banner
+    imgfile = self.cache.checkCache(url)
+    if imgfile:
+        return imgfile
+    else:
+        path = self.cache.getCachePath(url)
+        dat = urllib.urlopen(url).read()
+        target_socket = open(path, "wb+")
+        target_socket.write(dat)
+        target_socket.close()
+        return path
+tvdb_api.Tvdb.get_banner = _get_banner
+
+
+class ParseError(Exception):
+    pass
+
+
+# try cleaning up some the the junk in tvdb text (especially in overview text)
+def clean_tvdb_text(text):
+    if not text: return text
+    text = text.replace("&quot;","'").replace("\r\n", " ")
+    text = text.replace(r"\'", "'")
+    return text
+
+
+# concert unicode to proper encoding for mythtv database
+def dbencode(text):
+    if not text: return text
+    # make some obvious replacements that latin-1 encoding won't do itself
+    text = text.replace(u'\u2013', "-")
+    text = text.replace(u'\u2014', "-")
+    text = text.replace(u'\u2018', "'")
+    text = text.replace(u'\u2019', "'")
+    text = text.replace(u'\u2026', "...")
+    text = text.replace(u'\u201c', '"')
+    text = text.replace(u'\u201d', '"')
+    text = text.encode('latin-1', 'backslashreplace')
+    return text
+
+
+def prompt_selection(prompt="Enter choice", valid_options=None, default=None):
+    from pprint import pprint as pp
+    ans = None
+    while True:
+        p = prompt
+        if default: p += " [" + default + "]"
+        p += ":"
+        print p,
+#        try:
+        ans = raw_input()
+#         except KeyboardInterrupt:
+#             continue
+        if ans=="" and default: ans=default
+        if (not valid_options) or (ans in valid_options):
+            return ans
+        print "* Invalid choice *"
+
+        
+_series_sid = {}        
+def prompt_search_series(series):
+    """
+    """
+    series = series.lower()
+    term = series
+    sid = None
+    while True:
+        # check for an cached sid
+        if _series_sid.has_key(term):
+            sid = _series_sid[term]
+            break
+        # otherwise search tvdb
+        allSeries = tvdb.search_series(term)
+        # also try some common substitutions
+        if len(allSeries) == 0:
+            allSeries = tvdb.search_series(term.replace('.',' ').replace('_',' '))
+        if len(allSeries) == 0:
+            print "Series Search For '" + term + "' Failed."
+            print "  x)  to skip episode"
+            print "  X)  to skip series"
+            print "  or enter a new search term"
+            term = prompt_selection()  # try new search term
+            if term == 'x':
+                break
+            elif term == 'X':
+                _series_sid[series] = None
+                break
+            else:
+                term = term.lower()
+        else:
+            print "TVDB Series Serach Results:"
+            for i in range(len(allSeries[:6])):
+                print "  %d) %s    (started: %s)  (tvdb id: %s)" % \
+                    (i+1,
+                     allSeries[i]['name'].encode("UTF-8","ignore"),
+                     allSeries[i]['firstaired'].encode("UTF-8","ignore"),                    
+                     allSeries[i]['sid'].encode("UTF-8","ignore"))
+            print "  x)  to skip episode"
+            print "  X)  to skip series"
+            print "  or enter a new search term"
+            choice = prompt_selection(default="1")
+            if choice == 'x':
+                break
+            elif choice == 'X':
+                _series_sid[series] = None
+                break
+            elif choice in [str(x) for x in range(1,min(len(allSeries),6)+1)]:
+                sid = allSeries[int(choice)-1]['sid']
+                _series_sid[series] = sid
+                break
+            else:
+                term = choice.lower()
+    if sid:
+        return tvdb.series_by_sid(sid)
+    else:
+        return None
+
+
+def get_working_set():
+    inetref_clause = "inetref='00000000' or inetref='99999999'"
+    root_clause = " or ".join("filename like '%s%%'" % s.replace("'","''") for s in ROOTS)
+    clause = ""
+    for c in (inetref_clause, root_clause):
+        if len(c) > 0:
+            if len(clause) > 0: clause += " and "
+            clause += "(%s)" % c
+    select = "select intid,filename from videometadata where " + clause
+    cur = mythdb.cursor()
+    try:
+        cur.execute(select)
+    except MySQLdb.Error, e:
+        print "*** Error loading working set: %d: %s" % (e.args[0],e.args[1])
+        return []
+    return cur.fetchall()
+
+        
+def parse_filename(filename):    
+    for p in NAME_PARSERS:
+        m = p.search(filename)
+        if m:
+            try:
+                parts = list(m.group('series','season','episode'))
+                parts[1] = int(parts[1])
+                parts[2] = int(parts[2])                            
+            except KeyError:
+                raise ParseError("Filename parser does not include all required groups: " + p.pattern)
+            except ValueError:
+                raise ParseError("Filename parser matches non-numeric season/episode: " + p.pattern)
+            return parts
+    return None
+
+
+def _find_poster_file(posters, epdata):
+    posterfile = None
+    for p in posters:            
+        try:
+            testfile = os.path.abspath(p % epdata)
+        except KeyError,e:
+            print "*** Invalid key in poster file format:", e.message
+            return None
+        except Exception, e:
+            print "*** Invalid poster file format:", e.message
+            return None
+        if os.path.exists(testfile):
+            posterfile = testfile
+            break
+    return posterfile
+    
+    
+def save_poster(intid, filename, series, epdata):
+    action = None
+    for action in POSTER_ACTIONS:
+        if action[0].search(filename):
+            action = action[1:]
+            break
+    if not action:
+        print "*** No poster action found"
+        return None
+    cmd = action[0]
+    if cmd == POSTER_NONE:
+        # do nothing
+        return None
+    elif cmd == POSTER_COPY_FILE or cmd == POSTER_LINK_FILE:
+        posterfile = _find_poster_file(action[1:], epdata)
+        if not posterfile:
+            print "No poster file not found"
+            return None
+        if cmd == POSTER_COPY_FILE:
+            # copy poster to poster dir
+            print "Copying poster:", posterfile
+            try:
+                dest = os.path.abspath( \
+                    os.path.join(POSTER_DIR,
+                                 'tvdb-'+epdata['id']+os.path.splitext(posterfile)[1]))
+                if not DRY_RUN: shutil.copy(posterfile, dest)
+                return dest
+            except IOError,e:
+                "*** Failed to copy poster:", e.message
+                return None
+        elif cmd == POSTER_LINK_FILE:
+            # db directly references poster file
+            print "Linking to poster:", posterfile
+            return posterfile
+    elif cmd == POSTER_DOWNLOAD:
+        # download episode thumbnail
+        thumbfile = epdata.get('filename')
+        if thumbfile:
+            print "Retrieving thumb:", thumbfile
+            if DRY_RUN:
+                thumbfile = "X-tvdb-dl-X"
+            else:
+                thumbfile = tvdb.get_banner(thumbfile)
+        else:
+            print "No episode thumbnail defined"
+        # optional background poster image    
+        posterfile = None
+        if len(action) > 1:
+            posterfile = _find_poster_file(action[1:], epdata)
+        if (not thumbfile) and (not posterfile):
+            return None
+        dest = os.path.abspath( \
+            os.path.join(POSTER_DIR,
+                         'tvdb-' + epdata['id'] + \
+                             (os.path.splitext(posterfile or thumbfile)[1] or '.jpg')))
+        # have both create a composite image
+        if thumbfile and posterfile:
+            subs = {'thumb':thumbfile,'poster':posterfile,'out':dest}
+            cmd = [a % subs for a in IMG_COMPOSITE_CMD]
+            if DRY_RUN:
+                ret = 0
+            else:
+                ret = subprocess.call(cmd)
+            if ret != 0:
+                print "*** Image composite command failed:", ret
+                return None
+            return dest
+        # have only a single image, return it
+        else:
+            imgfile = thumbfile or posterfile
+            try:
+                if not DRY_RUN: shutil.copy(imgfile, dest)
+                return dest
+            except IOError,e:
+                "*** Failed to copy poster:", e.message
+                return None
+
+        
+def save_episode_metadata(intid, filename, posterpath, series, epdata):
+    format = None
+    for f in TITLE_FORMATS:
+        if f[0].search(filename):
+            format = f[1]
+            break
+    if not format:
+        print "*** No matching title format found"
+        return
+    try:
+        title = format % epdata
+    except KeyError,e:
+        print "*** Invalid key in title format:", e.message
+        return
+    except Exception, e:
+        print "*** Invalid title format:", e.message
+        return
+    title = clean_tvdb_text(title)
+    # carefull: only plot allows NULL
+    inetref = 'tvdb-'+epdata['id']
+    director = epdata.get('director',u'')
+    if director: director = u', '.join([d for d in director.split('|') if d])
+    plot = clean_tvdb_text(epdata.get('overview'))
+    userrating = float(epdata.get('rating',0))
+    year = epdata.get('firstaired')  # '2007-10-02'
+    if year:
+        year = int(year[:4])
+    else:
+        year = 1895
+    coverfile = posterpath or ''  # posterpath could be None
+    genres = [g for g in series.data.get('genre',"").split('|') if g]
+
+    print "Updating videometadata with new information"
+    print "Title:", title.encode('utf-8','ignore')
+    if DEBUG:
+        print "#Save Episode:", intid
+        print "  #title:", title.encode('utf-8','ignore')
+        print "  #coverfile:", coverfile
+        print "  #inetref:", inetref
+        print "  #director:", director.encode('utf-8','ignore')
+        print "  #plot:", plot.encode('utf-8','ignore')
+        print "  #userrating:", userrating
+        print "  #year:", year
+        print "  #genres:", ','.join(genres).encode('utf-8','ignore')
+        #ep: GuestStars, Writer
+    if not DRY_RUN:
+        cur = mythdb.cursor()
+        try:
+            # reset genre data for episode
+            cur.execute("delete from videometadatagenre where idvideo=%s", (intid,))
+            for g in genres:
+                g = dbencode(g)
+                # lookup id for genre, inserting if it doesn't exist
+                cur.execute("select intid from videogenre where genre=%s", (g,))
+                genreid = cur.fetchone()
+                if not genreid:
+                    cur.execute("insert into videogenre (genre) values (%s)", (g,))
+                    genreid = int(cur.lastrowid)
+                else:
+                    genreid = genreid[0]
+                cur.execute("insert into videometadatagenre (idvideo,idgenre) values (%s,%s)",
+                            (intid,genreid))
+            # update episode metadata
+            title = dbencode(title)
+            director = dbencode(director)
+            plot = dbencode(plot)
+            cur.execute("update videometadata set inetref=%s, title=%s" + \
+                            ",director=%s, plot=%s" + \
+                            ",userrating=%s, year=%s, coverfile=%s" + \
+                            " where intid=%s",
+                        (inetref,title,director,plot,userrating,year,coverfile,intid))
+        except MySQLdb.Error, e:
+            print "*** Error saving metadata: %d: %s" % (e.args[0],e.args[1])
+    
+        
+
+#=================================================
+# Main
+#=================================================
+
+# Module-level state shared by the functions above: the database
+# connection and the Tvdb client.  Both are created when the module is
+# loaded, not inside start().
+mythdb = MySQLdb.connect(host=MYTH_HOST,
+                         user=MYTH_USER,
+                         passwd=MYTH_PASSWD,
+                         db=MYTH_DB)
+tvdb = tvdb_api.Tvdb()
+            
+# Replace the pattern strings from the user options with compiled regex
+# objects; parse_filename() relies on .search() and .pattern.
+NAME_PARSERS = [re.compile(s) for s in NAME_PARSERS]
+
+# Compile the filename selector of every title format in place.
+for f in TITLE_FORMATS:
+    f[0] = re.compile(f[0])
+
+# Compile the filename selector of every poster action in place.  Copy
+# and link actions given without any poster path get a default of
+# folder.jpg next to the video file.
+for a in POSTER_ACTIONS:
+    a[0] = re.compile(a[0])
+    if (a[1] == POSTER_COPY_FILE or a[1] == POSTER_LINK_FILE) and len(a) < 3:
+        a.append("%(filedir)s/folder.jpg")
+        
+
+# Trace(s) prints s when DEBUG is set and is a no-op otherwise.  The
+# choice is made once, when the module is loaded.
+if DEBUG:
+    def Trace(s): print s
+else:
+    def Trace(s): pass
+
+    
+def start():
+    if DRY_RUN:
+        print "DRY RUN mode.  No filesystem or database changes will be made."
+    for (intid,filename) in get_working_set():
+        print "="*70
+        print filename
+        try:
+            parts = parse_filename(filename)
+        except ParseError,e:
+            print "*** Filename Parse Error:", e.message
+            continue
+        else:
+            if not parts:
+                print "Filename can not be parsed"
+                continue
+        (series_name,season_num,episode_num) = parts
+        print "Series:%s    season:%d    episode:%d" % tuple(parts)
+        series = prompt_search_series(series_name)
+        if not series:
+            print "skipping"
+        else:
+            try:
+                ep = series[season_num][episode_num]
+            except tvdb_api.tvdb_seasonnotfound:
+                print "Season not found:", season_num
+            except tvdb_api.tvdb_episodenotfound:
+                print "Episode not found:", episode_num
+            else:
+                epdata = dict(ep.data)
+                # create 'int' version of all episode data for use by the title
+                # formatters.
+                for k,v in ep.data.iteritems():
+                    ival = 0
+                    try:
+                        # note: int("10.0") raises an exception
+                        ival = int(float(v))
+                    except:
+                        pass
+                    epdata[k+'_int'] = ival
+                # add series info
+                for k,v in series.data.iteritems():
+                    epdata['series_' + k] = v
+                # a few more useful bits
+                epdata['filepath'] = filename
+                epdata['filedir'] = os.path.dirname(filename)
+                posterpath = save_poster(intid, filename, series, epdata)
+                save_episode_metadata(intid, filename, posterpath, series, epdata)
+                
+            
+# Run the bulk update only when executed as a script.
+if __name__ == '__main__':
+    start()
+