Commits

Murty Rompalli  committed ea708f0

Combined mirror-sms.py and mirror-nonsms.py into a single script

  • Participants
  • Parent commits 7e0273b

Comments (0)

Files changed (4)

 TODO:
 
-1. Combine mirror-sms.py and mirror-nonsms.py
-2. Download contacts and current account configuration
-3. Create voicemail.html and recorded.html containinng all transcribed lines
+1. Download contacts and current account configuration
+2. Create voicemail.html and recorded.html containing all transcribed lines
    such that clicking on transcription plays the correct mp3 file
-4. Create sms.html that shows all SMS conversations. Each conversation thread is enclosed in a rectangle.
+3. Create sms.html that shows all SMS conversations. Each conversation thread is enclosed in a rectangle.
+4. Add support to resume downloading from the first non-downloaded message, in case of interrupted download

File mirror-murty.py

+#!/usr/bin/env python
+# (C) Murty Rompalli and others in AUTHORS file
+
+# Based on an example supplied by Arno Hautala in his software at:
+# https://bitbucket.org/fracai/pygooglevoice
+
+# NOTE: Set download_type below:
+# 'all': Download ALL messages
+# 'inbox': Download messages in inbox
+
+download_type = 'all'
+#download_type = 'inbox'
+
+# Should we archive messages in inbox
+archive = True
+
+# Number of login attempts
+tries = 4
+
+# Download directory
+base = 'downloaded/'
+datefile = base + 'STATUS.' + download_type
+
+from googlevoice import Voice
+from googlevoice.util import LoginError
+import re
+import sys
+import json
+import time
+import os
+import os.path
+import BeautifulSoup
+import glob
+import csv
+
+feeds = ['received','placed','missed','recorded','voicemail','sms']
+mp3Feeds = ['recorded','voicemail']
+
+def fixmessage(msg,feed):
+    """Normalise a downloaded message in place and return it.
+
+    msg  -- message dictionary; may carry a 'children' list of dictionaries
+    feed -- name of the feed the message belongs to (e.g. 'placed')
+
+    'relativeStartTime' is blanked on the message and on every child that
+    has it. A child without a usable 'fromName' gets one: 'Me' for placed
+    calls; for received/missed/recorded/voicemail the child's phone number,
+    or 'Unknown' when the number is missing, blank or itself 'Unknown'.
+    Children of any other feed (e.g. 'sms') keep their missing name.
+    """
+    if 'relativeStartTime' in msg:
+        msg['relativeStartTime'] = ''
+    for child in msg.get('children', []):
+        if 'relativeStartTime' in child:
+            child['relativeStartTime'] = ''
+        if child.get('fromName'):
+            # A non-empty name is already present; leave it alone.
+            continue
+        if feed == 'placed':
+            child['fromName'] = 'Me'
+        elif feed in ('received','missed','recorded','voicemail'):
+            number = child.get('phoneNumber')
+            # Raw string: '\S' is not a valid string escape on its own.
+            if number and re.search(r'\S',number) and not re.match('Unknown',number,re.I):
+                child['fromName'] = number
+            else:
+                child['fromName'] = 'Unknown'
+    return msg
+
+def getfeedname(msg):
+    """Return the first entry of `feeds` found in msg['labels'], or ''."""
+    matching = [name for name in feeds if name in msg['labels']]
+    if matching:
+        return matching[0]
+    return ''
+
+def getNames(html):
+    """Map conversation ids to the sender names shown in the page HTML.
+
+    Each top-level <div> carrying an id attribute is treated as one
+    conversation. The text of every element with class
+    'gc-message-sms-from' inside it becomes one name, in document order,
+    stripped of surrounding whitespace and of ':' characters.
+    Conversations that yield no names are left out of the result.
+    """
+    names_by_conv = {}
+    soup = BeautifulSoup.BeautifulSoup(html)
+    for block in soup.findAll('div', attrs={'id' : True}, recursive=False):
+        senders = [
+            ' '.join(cell.findAll(text=True)).strip().replace(':','')
+            for cell in block.findAll(attrs={'class' : 'gc-message-sms-from'})
+        ]
+        if senders:
+            names_by_conv[block['id']] = senders
+    return names_by_conv
+
+def getmsgs(page):
+    """Fetch one page of the configured folder and return its conversations.
+
+    page -- page number; sent to the service as the term 'p<page>'
+
+    Returns a list of conversation dictionaries, or [] when the page holds
+    no messages. Each message on the page is re-fetched individually via
+    voice.message, visiting the page's messages in descending 'startTime'
+    order. Empty 'fromName' fields of children are filled from the names
+    scraped out of the page HTML by getNames().
+    """
+    # Request the page, then read the parsed result off the same attribute.
+    getattr(voice,download_type)(terms={ 'page':('p%d' % page) })
+    value = getattr(voice,download_type)
+    jsonData = value.data
+    if 'messages' not in jsonData or len(jsonData['messages']) < 1:
+        return []
+    if not jsonData['messages'].values():
+        return []
+    pagenames = getNames(value.html)
+    pageconversations = []
+    for message in sorted(jsonData['messages'].values(),key=lambda k: k['startTime'],reverse=True):
+        # Fetch the single message and collect everything the reply contains.
+        getattr(voice,'message')(terms={ 'messageId':message['id'] })
+        pageconversations += getattr(voice,'message').data['messages'].values()
+    for conv in pageconversations:
+        key = conv['id']
+        # Nothing to fix without children or without scraped names.
+        if 'children' not in conv:
+            continue
+        if key not in pagenames:
+            continue
+        if 'relativeStartTime' in conv:
+            conv['relativeStartTime'] = ''
+        # Scraped names are matched to children purely by position.
+        c = 0
+        maxc = len(pagenames[key])
+        for conversation in conv['children']:
+            if 'relativeStartTime' in conversation:
+                conversation['relativeStartTime'] = ''
+            # Out of scraped names: stop. Children after this one keep
+            # their 'relativeStartTime' untouched.
+            if c == maxc:
+                break
+            # NOTE(review): assumes every child has a 'fromName' key - confirm.
+            if not conversation['fromName']:
+                if pagenames[key][c]:
+                    conversation['fromName'] = pagenames[key][c]
+            c += 1
+    return pageconversations
+ 
+def getnewdate(msgs):
+    """Return the first truthy 'startTime' found in msgs, or '0' if none."""
+    stamps = [m['startTime'] for m in msgs if 'startTime' in m and m['startTime']]
+    if stamps:
+        return stamps[0]
+    return '0'
+
+def getolddate():
+    """Return the timestamp recorded in the status file as an int.
+
+    Returns 0 when the status file does not exist or contains no digits
+    (for example an empty file), so that the caller treats every message
+    as new instead of crashing on int('').
+    Errors from opening or reading the file propagate unchanged.
+    """
+    if not os.path.isfile(datefile):
+        return 0
+    # The with-block closes the file even if read() fails.
+    with open(datefile) as f:
+        d = f.read()
+    # Keep only the digits; anything else (newline, stray text) is dropped.
+    digits = ''.join(ch for ch in d if ch.isdigit())
+    if not digits:
+        return 0
+    return int(digits)
+
+def setolddate(d):
+    """Overwrite the status file with the timestamp string d.
+
+    Errors from opening or writing the file propagate unchanged; the file
+    is closed in either case.
+    """
+    with open(datefile,'wb') as f:
+        f.write(d)
+
+def saveMessage(key,msg,feed):
+    """Write one message to disk as JSON and CSV, plus mp3 where applicable.
+
+    key  -- message id; lower-cased, it forms the tail of the file name
+    msg  -- message dictionary; 'phoneNumber' and 'startTime' are required
+    feed -- feed name; selects the sub-directory below `base`
+
+    Files are named <number>.<YYYYmmdd.HHMMSS>.<key>. Older files for the
+    same number and key but a different timestamp are deleted first. The
+    CSV gets one row (sender, display date, text) per child; a message
+    without children produces an empty CSV instead of a KeyError. For
+    feeds listed in mp3Feeds the audio is fetched with voice.download().
+    Always returns 1.
+    """
+    jsondir = base + feed + '/json/'
+    csvdir = base + feed + '/csv/'
+    head = msg['phoneNumber'].replace('+','')
+    if not head or re.match('Unknown',head,re.I):
+        head = 'unknown'
+    # startTime is divided by 1000 before being handed to time.localtime().
+    timestamp = '.' + time.strftime("%Y%m%d.%H%M%S",time.localtime(int(msg['startTime'])/1000))
+    tail = '.' + key.lower()
+    stem = head + timestamp + tail
+    jsonfile = jsondir + stem + '.json'
+    csvfile = csvdir + stem + '.csv'
+    print "%-9s %s" % (feed,stem)
+    # Drop earlier copies of this message saved under another timestamp.
+    for oldfile in glob.glob(jsondir + head + '.*' + tail + '.json'):
+        if oldfile != jsonfile:
+            os.unlink(oldfile)
+    for oldfile in glob.glob(csvdir + head + '.*' + tail + '.csv'):
+        if oldfile != csvfile:
+            os.unlink(oldfile)
+    with open(jsonfile,'wb') as f:
+        f.write(json.dumps({key:msg},sort_keys=True,indent=4))
+    # The with-block closes (and thereby flushes) the CSV file.
+    with open(csvfile,'wb') as f:
+        writer = csv.writer(f)
+        for i in msg.get('children', []):
+            writer.writerow([i['fromName'].encode('utf-8'),i['displayStartDateTime'].encode('utf-8'),i['message'].encode('utf-8')])
+    if feed in mp3Feeds:
+        mp3dir = base + feed + '/mp3'
+        mp3file = stem + '.mp3'
+        voice.download(key,mp3dir,mp3file)
+    return 1
+
+def ensureDir(path):
+    """Create directory `path` (including parents) unless it already exists.
+
+    An EEXIST error from os.makedirs is ignored only if `path` is a
+    directory by then (i.e. it was created concurrently). Any other
+    OSError, or EEXIST because `path` exists as a non-directory, is
+    re-raised.
+    """
+    # Imported here because the file's import list does not include errno.
+    import errno
+    if not os.path.isdir(path):
+        try:
+            os.makedirs(path)
+        except OSError as e:
+            if e.errno != errno.EEXIST or not os.path.isdir(path):
+                raise
+
+# Main function
+
+# Create the output directory tree before anything is downloaded.
+for feed in feeds:
+    ensureDir(base+feed+'/csv')
+    ensureDir(base+feed+'/json')
+
+for feed in mp3Feeds:
+    ensureDir(base+feed+'/mp3')
+
+voice = Voice()
+
+# Try to log in up to `tries` times, sleeping i seconds before attempt i+1
+# (so there is no delay before the first attempt).
+for i in xrange(tries):
+    time.sleep(i)
+    print 'Login attempt:',i+1
+    try:
+        voice.login()
+    except LoginError:
+        print '\t ... Failed'
+    else:
+        print '\t ... Success\n'
+        break
+else:
+    # Reached only when the loop ended without a successful login.
+    # NOTE(review): this bare raise is outside any except block; presumably
+    # it relies on Python 2 re-raising the last handled LoginError - confirm,
+    # and consider raising an explicit error instead.
+    raise
+
+newdate = '0'
+count = 0      # messages saved during this run
+keys = []      # ids of messages labelled 'inbox', archived at the end
+stop = 0       # set once a message not newer than olddate is seen
+page = 1
+
+# olddate: timestamp stored by the previous run (0 if there is none).
+# newdate: first truthy startTime on page 1; stored at the end of this run.
+olddate = getolddate()
+msgs = getmsgs(page)
+newdate = getnewdate(msgs)
+
+while msgs and not stop:
+    print '----- Page',page,'-----'
+    for message in msgs:
+        # Messages without a startTime are always treated as new.
+        if 'startTime' not in message or int(message['startTime']) > olddate:
+            key = message['id']
+            feed = getfeedname(message)
+            if 'inbox' in message['labels']:
+                keys.append(key)
+            # Messages that match none of the known feeds are not saved.
+            if feed:
+                msg = fixmessage(message,feed)
+                if saveMessage(key,msg,feed):
+                    count += 1
+        else:
+            # First message not newer than olddate: stop paging.
+            stop = 1
+            break
+    else:
+        # Whole page processed without hitting an old message: next page.
+        page += 1
+        msgs = getmsgs(page)
+
+# NOTE(review): when page 1 was empty, newdate is still '0' and overwrites
+# any previously stored timestamp - confirm this is intended.
+setolddate(newdate)
+print '\n',count,'messages written to directory',base
+
+if archive:
+    for key in keys:
+        voice.archive(key)
+    print len(keys),'messages in inbox are now archived'

File mirror-nonsms.py

-#!/usr/bin/env python
-# (C) Murty Rompalli and others in AUTHORS file
-
-# Based on an example supplied by Arno Hautala in his software at:
-# https://bitbucket.org/fracai/pygooglevoice
-
-# NOTE: Set download_type below:
-# 'all': Download ALL non-SMS messages and archive voicemail and recordings
-# 'inbox': Download voicemail and recordedings in inbox and archive them
-
-download_type = 'all'
-#download_type = 'inbox'
-
-# Should we archive messages
-archive = True
-
-# Download directory
-base = 'downloaded'
-datefile = base + '/STATUS.' + download_type
-
-from googlevoice import Voice
-import re
-import sys
-import json
-import time
-import os
-import os.path
-
-feeds = ['received','placed','missed','recorded','voicemail']
-mp3Feeds = ['recorded','voicemail']
-
-def fixmessage(msg,feed):
-    if 'relativeStartTime' in msg:
-        msg['relativeStartTime'] = ''
-    if 'children' in msg:
-        for child in msg['children']:
-            if 'relativeStartTime' in child:
-                child['relativeStartTime'] = ''
-            if 'fromName' not in child or not child['fromName']:
-                if feed == 'placed':
-                    child['fromName'] = 'Me'
-                elif feed == 'received' or feed == 'missed' or feed == 'recorded' or feed == 'voicemail':
-                    if 'phoneNumber' in child and child['phoneNumber'] and re.search('\S',child['phoneNumber']) and not re.match('Unknown',child['phoneNumber'],re.I):
-                        child['fromName'] = child['phoneNumber']
-                    else:
-                        child['fromName'] = 'Unknown'
-    return msg
-
-def getfeedname(msg):
-    for feed in feeds:
-        if feed in msg['labels']:
-            return feed
-    return ''
-
-def getMessagesFromIDs(ids):
-    for id in ids:
-        getattr(voice,'message')(terms={ 'messageId':id })
-        message = getattr(voice,'message').data['messages']
-        for key in message:
-            value = message[key]
-            feed = getfeedname(value)
-            value = fixmessage(value,feed)
-            if feed:
-                saveMessage(key,value,feed)
-                if archive:
-                    if 'inbox' in value['labels']:
-                        voice.archive(key)
-
-def getmsgs(page):
-    if page < 1:
-        return []
-    getattr(voice,download_type)(terms={ 'page':('p%d' % page) })
-    jsonData = getattr(voice,download_type).data
-    if 'messages' not in jsonData or len(jsonData['messages']) < 1:
-        return []
-    if not jsonData['messages'].values():
-        return []
-    return sorted(jsonData['messages'].values(),key=lambda k: k['startTime'],reverse=True)
-
-def getnewdate(msgs):
-    for message in msgs:
-        if 'startTime' not in message:
-            continue
-        if message['startTime']:
-            return message['startTime']
-    return '0'
-
-def getolddate():
-    if not os.path.isfile(datefile):
-        return 0
-    try:
-        f = open(datefile)
-    except:
-        raise 
-    d = f.read()
-    d = filter(type(d).isdigit,d)
-    f.close()
-    return int(d)
-
-def setolddate(d):
-    try:
-        f = open(datefile,'wb')
-    except:
-        raise
-    f.write(d)
-    f.close()
-
-def getMessageIDs():
-    global newdate
-    msgIDs = []
-    stop = 0
-    page = 1
-    msgs = getmsgs(page)
-    newdate = getnewdate(msgs)
-    olddate = getolddate()
-    if not msgs:
-        return msgIDs
-    while not stop:
-        for message in msgs:
-            if 'sms' not in message['labels']:
-                if 'startTime' not in message or int(message['startTime']) > olddate:
-                    msgIDs.append(message['id'])
-                else:
-                    stop = 1
-                    break
-        print 'page %d' % page
-        page += 1
-        msgs = getmsgs(page)
-        if not msgs:
-            break
-    return msgIDs
-
-def saveMessage(key,msg,feed):
-    outputDir = base + '/' + feed + '/'
-    name = msg['phoneNumber'].replace('+','')
-    if not name or re.match('Unknown',name,re.I):
-        name = 'unknown'
-    name += '.' + time.strftime("%Y%m%d.%H%M%S",time.localtime(int(msg['startTime'])/1000))
-    name += '.' + key.lower()
-    print name
-    ensureDirectory(outputDir)
-    f = open(outputDir+name+'.json','wb')
-    f.write(json.dumps({key:msg},sort_keys=True,indent=4))
-    f.close()
-    if feed in mp3Feeds:
-        voice.download(key,outputDir,name+'.mp3')
-
-def ensureDirectory(path):
-    if not os.path.isdir(path):
-        try:
-            os.makedirs(path)
-        except OSError,e:
-            if e.errno != errno.EEXIST:
-                raise
-
-voice = Voice()
-voice.login()
-newdate = '0'
-
-messageIDs = getMessageIDs()
-getMessagesFromIDs(messageIDs)
-setolddate(newdate)
-print len(messageIDs),'messages written to directory',base
-if archive:
-    print 'Any messages in inbox are now archived'

File mirror-sms.py

-#!/usr/bin/env python
-# (C) Murty Rompalli and others in AUTHORS file
-
-# Based on an example supplied by Arno Hautala in his software at:
-# https://bitbucket.org/fracai/pygooglevoice
-
-# Download sms messages and archive them if present in inbox
-#
-# NOTE: Set download_type below:
-# 'sms': Download all SMS messages and archive them. Set this for initial run
-# 'inbox': Download SMS messages in inbox only and archive them
-
-download_type = 'sms'
-#download_type = 'inbox'
-
-# Should we archive messages
-archive = True
-
-# Download directory
-outputDir = 'downloaded/sms'
-datefile = outputDir + '/STATUS.' + download_type
-
-from googlevoice import Voice
-import re
-import sys
-import json
-import time
-import os
-import os.path
-import glob
-import csv
-
-def getNames(html):
-    import BeautifulSoup
-    Names = {}
-    tree = BeautifulSoup.BeautifulSoup(html)
-    convBlock = tree.findAll('div', attrs={'id' : True}, recursive=False)
-    for conv in convBlock:
-        fromnames = []
-        rows = conv.findAll(attrs={'class' : 'gc-message-sms-from'})
-        for row in rows:
-            fromname = ' '.join(row.findAll(text=True)).strip().replace(':','')
-            fromnames.append(fromname)
-        if fromnames:
-            Names[conv['id']] = fromnames
-    return Names
-
-def ensureDirectory(path):
-    if not os.path.isdir(path):
-        try:
-            os.makedirs(path)
-        except OSError, e:
-            if e.errno != errno.EEXIST:
-                raise
-
-def getolddate():
-    if not os.path.isfile(datefile):
-        return 0
-    try:
-        f = open(datefile)
-    except:
-        raise 
-    d = f.read()
-    d = filter(type(d).isdigit,d)
-    f.close()
-    return int(d)
-
-def setolddate(d):
-    try:
-        f = open(datefile,'wb')
-    except:
-        raise
-    f.write(d)
-    f.close()
-
-jsondir = outputDir + '/json'
-csvdir = outputDir + '/csv'
-ensureDirectory(jsondir)
-ensureDirectory(csvdir)
-voice = Voice()
-voice.login()
-conversations = []
-page = 0
-newdate = '0'
-stop = 0
-olddate = getolddate()
-
-while not stop:
-    page += 1
-    getattr(voice,download_type)(terms={ 'page':('p%d' % page) })
-    value = getattr(voice,download_type)
-    jsonData = value.data
-    pagenames = getNames(value.html)
-    pageconversations = []
-    if 'messages' not in jsonData or len(jsonData['messages']) < 1:
-        break
-    for message in sorted(jsonData['messages'], key=lambda k: jsonData['messages'][k]['startTime'], reverse=True):
-        if 'sms' not in jsonData['messages'][message]['labels']:
-            continue 
-        if not int(newdate):
-            if 'startTime' in jsonData['messages'][message]:
-                newdate = jsonData['messages'][message]['startTime']
-        if 'startTime' not in jsonData['messages'][message] or int(jsonData['messages'][message]['startTime']) > olddate:    
-            getattr(voice,'message')(terms={ 'messageId':jsonData['messages'][message]['id'] })
-            pageconversations.append(getattr(voice,'message').data['messages'])
-        else:
-            stop = 1
-            break
-    for conv in pageconversations:
-        for key in conv:
-            if 'children' not in conv[key]:
-                continue
-            if key not in pagenames:
-                continue
-            if 'relativeStartTime' in conv[key]:
-                conv[key]['relativeStartTime'] = ''
-            c = 0
-            maxc = len(pagenames[key])
-            for conversation in conv[key]['children']:
-                if 'relativeStartTime' in conversation:
-                    conversation['relativeStartTime'] = ''
-                if c == maxc:
-                    break
-                if not conversation['fromName']:
-                    if pagenames[key][c]:
-                        conversation['fromName'] = pagenames[key][c]
-                c += 1
-    conversations += pageconversations
-    print 'page %d' % page
-
-for conv in conversations:
-    for key in conv:
-        head = conv[key]['phoneNumber'].replace('+','') + '.'
-        timestamp = time.strftime('%Y%m%d.%H%M%S',time.localtime(int(conv[key]['startTime'])/1000))
-        tail = '.' + key
-        jsonname = jsondir + '/' + head + timestamp + tail + '.json'
-        csvname = csvdir + '/' + head + timestamp + tail + '.csv'
-        print head + timestamp + tail
-        for oldfile in glob.glob(jsondir + '/' + head + '*' + tail + '.json'):
-            if oldfile != jsonname:
-                os.unlink(oldfile)
-        for oldfile in glob.glob(csvdir + '/' + head + '*' + tail + '.csv'):
-            if oldfile != csvname:
-                os.unlink(oldfile)
-        f = open(jsonname,'wb')
-        f.write(json.dumps(conv,sort_keys=True, indent=4))
-        f.close()
-        f = csv.writer(open(csvname,'wb'))
-        for i in conv[key]['children']:
-            f.writerow([i['fromName'].encode('utf-8'),i['displayStartDateTime'].encode('utf-8'),i['message'].encode('utf-8')])
-        if archive:
-            if 'inbox' in conv[key]['labels']:
-                voice.archive(key)
-
-setolddate(newdate)
-print len(conversations), 'sms conversations written to directory', outputDir
-if archive:
-    print 'Any sms conversations in inbox are now archived'