# Source

# scatter-share / firefox_session_textareas.py

# encoding: utf-8
'''
firefox_session_textareas.py - Recover text area content from Firefox session files

Print out text area content cached by Firefox in the saved session file.

For more information, see:

http://broadcast.oreilly.com/2009/01/recovering-text-areas-from-fir.html

Sample usage:

python firefox_session_textareas.py "~/.firefox/Profiles/*/sessionstore.js"
    -- Print out all text area content found (including the corresponding page URL)

python firefox_session_textareas.py "~/.firefox/Profiles/*/sessionstore.js" spam
    -- Print out only the text areas found to contain the string "spam"
       (or whose corresponding page URL contains that string)
'''

import glob
import json
import os
import re
import sys
import urllib

# Firefox 3 sessionstore.js entry.  Group 1 is the page URL; group 2 is the
# text stored after the literal key "#editor-textarea=" (the script passes it
# through textarea_content() before printing).
TEXTAREA_PAT_3_0 = re.compile(r'{url:"([^"]*)"[^}]*}[^}]*text:"#editor-textarea=([^"]*)"')
# Firefox 3.5 sessionstore.js entry.  Group 1 is the page URL, group 2 the
# page title, and group 3 the whole {...} "formdata" object, whose values may
# be quoted strings, integers, true or false.
TEXTAREA_PAT_3_5 = re.compile(r'{"url":"([^"]*)"[^}]*?,"title":"([^"]*)".*?,"formdata":({("([^"]*)":("((\\"|[^"])*)"|\d+|true|false),?)*})')
# One "name":"value" pair inside a formdata object.  Group 2 is the field
# name; group 3 is the value including its surrounding double quotes.  Only
# string-valued entries match (numbers and true/false are not captured).
FORMDATA_PAT_3_5 = re.compile(r'("([^"]*)":("((\\"|[^"])*)"))')
# Heading line printed before each hit; %s is filled with the page URL.
HEADING_TPL = '------------ firefox_session_textareas.py - %s ------------'

def textarea_content(s):
    """Return *s* with its %XX URL escapes decoded.

    The Firefox 3 session format stores text area content percent-encoded;
    this undoes that encoding so the text can be printed as typed.
    """
    # urllib.unquote only exists on Python 2; on Python 3 the same function
    # lives in urllib.parse.  Importing locally keeps the function usable on
    # both without touching the module-level imports.
    try:
        from urllib import unquote
    except ImportError:
        from urllib.parse import unquote
    return unquote(s)

#FIXME: Use optparse, etc to clean up command line handling
# The documented usage passes a *quoted* pattern such as
# "~/.firefox/Profiles/*/sessionstore.js", so the shell expands neither "~"
# nor "*"; expand both here.  When nothing matches, fall back to the literal
# argument so that a missing file still fails in open() exactly as before.
session_paths = sorted(glob.glob(os.path.expanduser(sys.argv[1]))) or [sys.argv[1]]
try:
    search = sys.argv[2]
except IndexError:
    # No search string given: report every text area found.
    search = None

# NOTE: every print below takes a single parenthesised argument so that the
# output is identical under the Python 2 print statement and also valid as a
# Python 3 print() call; print("") emits the blank separator line.
for session_path in session_paths:
    # "with" guarantees the session file is closed once it has been read.
    with open(session_path) as session_file:
        sessioninfo = session_file.read()

    #First check the Firefox 3 sessionstore.js format
    for match in TEXTAREA_PAT_3_0.finditer(sessioninfo):
        # The search string may appear anywhere in the matched entry
        # (page URL or text area content).
        if search is not None and search not in match.group(0):
            continue
        print(HEADING_TPL % match.group(1))
        print("")
        print(textarea_content(match.group(2)))
        print("")

    #Now check the Firefox 3.5 sessionstore.js format
    for match in TEXTAREA_PAT_3_5.finditer(sessioninfo):
        if search is not None and search not in match.group(0):
            continue
        print(HEADING_TPL % match.group(1))
        print("")
        # Page title
        print(match.group(2))
        for datamatch in FORMDATA_PAT_3_5.finditer(match.group(3)):
            print("Field name: %s" % datamatch.group(2))
            try:
                # SECURITY: the quoted value comes from web page form data
                # saved in the session file, so it must never be passed to
                # eval().  It is a JSON string literal; json.loads decodes
                # the escapes (and raw UTF-8 bytes) without executing anything.
                value = json.loads(datamatch.group(3))
            except ValueError as e:
                # Best effort: report the problem and show the undecoded text.
                print("Unable to parse field value")
                print(e)
                print("")
                print("")
                print("Raw field value\n%s" % datamatch.group(3))
            else:
                # On Python 2 json.loads returns unicode; encode it so that
                # printing to a non-UTF-8 or piped stdout cannot fail.
                if not isinstance(value, str):
                    value = value.encode('utf-8')
                print("Field value\n%s" % value)
            print("")