Commits

Uche Ogbuji  committed 3caffab

Add updated tool for Recovering text areas from Firefox session information

  • Participants
  • Parent commits 01c0b00

Comments (0)

Files changed (1)

File firefox_session_textareas.py

+# encoding: utf-8
+'''
+firefox_session_textareas.py - Recover text area content from Firefox session files
+
+Print out text area content cached by Firefox in the saved session file.
+
+Sample usage:
+
+python firefox_session_textareas.py "~/.firefox/Profiles/*/sessionstore.js"
+    -- Print out all text area content found (including the corresponding page URL)
+
+python firefox_session_textareas.py "~/.firefox/Profiles/*/sessionstore.js" spam
+    -- Print out only the text areas found to contain the string "spam"
+       (or whose corresponding page URL contains that string)
+'''
+
+import urllib
+import sys
+import re
+
+#Firefox 3 session record: group 1 is the page URL, group 2 is the text that
+#follows '#editor-textarea=' (up to the next double quote)
+TEXTAREA_PAT_3_0 = re.compile(r'{url:"([^"]*)"[^}]*}[^}]*text:"#editor-textarea=([^"]*)"')
+#Firefox 3.5 session record: group 1 is the page URL, group 2 is the page title,
+#group 3 is the whole "formdata" object ({...}) whose values may be quoted
+#strings, integers, true or false
+TEXTAREA_PAT_3_5 = re.compile(r'{"url":"([^"]*)"[^}]*?,"title":"([^"]*)".*?,"formdata":({("([^"]*)":("((\\"|[^"])*)"|\d+|true|false),?)*})')
+#One string-valued entry inside a formdata object: group 2 is the field name,
+#group 3 is the value including its surrounding double quotes
+#(numeric and true/false values are not matched by this pattern)
+FORMDATA_PAT_3_5 = re.compile(r'("([^"]*)":("((\\"|[^"])*)"))')
+#Heading printed before each hit; the %s slot is filled with the page URL
+HEADING_TPL = '------------ firefox_session_textareas.py - %s ------------'
+
+def textarea_content(s):
+    '''
+    Return s with its URL %xx escapes decoded (delegates to urllib.unquote)
+    '''
+    return urllib.unquote(s)
+
+#FIXME: Use optparse, etc to clean up command line handling
+sessioninfo = open(sys.argv[1]).read()
+try:
+    search = sys.argv[2]
+except IndexError:
+    search = None
+
+
+#First check the Firefox 3 sessionstore.js format
+for match in TEXTAREA_PAT_3_0.finditer(sessioninfo):
+    if search is None or search in match.group(0):
+        print HEADING_TPL%match.group(1)
+        print
+        print textarea_content(match.group(2))
+        print
+
+#Now check the Firefox 3.5 sessionstore.js format
+for match in TEXTAREA_PAT_3_5.finditer(sessioninfo):
+    #{"url":"http://trac.xml3k.org/newticket","title":"New Ticket – xml3k – Trac","ID":2969506219,"scroll":"0,121","formdata":{"#proj-search":"","#field-summary":"Fix-up of Amara bindery nodes after xml_namespace update","#field-reporter":"http://uche.myopenid.com/","#field-description":"Amara bindery nodes are bound to their parents by connecting (via a descriptor) a Python attribute name to a namespace/local name combination.\u000a\u000aGiven the following document:\u000a\u000a{{{\u000a<top xmlns:a=\"urn:bogus:a\">\u000a  <a:monty/>\u000a</top>\u000a}}}
+    if search is None or search in match.group(0):
+        #print match.group(0)
+        print HEADING_TPL%match.group(1)
+        print
+        print match.group(2)
+        for datamatch in FORMDATA_PAT_3_5.finditer(match.group(3)):
+            #Were we to convert JS true to Python True, and prepend u to strings, we couild probabaly use eval
+            print "Field name:", datamatch.group(2)
+            try:
+                print "Field value\n", eval('u'+datamatch.group(3)).encode('utf-8')
+            except Exception, e:
+                print "Unable to parse field value"
+                print e
+                print
+                print
+                print "Raw field value\n", datamatch.group(3)
+            print
+