uche / scatter-share (http://uche.ogbuji.net/)

Broad bucket for sharing various, general utilities and code samples

Clone this repository (size: 14.4 KB): HTTPS / SSH
$ hg clone http://bitbucket.org/uche/scatter-share/
commit 1: 3caffab99ea0
parent 0: 01c0b007c22f
branch: default
Add updated tool for Recovering text areas from Firefox session information
Uche Ogbuji / uche
9 months ago

Changed (Δ2.8 KB):

raw changeset »

firefox_session_textareas.py (65 lines added, 0 lines removed)

Up to file-list firefox_session_textareas.py:

1
# encoding: utf-8
2
'''
3
firefox_session_textareas.py - Recover text area content from Firefox session files
4
5
Print out text area content cached by Firefox in the saved session file.
6
7
Sample usage:
8
9
python firefox_session_textareas.py "~/.firefox/Profiles/*/sessionstore.js"
10
    -- Print out all text area content found (including the corresponding page URL)
11
12
python firefox_session_textareas.py "~/.firefox/Profiles/*/sessionstore.js" spam
13
    -- Print out only the text areas found to contain the string "spam"
14
       (or whose corresponding page URL contains that string)
15
'''
16
17
import urllib
18
import sys
19
import re
20
21
TEXTAREA_PAT_3_0 = re.compile(r'{url:"([^"]*)"[^}]*}[^}]*text:"#editor-textarea=([^"]*)"')
22
TEXTAREA_PAT_3_5 = re.compile(r'{"url":"([^"]*)"[^}]*?,"title":"([^"]*)".*?,"formdata":({("([^"]*)":("((\\"|[^"])*)"|\d+|true|false),?)*})')
23
FORMDATA_PAT_3_5 = re.compile(r'("([^"]*)":("((\\"|[^"])*)"))')
24
HEADING_TPL = '------------ firefox_session_textareas.py - %s ------------'
25
26
def textarea_content(s):
    '''
    Return the text area content s with its URL percent-escapes decoded.

    s -- the escaped text captured from the session file (e.g. "a%20b")

    Returns the decoded string (e.g. "a b"). Only %XX escapes are decoded;
    "+" characters are left untouched.
    '''
    # unquote lives in urllib on Python 2 and in urllib.parse on Python 3;
    # resolve it here so this helper works on either interpreter.
    try:
        from urllib import unquote
    except ImportError:
        from urllib.parse import unquote
    return unquote(s)
28
29
#FIXME: Use optparse, etc to clean up command line handling
30
# Command line: argv[1] is the session file to scan; the optional argv[2] is a
# substring that a record (its URL or its content) must contain to be printed.
if len(sys.argv) < 2:
    # No session file given: show the usage text rather than an IndexError traceback
    sys.exit(__doc__)

# Read the whole session file up front; "with" closes the handle promptly
with open(sys.argv[1]) as session_file:
    sessioninfo = session_file.read()
search = sys.argv[2] if len(sys.argv) > 2 else None

# NOTE: print is always called with exactly one argument below so that the
# output is the same whether print is a statement (Python 2) or a function.

#First check the Firefox 3 sessionstore.js format
for match in TEXTAREA_PAT_3_0.finditer(sessioninfo):
    #group(0) is the whole matched record, so the search covers URL and content
    if search is None or search in match.group(0):
        print(HEADING_TPL % match.group(1))
        print('')
        print(textarea_content(match.group(2)))
        print('')

#Now check the Firefox 3.5 sessionstore.js format
for match in TEXTAREA_PAT_3_5.finditer(sessioninfo):
    #Sample record (truncated):
    #{"url":"http://trac.xml3k.org/newticket","title":"New Ticket – xml3k – Trac","ID":2969506219,"scroll":"0,121","formdata":{"#proj-search":"","#field-summary":"Fix-up of Amara bindery nodes after xml_namespace update","#field-reporter":"http://uche.myopenid.com/","#field-description":"Amara bindery nodes are bound to their parents by connecting (via a descriptor) a Python attribute name to a namespace/local name combination.\u000a\u000aGiven the following document:\u000a\u000a{{{\u000a<top xmlns:a=\"urn:bogus:a\">\u000a  <a:monty/>\u000a</top>\u000a}}}
    if search is None or search in match.group(0):
        #group(1) is the page URL, group(2) the page title, group(3) the formdata object
        print(HEADING_TPL % match.group(1))
        print('')
        print(match.group(2))
        for datamatch in FORMDATA_PAT_3_5.finditer(match.group(3)):
            field_name = datamatch.group(2)
            raw_value = datamatch.group(3)  #the quoted string literal, quotes included
            print("Field name: " + field_name)
            try:
                #Were we to convert JS true to Python True, and prepend u to
                #strings, we could probably use eval on the whole formdata object.
                #SECURITY: eval runs text taken from the session file, which
                #ultimately comes from web pages. The regex only loosely
                #constrains it to a quoted string, so do not run this on
                #session files you do not trust.
                value = eval('u' + raw_value)
            except Exception as e:
                print("Unable to parse field value")
                print(e)
                print('')
                print('')
                print("Raw field value\n" + raw_value)
            else:
                #On Python 2 the result is a unicode object and must be encoded
                #before printing; on Python 3 it is already a printable str.
                if not isinstance(value, str):
                    value = value.encode('utf-8')
                print("Field value\n" + value)
            print('')
65