uche / scatter-share (http://uche.ogbuji.net/)
Broad bucket for sharing various, general utilities and code samples
Clone this repository (size: 14.4 KB): HTTPS / SSH
$ hg clone http://bitbucket.org/uche/scatter-share/
| commit 1: | 3caffab99ea0 |
| parent 0: | 01c0b007c22f |
| branch: | default |
Add updated tool for Recovering text areas from Firefox session information
Changed (Δ2.8 KB):
raw changeset »
firefox_session_textareas.py (65 lines added, 0 lines removed)
Up to file-list firefox_session_textareas.py:
1 |
# encoding: utf-8 |
|
2 |
''' |
|
3 |
firefox_session_textareas.py - Recover text area content from Firefox session files |
|
4 |
||
5 |
Print out text area content cached by Firefox in the saved session file. |
|
6 |
||
7 |
Sample usage: |
|
8 |
||
9 |
python firefox_session_textareas.py "~/.firefox/Profiles/*/sessionstore.js" |
|
10 |
-- Print out all text area content found (including the corresponding page URL) |
|
11 |
||
12 |
python firefox_session_textareas.py "~/.firefox/Profiles/*/sessionstore.js" spam |
|
13 |
-- Print out only the text areas found to contain the string "spam" |
|
14 |
(or whose corresponding page URL contains that string) |
|
15 |
''' |
|
16 |
||
17 |
import urllib |
|
18 |
import sys |
|
19 |
import re |
|
20 |
||
21 |
# Firefox 3.0 sessionstore.js entry: group 1 is the page URL, group 2 is the
# still URL-escaped text stored under the "#editor-textarea" key.
TEXTAREA_PAT_3_0 = re.compile(r'{url:"([^"]*)"[^}]*}[^}]*text:"#editor-textarea=([^"]*)"')

# Firefox 3.5 sessionstore.js entry: group 1 is the page URL, group 2 the page
# title and group 3 the whole {...} "formdata" object. Inside formdata the
# pattern accepts quoted strings (with \" escapes), integers, true and false
# as field values.
TEXTAREA_PAT_3_5 = re.compile(r'{"url":"([^"]*)"[^}]*?,"title":"([^"]*)".*?,"formdata":({("([^"]*)":("((\\"|[^"])*)"|\d+|true|false),?)*})')

# One "name":"value" pair inside a 3.5 formdata object: group 2 is the field
# name, group 3 the value including its surrounding double quotes. Only
# string-valued fields match; numeric and boolean fields are skipped.
FORMDATA_PAT_3_5 = re.compile(r'("([^"]*)":("((\\"|[^"])*)"))')

# Banner printed before each hit; the %s slot receives the page URL.
HEADING_TPL = '------------ firefox_session_textareas.py - %s ------------'
|
25 |
||
26 |
def textarea_content(s):
    '''
    Return s with its %XX URL escapes decoded.

    s -- URL-escaped text as captured from the session file

    "+" characters are left untouched (this is unquote, not unquote_plus).
    '''
    # unquote lives in urllib on Python 2 and in urllib.parse on Python 3;
    # importing here keeps the function usable on either interpreter.
    try:
        from urllib import unquote
    except ImportError:
        from urllib.parse import unquote
    return unquote(s)
|
28 |
||
29 |
# Command-line driver: read the session file named by the first argument and
# print every cached text area / form field found in it, optionally limited to
# entries containing the search string given as the second argument.
#
# Needs Python 2.6 or later (json module, "with" statement, "except ... as").
# Every print call passes exactly one parenthesised argument, so the output is
# the same whether print is the Python 2 statement or the print function.
import json

#FIXME: Use optparse, etc to clean up command line handling
if len(sys.argv) < 2:
    sys.exit('usage: python firefox_session_textareas.py SESSIONSTORE_JS [SEARCH_STRING]')

# Read the whole file up front; the with-block closes the handle right away
# instead of leaving that to garbage collection.
with open(sys.argv[1]) as sessionfile:
    sessioninfo = sessionfile.read()

# Optional filter: None means "report everything".
search = sys.argv[2] if len(sys.argv) > 2 else None


#First check the Firefox 3 sessionstore.js format
for match in TEXTAREA_PAT_3_0.finditer(sessioninfo):
    # The filter is applied to the whole matched entry, so it can hit either
    # the page URL or the text area content.
    if search is None or search in match.group(0):
        print(HEADING_TPL % match.group(1))
        print(textarea_content(match.group(2)))


#Now check the Firefox 3.5 sessionstore.js format
for match in TEXTAREA_PAT_3_5.finditer(sessioninfo):
    #Sample entry (truncated):
    #{"url":"http://trac.xml3k.org/newticket","title":"New Ticket – xml3k – Trac","ID":2969506219,"scroll":"0,121","formdata":{"#proj-search":"","#field-summary":"Fix-up of Amara bindery nodes after xml_namespace update","#field-reporter":"http://uche.myopenid.com/","#field-description":"Amara bindery nodes are bound to their parents by connecting (via a descriptor) a Python attribute name to a namespace/local name combination.\u000a\u000aGiven the following document:\u000a\u000a{{{\u000a<top xmlns:a=\"urn:bogus:a\">\u000a <a:monty/>\u000a</top>\u000a}}}
    if search is None or search in match.group(0):
        print(HEADING_TPL % match.group(1))
        print(match.group(2))
        for datamatch in FORMDATA_PAT_3_5.finditer(match.group(3)):
            print("Field name: %s" % datamatch.group(2))
            # group(3) is a double-quoted string literal taken from the session
            # file. It used to be decoded with eval('u' + literal), which runs
            # whatever the file contains as Python code; json.loads decodes the
            # same \" and \uXXXX escapes without executing anything.
            try:
                value = json.loads(datamatch.group(3))
            except ValueError as e:
                print("Unable to parse field value")
                print(e)
                # NOTE(review): indentation was lost in the listing this was
                # recovered from; the raw value is assumed to be shown only
                # when decoding fails -- confirm against the original file.
                print("Raw field value\n%s" % datamatch.group(3))
            else:
                # On Python 2 json.loads returns a unicode object; encode it so
                # the text prints as UTF-8 bytes, as before. On Python 3 the
                # value is already str and is printed unchanged.
                if not isinstance(value, str):
                    value = value.encode('utf-8')
                print("Field value\n%s" % value)
|
64 |
||
65 |
