Source

ensalvage / ensalvage

Full commit
#!/usr/bin/env python

import re
from email import message_from_string
from mmap import mmap
from os.path import isdir, isfile
from os import makedirs
from itertools import count
import webbrowser
from threading import Thread

import Tkinter as tk
from tkFileDialog import askopenfilename, askdirectory
from tkMessageBox import showerror

def subfn(match):
    """Return the decoded text of a base64 "encoded word" regex match.

    match -- a regex match object whose group 1 holds the base64 payload.

    Returns the decoded payload.  When the payload is not valid base64
    (for example it was truncated), the whole matched text is returned
    unchanged so that one damaged header cannot abort the caller.
    """
    # Local imports: the module-level import list does not provide these.
    import base64
    import binascii

    payload = match.group(1)
    try:
        decoded = base64.b64decode(payload)
    except (TypeError, ValueError, binascii.Error):
        # Python 2 reports bad padding as TypeError, Python 3 as
        # binascii.Error (a ValueError subclass).
        return match.group(0)

    # re.sub() needs the replacement to have the same string type as the
    # subject.  On Python 2 b64decode() already returns str; on Python 3 it
    # returns bytes, which are converted here.
    if not isinstance(decoded, str):
        decoded = decoded.decode("utf-8", "replace")
    return decoded

def find_size(data, start):
    """Return how many bytes separate *start* from the next NUL byte.

    data  -- a string-like buffer offering ``find()`` (a str or an mmap).
    start -- non-negative offset at which the search begins.

    Returns the distance from *start* to the first NUL byte found at or
    after *start*, or -1 when there is none.
    """
    # Let the buffer's own find() do the scan instead of indexing it one
    # byte at a time from Python.
    nul_at = data.find(chr(0), start)
    if nul_at < 0:
        return -1
    return nul_at - start

# Opening part of the generated index page: the document head (with a style
# giving rows of class "odd" a silver background) and the table's heading
# row.  The <table>, <body> and <html> elements are left open; salvage()
# writes the closing tags after the last row.
HEADER = '''
<html>
    <head>
        <title>Lost Emails</title>
        <style>
            tr.odd {
                background: silver;
            }
        </style>
    </head>
    <body>
        <table>
            <tr>
                <th>Date</th><th>From</th><th>Subject</th>
            </tr>
'''
# Template for one table row.  It takes five %-format values, in order: the
# row's CSS class, the contents of the two plain cells, then the link target
# and the link text.  salvage() fills the cells with the message's Date,
# From, and a link labelled with its Subject.
TR = '''<tr class="%s">
            <td>%s</td>
            <td>%s</td>
            <td><a href="%s">%s</a></td>
        </tr>
'''
def salvage(dbfile, outdir, log):
    """Recover e-mail messages from a raw database file.

    Scans *dbfile* for every occurrence of ``X-Apparently-To:`` and treats
    the bytes from there up to the next NUL byte as one message.  Each
    message that has a Subject header is written to
    ``<outdir>/emails/<n>.txt`` and listed in ``<outdir>/index.html``.

    dbfile -- path of the database file to scan; it is opened read-only.
    outdir -- directory that receives ``index.html`` and the ``emails``
              folder (the folder is created when missing).
    log    -- callable taking one string; called once per saved message
              and once at the end with the total.
    """
    # Local imports: the module-level import list does not provide these.
    from cgi import escape
    from mmap import ACCESS_READ

    emails = "%s/emails" % outdir
    if not isdir(emails):
        makedirs(emails)

    saved = 0  # number of messages written so far; also names the files
    with open("%s/index.html" % outdir, "wt") as html:
        html.write(HEADER + "\n")

        # The database is only read, so neither the file nor the mapping
        # asks for write access.
        with open(dbfile, "rb") as fo:
            data = mmap(fo.fileno(), 0, access=ACCESS_READ)
            try:
                for match in re.finditer("X-Apparently-To:", data):
                    start = match.start()
                    size = find_size(data, start)
                    # find_size() returns -1 when no NUL follows; the
                    # message then runs to the end of the file.
                    end = start + size if size >= 0 else len(data)
                    text = data[start:end].replace(chr(13), "\n")
                    msg = message_from_string(text)
                    subject = msg["Subject"]
                    if not subject:
                        continue
                    saved += 1
                    # Decode each base64 encoded word (=?charset?B?...?=)
                    # separately; the payload alphabet includes "+" and "/".
                    subject = re.sub(
                        r"=\?[^?]*\?[Bb]\?([A-Za-z0-9+/=]+)\?=",
                        subfn, subject)
                    log("%d: %s" % (saved, subject))
                    fname = "%s/emails/%d.txt" % (outdir, saved)
                    # The link is relative to index.html, so the output
                    # directory can be moved without breaking it.
                    href = "emails/%d.txt" % saved
                    cls = ["even", "odd"][saved % 2]
                    # Header values come from the recovered mail and may
                    # contain markup characters (e.g. "<user@host>"), so
                    # they are escaped; missing headers become empty cells.
                    html.write(TR % (cls,
                                     escape(msg["Date"] or "", True),
                                     escape(msg["From"] or "", True),
                                     href,
                                     escape(subject, True)) + "\n")
                    with open(fname, "wb") as outfo:
                        outfo.write(text)
            finally:
                data.close()

        html.write("</table></body></html>\n")

    log("* DONE (%d emails)" % saved)

def _get(entry, func):
    def get():
        name = func()
        if not name:
            return

        entry.delete(0, tk.END)
        entry.insert(0, name)

    return get

def askfile(entry):
    """Return a callback that stores the result of askopenfilename() in *entry*."""
    pick_file = _get(entry, askopenfilename)
    return pick_file

def askdir(entry):
    """Return a callback that stores the result of askdirectory() in *entry*."""
    pick_dir = _get(entry, askdirectory)
    return pick_dir

if __name__ == "__main__":
    # Imported here because only the GUI entry point needs it.
    import Queue

    # --- Window layout: database row, output-directory row, progress row ---
    root = tk.Tk()
    root.title("EnSalvage version 0.0.1")
    tk.Label(root, text="Database:").grid(row=0, sticky=tk.W)
    db = tk.Entry(root, width=40)
    db.grid(row=0, column=1, sticky=tk.W+tk.E)
    tk.Button(root, text="...", command=askfile(db)).grid(row=0, column=2)

    tk.Label(root, text="Output Directory:").grid(row=1, sticky=tk.W)
    out = tk.Entry(root, width=40)
    out.grid(row=1, column=1, sticky=tk.W+tk.E)
    tk.Button(root, text="...", command=askdir(out)).grid(row=1, column=2)
    tk.Label(root, text="Progress:").grid(row=2, sticky=tk.W)
    progress = tk.Label(root, text="", width=40, anchor=tk.W)
    progress.grid(row=2, column=1, columnspan=2, sticky=tk.W+tk.E)

    # salvage() runs in a worker thread, but Tk widgets should only be
    # touched from the thread running mainloop().  The worker therefore
    # only queues its messages; the GUI thread displays them.
    messages = Queue.Queue()

    def log(message):
        """Queue *message* for display; called from the worker thread."""
        messages.put(message)

    def show_progress():
        """Display the newest queued message, then schedule the next poll."""
        latest = None
        try:
            while True:
                latest = messages.get_nowait()
        except Queue.Empty:
            pass
        if latest is not None:
            progress["text"] = latest
        root.after(100, show_progress)

    def go():
        """Validate both entries and start salvaging in a background thread."""
        dbfile = db.get().strip()
        if not isfile(dbfile):
            showerror("File Not Found", "Can't find database - %s" % dbfile)
            return

        outdir = out.get().strip()
        if not isdir(outdir):
            showerror("Directory Not Found",
                      "Can't find output directory - %s" % outdir)
            return

        t = Thread(target=salvage, args=(dbfile, outdir, log))
        # Daemon thread: it must not keep the process alive after Quit.
        t.daemon = True
        t.start()

    def view():
        """Open the generated index page in the web browser."""
        webbrowser.open("%s/index.html" % out.get().strip())

    tk.Button(root, text="Salvage", command=go).grid(row=3)
    tk.Button(root, text="View", command=view).grid(row=3, column=1)
    tk.Button(root, text="Quit", command=root.quit).grid(row=3, column=2)

    # Let the entry column absorb any extra width when the window resizes.
    root.columnconfigure(1, weight=1)
    show_progress()
    root.mainloop()