1. Andy Mikhailenko
  2. scripts

Source

scripts / chromium_tool.py

#!/usr/bin/env python

# 2010 (c) Andrey Mikhaylenko
#
# Inspiration:
# http://stackoverflow.com/questions/2141537/convert-datetime-fields-in-chrome-history-file-sqlite-to-readable-format

import os
import datetime
import sqlite3
from itertools import izip
from argh import *
from dark import *
from docu import *

# Storage configuration handed to ``get_db``: backend module and file path.
DB_SETTINGS = {
    'backend': 'docu.ext.shelve_db',
    'path': 'history.shelve',
}

# Queries run against every imported SQLite file. Their result sets are
# consumed in lockstep by the importer: one timestamp per URL.
SQL_TIME = 'SELECT time FROM info'
SQL_URL = 'SELECT c0url FROM pages_content'

class Visit(Document):
    """A stored history record: a timestamp plus the URL it belongs to."""

    use_dot_notation = True
    structure = {
        'datetime': datetime.datetime,
        'url': unicode,
    }


def date_from_webkit(webkit_timestamp):
    """Convert a "Webkit timestamp" into a naive ``datetime.datetime``.

    The value is interpreted as a number of microseconds counted from
    1601-01-01 00:00:00. Anything ``int()`` accepts (an integer or a
    numeric string) may be passed.
    """
    microseconds = int(webkit_timestamp)
    offset = datetime.timedelta(microseconds=microseconds)
    return datetime.datetime(1601, 1, 1) + offset

def statistics(args):
    """Prints a summary of the stored visits.

    Reports the number of visits and the datetime of the earliest one, then
    hands the query to ``cast_cons`` with the year and month of the
    ``datetime`` field as factors.

    :param args: parsed command-line namespace (unused by this command).
    """
    db = get_db(DB_SETTINGS)
    visits = Visit.objects(db).order_by('datetime')
    try:
        earliest = visits[0]
    except IndexError:
        # Empty database: there is no "since" date to report, so say so
        # instead of crashing (the importer guards the same lookup).
        print('No visits recorded yet.')
        return
    print('{0} visits since {1}'.format(visits.count(), earliest.datetime))
    cast_cons(visits, ['datetime__year', 'datetime__month'])

@arg('paths', nargs='+')
def import_chrome_history(args):
    r"""Imports Chromium history.

    Usage::

        $ chromium-tool.py ~/.config/chromium/Default/History\ Index* > hist.txt

    Every file in ``args.paths`` is opened as an SQLite database. Timestamps
    (``SQL_TIME``) and URLs (``SQL_URL``) are read in lockstep and saved as
    ``Visit`` documents. Entries that are not newer than the latest visit
    already stored when the command started are skipped, as are ``data:``
    URLs.

    :param args: parsed command-line namespace; ``args.paths`` lists the
        files to import.
    :raises IOError: if one of the given paths does not exist.
    """
    db = get_db(DB_SETTINGS)
    try:
        latest_visit = Visit.objects(db).order_by('datetime', reverse=True)[0]
    except IndexError:
        # Empty database: nothing to compare against, import everything.
        cutoff = None
    else:
        cutoff = latest_visit.datetime

    for path in args.paths:
        print('Importing "{path}"...'.format(path=path))
        # sqlite3.connect() creates a missing file instead of failing, so the
        # path is checked first. An explicit raise is used because ``assert``
        # is stripped when Python runs with -O.
        if not os.path.exists(path):
            raise IOError('No such file: "{0}"'.format(path))
        c = sqlite3.connect(path)
        try:
            times = (row[0] for row in c.execute(SQL_TIME))
            urls = (row[0] for row in c.execute(SQL_URL))
            # NOTE(review): the pairing relies on both queries returning rows
            # in the same order; neither has an ORDER BY -- confirm that the
            # two tables are row-aligned.
            for raw_timestamp, url in izip(times, urls):
                dt = date_from_webkit(raw_timestamp)
                if cutoff is not None and dt <= cutoff:
                    continue
                if url.startswith('data:'):
                    continue
                Visit(datetime=dt, url=url).save(db)
        finally:
            # Release the connection even if reading or saving fails midway.
            c.close()

if __name__ == '__main__':
    # Expose both commands through an argh parser and run the requested one.
    parser = ArghParser()
    parser.add_commands([statistics, import_chrome_history])
    parser.dispatch()