# -*- coding: utf-8 -*-
"""
Commands for importing data from `Project Hamster`_, a time tracking tool.

.. _Project Hamster: http://projecthamster.wordpress.com/

.. warning::

    We cannot guarantee accurate import/export because Hamster does not
    update a fact when the user edits it: it deletes the old fact and creates
    a new one with a new ID, even if only the description changes. All we can
    do is import new facts, update existing ones and purge orphaned ones, and
    updating is of limited use given the way Hamster handles fact IDs.

    So be careful: if you edit a fact in *orgtool* and then edit the same fact
    in Hamster, you will lose the local changes after the next purge. And you
    do have to purge the facts to avoid duplicates.
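
A rough workflow sketch; the calls below are illustrative (in practice these
functions are invoked through the command-line layer of *tool*, which supplies
the ``args`` namespaces)::

    import_facts(args)   # pull facts that are not yet stored locally
    update_facts(args)   # refresh activity/category titles of known facts
    purge_facts(args)    # drop facts that were replaced or deleted in Hamster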

"""

import datetime
import dbus.exceptions
import hamster.client
import pytz
import time
from tool import app
from tool.cli import arg, confirm, CommandError
from doqu import Document
from orgtool.ext.events import Plan
from schema import HamsterFact


# Hamster stores dates in local timezone, darn.
TIMEZONE_NAME = 'UTC'


@arg('--since', help='start date (YYYY-MM-DD[-HH:MM], local time)')
@arg('--until', help='end date (YYYY-MM-DD[-HH:MM], local time)')
@arg('-d', '--dry-run', default=False, help='do not really save anything')
@arg('-z', '--timezone')
@arg('--guess-tz', default=False, help='obtain timezone from adjacent facts')
def import_facts(args):
    """Imports all available facts from Hamster. Checks uniqueness.

    :param guess-tz:

        The timezone of every imported fact is obtained from adjacent facts.
        Useful if the changeset spans previously imported data from multiple
        timezones. Very slow.
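
    Example (illustrative argument values, in the date format this command
    parses)::

        --since 2011-05-01 --until 2011-05-31-23:59 --timezone Europe/Berlin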

    """
    db = app.get_feature('document_storage').default_db
    ext = app.get_feature('hamster')

    def get_date_or_datetime(string):
        try:
            return datetime.datetime.strptime(string, '%Y-%m-%d-%H:%M')
        except ValueError:
            try:
                return datetime.datetime.strptime(string, '%Y-%m-%d')
            except ValueError:
                raise ValueError('Could not parse date "{0}"'.format(string))

    if args.since:
        start_time = get_date_or_datetime(args.since)
    else:
        imported = db.find(HamsterFact).order_by('date_time', reverse=True)
        if imported:
            start_time = imported[0].date_time
        else:
            # use a naive datetime so it can be compared to fact datetimes below
            start_time = datetime.datetime(1980, 1, 1)

    if args.until:
        end_time = get_date_or_datetime(args.until)
    else:
        end_time = datetime.datetime.utcnow() + datetime.timedelta(hours=12)

    # timezone: argument or config or UTC
    tz_name = args.timezone or ext.env.get('tz', TIMEZONE_NAME)
    if not args.guess_tz and not args.timezone:
        if not confirm('Did all facts between {start_time} and {end_time} '
                       'take place in {tz_name}?'.format(**locals()),
                       default=True):
            raise CommandError('Please specify another time zone or provide '
                               'more precise dates.')

    if args.dry_run:
        print('(dry run, no data will actually be saved.)')

    seen_cnt = imported_cnt = 0
    errors = []

    storage = hamster.client.Storage()

    for data in storage.get_facts(start_time, end_time):
        seen_cnt += 1
        if not all([data.start_time, data.end_time]):
            print('WARNING: missing date in {0}'.format(data))
            errors.append(data)
            time.sleep(1)
            continue
        if data.start_time < start_time or end_time < data.end_time:
            # This should have been caught by the Hamster API, but it seems to
            # truncate the given date/time to a plain date.  When you switch
            # timezones, it is really important to specify the precise *time*.
            continue
        saved = _import_fact(data, dry_run=args.dry_run, tz_name=tz_name,
                             guess_tz=args.guess_tz)
        if saved:
            imported_cnt += 1

    print('Processed {seen_cnt}, imported {imported_cnt} '
          'facts.'.format(**locals()))
    if errors:
        print('!!! ERRORS:')
        print('')
        for data in errors:
            print('* {0} {1} - {2} : {3}'.format(
                data.id, data.start_time, data.end_time, data))
            print('')
    if args.dry_run:
        print('(Nothing was actually saved in dry-run mode.)')

@arg('-d', '--dry-run', default=False, help='do not update anything')
def update_facts(args):
    """Updates all facts previously imported from Hamster.

    Note that this only works for activity and category titles: Hamster
    updates a record in place when those change, but *replaces* the fact
    (with a new ID) when other fields change their values, e.g. the
    description or tags.
    """
    db = app.get_feature('document_storage').default_db
    imported = db.find(HamsterFact).order_by('date_time')
    start_date = imported[0].date_time if imported else datetime.date(1980,1,1)

    print('Updating all facts since {0}'.format(start_date))
    if args.dry_run:
        print('(dry run, no data will actually be updated.)')

    seen_cnt = updated_cnt = 0

    storage = hamster.client.Storage()

    for data in storage.get_facts(start_date, datetime.date.today()):
        updated = _update_fact(data, dry_run=args.dry_run)
        if updated:
            updated_cnt += 1
        seen_cnt += 1

    print('Updated {updated_cnt} facts of {seen_cnt}.'.format(**locals()))
    if args.dry_run:
        print('(Nothing was actually saved in dry-run mode.)')

@arg('-d', '--dry-run', default=False, help='do not delete anything')
def purge_facts(args):
    """Deletes all facts previously imported from Hamster and not currently
    present there.     WARNING: the command does *NOT* check if the "orphaned"
    facts are in the scope of given Hamster storage. That is, all facts
    gathered from an older storage will be DROPPED. This should be fixed later.
    """
    db = app.get_feature('document_storage').default_db
    imported = db.find(HamsterFact).order_by('date_time')
    storage = hamster.client.Storage()

    seen_cnt = deleted_cnt = 0

    print('Purging orphaned facts...')
    if args.dry_run:
        print('(dry run, no data will actually be deleted.)')

    for fact in imported:
        fact_id = int(fact.x_hamster_id)
        try:
            storage.get_fact(fact_id)
        except dbus.exceptions.DBusException:
            # the fact no longer exists in Hamster
            # XXX TODO: check that the Hamster storage is not newer than the
            #           known facts!  If it is, we'll lose older data.
            print('DEL {0} {1}'.format(fact, fact.get_duration()))

            # Check whether the fact can be safely deleted.
            # FIXME this is a quick fix for plan references. We should instead
            # check *all* attributes (via a Document instance) and copy them
            # to the newer fact; if the newer fact cannot be found (e.g. the
            # date/time was updated), then just leave it as is.
            # This should *not* apply to the created/updated timestamps.
            plan_pk = fact.get('plan')
            plan = Plan.object(db, plan_pk) if plan_pk else None
            if plan:
                # try finding exact match by date/time (exact replacement)
                same_facts = imported.where(date_time=fact.date_time)
                same_facts = [f for f in same_facts if f.pk != fact.pk]
                replacement = same_facts[0] if same_facts else None
                if replacement:
                    print('  Copying plan to fact {0}'.format(replacement.pk))
                    assert not replacement.get('plan'), (
                        'the replacing fact must be freshly imported')
                    d = Document.object(db, replacement.pk)
                    d['plan'] = plan.pk
                    if not args.dry_run:
                        d.save()
                        fact.delete()
                    deleted_cnt += 1
                else:
                    print('  Not deleting: fact references plan {0}'.format(plan))
            else:
                if not args.dry_run:
                    fact.delete()
                deleted_cnt += 1
        seen_cnt += 1

    print('Deleted {deleted_cnt} facts of {seen_cnt}.'.format(**locals()))
    if args.dry_run:
        print('(Nothing was actually deleted in dry-run mode.)')


# TODO
#@arg('since')
#@arg('until')
#@arg('timezone')
#def change_tz(args):
#    "Changes timezone of given events."
#    raise NotImplementedError

def _convert_date(date, tz_name):
    if date is None:
        # this really can happen, e.g. when importing the current (ongoing)
        # activity which has no end time yet
        return
    timezone = pytz.timezone(tz_name)
    local_date = timezone.localize(date)  # just add tzinfo
    utc_date = local_date.astimezone(pytz.utc)
    return pytz.utc.normalize(utc_date)
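
# A minimal, assumed sanity check for `_convert_date` (illustrative only, not
# part of the original module).  Berlin is UTC+1 in mid-January (no DST), so
# local noon maps to 11:00 UTC:
#
#     >>> _convert_date(datetime.datetime(2011, 1, 15, 12, 0), 'Europe/Berlin')
#     datetime.datetime(2011, 1, 15, 11, 0, tzinfo=<UTC>)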

def _prepare_data(data, tz_name):
    assert data.id
    return dict(
        summary = unicode(data.activity),
        details = unicode(data.description or ''),  # can be None
        date_time = _convert_date(data.start_time, tz_name),
        date_time_end = _convert_date(data.end_time, tz_name),
        tags = [unicode(x) for x in data.tags],

        x_hamster_type = u'fact',
        x_hamster_id = int(data.id),
        x_hamster_category = unicode(data.category),
        x_hamster_activity_id = int(data.activity_id),
        #x_hamster_delta = data['delta'],  # a datetime.timedelta obj!

        x_hamster_orig_datetime = data.start_time,
        x_hamster_timezone = unicode(tz_name),
    )

def _import_fact(data, tz_name, dry_run=False, guess_tz=False):
    db = app.get_feature('document_storage').default_db
    assert data.id
    if db.find(HamsterFact, x_hamster_id=int(data.id)).count():
        return False
    if guess_tz:
        # TODO: peek at the next one, too; ask if different
        prevs = db.find(
            HamsterFact,
            x_hamster_orig_datetime__lte=data.start_time,
        ).order_by('x_hamster_orig_datetime', reverse=True)
        if prevs.count():
            tz_name = prevs[0].x_hamster_timezone
    prepared = _prepare_data(data, tz_name=tz_name)
    fact = HamsterFact(**prepared)
    if not dry_run:
        fact.save(db)
    print('ADD {0} {1}'.format(fact, tz_name))
    return fact

def _update_fact(data, dry_run=False):
    db = app.get_feature('document_storage').default_db
    facts = db.find(HamsterFact, x_hamster_id=int(data.id))
    if not facts:
        print('no fact with id {0}'.format(repr(data.id)))
        return False
    assert 1 == len(facts)
    fact = facts[0]
    prepared = _prepare_data(data, fact.x_hamster_timezone)
    if prepared['tags'] == []:
        prepared['tags'] = None  # this is how the schema works
    for key in prepared:
        old_value = fact[key]
        if isinstance(old_value, datetime.datetime):
            # TODO: don't skip (messing with TZ-naive/aware datetimes)
            continue
#            if prepared[key].tzname():
#                old_value = pytz.utc.localize(old_value)
        if prepared[key] != old_value:
            print('--- {0}'.format(fact))
            print('changed {0}: {1} → {2}'.format(
                key, repr(fact[key]), repr(prepared[key])))
            break
        #print 'EQ:', repr(prepared[key]), 'vs.', repr(fact[key])
    else:
        # the loop finished without a break, i.e. no field has changed
        #print 'SAME', fact
        return False  # same data
    fact.update(**prepared)
    if not dry_run:
        fact.save()
    return fact