Source

noodle-bucket / mercurial_data / bitbucket / scrape.py

#!/usr/bin/env python
from django.core.management import setup_environ
import settings
setup_environ(settings)

import datetime, sys, urllib2
try:
    import simplejson as json
except ImportError:
    import json
from noodlebucket.mercurial_data.models import Changeset, Repository


# We'll refresh 5 repos at a time
# The slice limits the queryset to at most five Repository rows, ordered by
# the `refreshed` field in descending order.
# NOTE(review): descending order selects the rows with the *largest*
# `refreshed` values; if `refreshed` is a last-refresh timestamp this picks
# the most recently refreshed repositories rather than the stalest ones --
# confirm that this is intended.
repos = Repository.objects.order_by('-refreshed')[:5]

# For every selected repository: pull its metadata and changeset list from the
# Bitbucket 1.0 REST API and mirror them into the local models.
# There is no error handling or timeout on the HTTP calls below, so any
# urllib2 error, JSON decoding error or missing key propagates and aborts the
# whole script.
for repo in repos:
    # Repository metadata endpoint; only the 'description' key is read from
    # the response and persisted on the Repository row.
    repo_url = 'http://api.bitbucket.org/1.0/repositories/%s/%s/' % (repo.user_name, repo.repo_name)
    repo_data = json.loads(urllib2.urlopen(repo_url).read())
    repo.description = repo_data['description']
    repo.save()
    
    # Changeset listing endpoint; the response is expected to carry a
    # 'changesets' list whose items have 'node', 'author', 'branch' and
    # 'message' keys (those are the keys read below).
    csets_url = 'http://api.bitbucket.org/1.0/repositories/%s/%s/changesets/' % (repo.user_name, repo.repo_name)
    csets_data = json.loads(urllib2.urlopen(csets_url).read())
    for cset in csets_data['changesets']:
        # Only process changesets for which no row exists with this repo, this
        # hash AND a non-null parent. A stored row with the same hash but a
        # NULL cset_parent therefore does not count as "already present".
        # NOTE(review): the object built below starts with cset_parent=None,
        # so unless later code sets a parent it would be re-processed on the
        # next run -- verify against the rest of the loop body.
        if Changeset.objects.filter(repo=repo, cset_hash=cset['node'], cset_parent__isnull=False).count() == 0:
            # Build the model instance in memory; parent and date are left as
            # None here and the date is filled in from the detail request.
            c = Changeset(
                repo=repo, cset_hash=cset['node'], cset_parent=None,
                cset_author=cset['author'], cset_date=None,
                cset_branch=cset['branch'], cset_message=cset['message']
            )
            # One extra HTTP request per new changeset to read its 'timestamp'.
            cset_url = 'http://api.bitbucket.org/1.0/repositories/%s/%s/changesets/%s/' % (repo.user_name, repo.repo_name, cset['node'])
            cset_data = json.loads(urllib2.urlopen(cset_url).read())
            # Parsed into a naive datetime (no tzinfo); the timestamp string
            # must match 'YYYY-MM-DD HH:MM:SS' exactly or strptime raises
            # ValueError.
            c.cset_date = datetime.datetime.strptime(cset_data['timestamp'], '%Y-%m-%d %H:%M:%S')
            # Debug output (Python 2 print statement): raw timestamp next to
            # the parsed value.
            # NOTE(review): within the visible lines `c` is never saved; the
            # remainder of this loop body may lie beyond this excerpt.
            print cset_data['timestamp'], c.cset_date