Source

noodle-bucket / mercurial_data / bitbucket / scrape.py

Full commit
#!/usr/bin/env python
from django.core.management import setup_environ
import settings
setup_environ(settings)

import datetime, sys, urllib2
try:
    import simplejson as json
except ImportError:
    import json
from noodlebucket.mercurial_data.models import Changeset, Repository


# URL template for one repository in the Bitbucket 1.0 REST API; it is filled
# with (user_name, repo_name) and always ends with a trailing slash so that
# sub-resource paths can simply be appended.
API_REPO_URL = 'http://api.bitbucket.org/1.0/repositories/%s/%s/'


def _fetch_json(url):
    """Fetch *url* and return its response body decoded from JSON.

    The HTTP response is closed even when reading or decoding fails, so the
    many requests issued by this script do not leak open connections.
    Exceptions raised by ``urllib2.urlopen`` or ``json.loads`` propagate to
    the caller unchanged.
    """
    response = urllib2.urlopen(url)
    try:
        return json.loads(response.read())
    finally:
        response.close()


# We'll refresh 5 repos at a time.
# NOTE(review): '-refreshed' sorts in descending order, so this selects the
# five rows with the greatest `refreshed` value. If that field holds a
# "last refreshed" timestamp, the same repos would be chosen on every run --
# confirm against the Repository model whether ascending order was intended.
repos = Repository.objects.order_by('-refreshed')[:5]

for repo in repos:
    repo_url = API_REPO_URL % (repo.user_name, repo.repo_name)

    # Refresh the repository's own metadata.
    repo.description = _fetch_json(repo_url)['description']
    repo.save()

    # The listing request is capped with limit=50.
    csets_data = _fetch_json(repo_url + 'changesets/?limit=50')
    for cset in csets_data['changesets']:
        # Skip changesets already stored for this repository.
        if Changeset.objects.filter(repo=repo, cset_hash=cset['node']).count() != 0:
            continue

        # Author, branch and message are taken from the listing entry; date
        # and parents are filled in below from the per-changeset resource.
        c = Changeset(
            repo=repo, cset_hash=cset['node'],
            cset_parent1=None, cset_parent2=None,
            cset_author=cset['author'], cset_date=None,
            cset_branch=cset['branch'], cset_message=cset['message']
        )
        cset_data = _fetch_json(repo_url + 'changesets/%s/' % cset['node'])
        c.cset_date = datetime.datetime.strptime(cset_data['timestamp'], '%Y-%m-%d %H:%M:%S')

        # The parents list may be empty (presumably for a repository's root
        # changeset); indexing it unconditionally raised IndexError and
        # aborted the whole run. Missing parents are left as None.
        parents = cset_data['parents']
        if len(parents) > 0:
            c.cset_parent1 = parents[0]
        if len(parents) > 1:
            c.cset_parent2 = parents[1]
        c.save()