Source

noodle-bucket / mercurial_data / bitbucket / scrape.py

#!/usr/bin/env python
# Standalone script bootstrap: the project's settings module is handed to
# Django's setup_environ() before the ORM models are imported further down,
# so the ordering of these statements matters.
from django.core.management import setup_environ
import settings
setup_environ(settings)

import sys, urllib2
# Use the simplejson package when it is installed; otherwise fall back to the
# standard-library json module. Both are bound to the name `json`.
try:
    import simplejson as json
except ImportError:
    import json
from noodlebucket.mercurial_data.models import Changeset, Repository


# Refresh a batch of 5 repositories per run: pull each repository's
# description from the Bitbucket 1.0 REST API, then walk its changeset
# listing looking for changesets that are not stored locally yet.
# NOTE(review): '-refreshed' sorts descending, so this picks the 5 rows with
# the largest `refreshed` values. If `refreshed` is a last-refresh timestamp,
# that is the most recently refreshed repos rather than the stalest ones --
# confirm the intended order. Nothing in the visible code updates `refreshed`.
repos = Repository.objects.order_by('-refreshed')[:5]

for repo in repos:
    # Fetch the repository metadata and persist its description.
    # NOTE(review): urlopen/json errors are not caught here, so one failing
    # request aborts the whole run -- confirm that is acceptable.
    repo_url = 'http://api.bitbucket.org/1.0/repositories/%s/%s/' % (repo.user_name, repo.repo_name)
    repo_data = json.loads(urllib2.urlopen(repo_url).read())
    repo.description = repo_data['description']
    repo.save()
    
    # Fetch the changeset listing for the same repository.
    cset_url = 'http://api.bitbucket.org/1.0/repositories/%s/%s/changesets/' % (repo.user_name, repo.repo_name)
    cset_data = json.loads(urllib2.urlopen(cset_url).read())
    for cset in cset_data['changesets']:
        # Only act on changesets whose hash (the API's 'node' field) has no
        # matching Changeset row for this repository.
        if Changeset.objects.filter(repo=repo, cset_hash=cset['node']).count() == 0:
            # Only the hash and message are taken from the API payload;
            # parent, author and date are left as None.
            # NOTE(review): `c` is never saved in the visible code and the
            # author is only printed -- confirm whether a c.save() (and the
            # remaining field mapping) follows or is still missing.
            c = Changeset(repo=repo, cset_hash=cset['node'], cset_parent=None,
                cset_author=None, cset_date=None, cset_message=cset['message'])
            print cset['author']