Commits

Brad Montgomery committed 6aaddba

added methods to pull contributor data from changesets

Comments (0)

Files changed (1)

 https://github.com/dustin/py-github
 
 """
-from stripperparser import strip_tags
+import datetime, hashlib, re, time
+from functools import wraps
 from urllib2 import Request, urlopen, URLError
 from urllib import urlencode
-from functools import wraps
-import datetime
-import time
+
+from stripperparser import strip_tags
 
 try:
     import json
         url = self.base_url + 'changesets/%s/' % (revision)
         return json.loads(self.bb.load_url(url))
 
-    def changesets(self, limit=None):
+    def changesets(self, limit=None, start=None):
         """Get information about changesets on a repository."""
         url = self.base_url + 'changesets/'
-        query = smart_encode(limit=limit)
+        query = smart_encode(limit=limit, start=start)
         if query: url += '?%s' % query
         return json.loads(self.bb.load_url(url, quiet=True))
 
+    def parse_changeset_for_author(self, cset):
+        """Pull username, full name and email out of a changeset's info.
+
+        ``cset`` is a changeset dict. Its ``author`` value is passed through
+        unchanged, and its ``raw_author`` value is expected to look like
+        ``Full Name <name@example.com>``; either part may be missing.
+
+        Returns a dict with the keys ``name``, ``author``, ``email`` and
+        ``gravatar_id`` (the md5 hex digest of the trimmed, lower-cased
+        email). ``name`` and ``email`` are empty strings when they cannot
+        be parsed.
+        """
+        author = cset.get('author', '')
+        # ``raw_author`` may be absent or null; both mean "nothing to parse".
+        raw_author = cset.get('raw_author') or ''
+
+        # Everything before the first "<" is the name and an optional
+        # "<...>" is the email. Both groups may be empty, so the search
+        # always yields a match for string input.
+        name_part, email_part = re.search(r"([^<]*)(<.+>)?", raw_author).groups()
+        full_name = name_part.strip()
+        # Remove only the angle brackets; "|" is left alone because it is a
+        # character that may legitimately appear in an address.
+        email = re.sub(r'[<>]', '', email_part or '').strip()
+
+        digest_input = email.lower()
+        if not isinstance(digest_input, bytes):
+            # md5 works on bytes; encode text explicitly so non-ASCII
+            # addresses do not raise.
+            digest_input = digest_input.encode('utf-8')
+
+        return {
+            'name': full_name,
+            'author': author,
+            'email': email,
+            'gravatar_id': hashlib.md5(digest_input).hexdigest(),
+        }
+
     def tags(self):
         """Get a list of tags for a repository."""
         url = self.base_url + 'tags/'
             # look for a string like: "bkmontgomery / python-bitbucket (fork of jmoiron / python-bitbucket)"
             search_string = u"%s / %s (fork of" % (self.username, self.slug)
             return self.scraped_content.decode("utf8").find(search_string) > 0
+    
+    def contributors(self, limit=50):
+        """
+        NOTE: This method is not very nice to the bitbucket API.
+
+        Use Sparingly.
+
+        Get contributors for a repo, and attempt to provide info similar to
+        Github's ``contributors``. To do this, we look for data in all of a
+        project's changesets, and ``limit`` controls how many of those we
+        query at once.
+
+        Changesets without an ``author`` are skipped. ``name`` is the full
+        name parsed from the changeset by ``parse_changeset_for_author``;
+        when no name could be parsed it falls back to the login.
+
+        This method returns a dictionary of the form:
+
+            {
+                "contributors": [
+                    {
+                        "name": "Brad Montgomery",
+                        "gravatar_id": "d57aec10399cbb252bd890c2bb3fe1c9",
+                        "contributions": 123,
+                        "login": "bkmontgomery",
+                        "email": "brad@example.com"
+                    },
+                ]
+            }
+
+        """
+        contributors = {}  # user data, keyed by author/username
+
+        # Do this for every "page" of the changesets...
+        start = None
+        retrieved_changesets = 0
+        while True:
+            page = self.changesets(limit=limit, start=start)
+            csets = page.get('changesets') or []
+            if not csets:
+                # An empty page means there is nothing left to read; stop
+                # rather than requesting the same page again forever.
+                break
+
+            for cset in csets:
+                author_data = self.parse_changeset_for_author(cset)
+                author = author_data.get('author', '')
+                if not author:
+                    continue
+                if author in contributors:
+                    contributors[author]['contributions'] += 1
+                else:
+                    contributors[author] = {
+                        # Keep the parsed full name; use the login only
+                        # when no name was found.
+                        'name': author_data.get('name') or author,
+                        'login': author,
+                        'email': author_data.get('email', ''),
+                        'gravatar_id': author_data.get('gravatar_id', ''),
+                        'contributions': 1,
+                    }
+
+            retrieved_changesets += len(csets)
+            # A missing or null ``count`` is treated as "nothing more to fetch".
+            if (page.get('count') or 0) - retrieved_changesets <= 0:
+                break
+            # NOTE(review): offset arithmetic kept from the original patch;
+            # confirm it matches what the API expects for ``start``.
+            start = retrieved_changesets + (page.get('start', None) or 0)
+
+        #TODO sort based on contributions, like github does?
+        return {'contributors': list(contributors.values())}
 
     def __repr__(self):
         return '<Repository: %s\'s %s>' % (self.username, self.slug)