Commits

Brad Montgomery committed 9f3259e

no more scraping
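
For reference, a rough usage sketch of the refactored methods below. The BitBucket client name and the Repository constructor call are assumptions based on the surrounding library, not part of this diff:

    bb = BitBucket()                                   # assumed client object providing _loads()
    repo = Repository(bb, 'bkmontgomery', 'python-bitbucket')
    repo.get()               # first call hits the API; the result is cached in _data
    print(repo.forks())      # forks_count read from the cached API payload
    print(repo.fork())       # is_fork flag from the same payload, no HTML scraping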


Files changed (1)

         self.bb = bb
         self.username = username
         self.slug = slug
-        self.base_url = api_base + 'repositories/%s/%s/' % (self.username, self.slug)
-        self.scraped_content = '' # *cringe*
+        self.base_url = api_base + 'repositories/{0}/{1}/'.format(
+            self.username,
+            self.slug
+        )
+        self._data = None  # cache of repo data from the api
 
     def get(self):
-        return self.bb._loads(self.base_url)
+        if self._data is None:
+            self._data = self.bb._loads(self.base_url)
+        return self._data
 
     def changeset(self, revision):
         """Get one changeset from a repos."""
         url = self.base_url + 'followers/'
         return self.bb._loads(url)
     
-    def _scrape(self):
-        """
-        The bitbucket API doesn't readily provide some interesting info
-        that can be retreived from a repo's page. So, we scrape it.
-        Yes, this feels dirty :(
-        """
-        url = 'https://bitbucket.org/{0}/{1}'.format(self.username, self.slug)
-        response = urlopen(Request(url))
-        if response.code == 200:
-            self.scraped_content = response.read().lower()
-        else:   
-            self.scraped_content = '' 
-        response.close()
-
     def forks(self):
-        """ 
-        This is a dirty hack to get the number of Forks/Queues by scraping
-        the project page. If the scraping fails, this just returns None.
-
-        Number of forks is in content that looks like this:
-            
-            <a href="#forks">
-                <span class="value">1</span>
-                Fork
-            </a>
-
-        """
-        if not self.scraped_content:
-            self._scrape()
-        if self.scraped_content:
-            start = self.scraped_content.find('<a href="#forks">')
-            if start > 0:
-                end = start + self.scraped_content[start:].find("</span>")
-                num_forks = strip_tags(self.scraped_content[start:end].strip())
-                return int(num_forks)
-        return None
+        """Number of times this Repo has been forked."""
+        data = self.get()
+        return data.get("forks_count", 0)
     
     def fork(self):
-        """
-        Returns True if this Repository is a Fork, False otherwise.
-        NOTE: You *could* get this through the Events API (http://confluence.atlassian.com/display/BBDEV/Events)
-        which defaults to 25 events. Info about the fork event could be buried under tons of commit events, so
-        for now, this scrapes for the information rather than querying the api unnecessarily.
-        """
-        if not self.scraped_content:
-            self._scrape()
-        if self.scraped_content:
-            # look for a string like: "bkmontgomery / python-bitbucket (fork of jmoiron / python-bitbucket)"
-            search_string = u"%s / %s (fork of" % (self.username, self.slug)
-            return self.scraped_content.decode("utf8").find(search_string) > 0
+        """Is this a fork of another Repo? Returns True or False."""
+        data = self.get()
+        return data.get("is_fork", False)
     
     def contributors(self, limit=50, sleep_after=None, sleep_for=None):
         """