Commits

Lynn Rees  committed 6179c0b

- more polishing

  • Participants
  • Parent commits f79260f

Comments (0)

Files changed (1)

     def _webwalk(self):
         '''Yields good URLs from under a base URL'''
         # Assignments
-        webwalk, cache = self._webwalk, self._cache
-        width, urlresolve = self.width, self._urlresolve
+        cache, urlresolve = self._cache, self._urlresolve
         # End processing if cache is empty
         while cache:
             # Fetch item from cache
         width -- amount of resources to crawl (default: 200)
         depth -- depth in hierarchy to crawl (default: 5)'''
         # Assignments
-        self._visited, self._good, self._cache, self.badurls = {}, {}, {}, [] 
-        self.redirs, self.outside, self.badhtml, self.unhttp = {}, {}, {}, {}        
+        self._visited, self._good, self._cache, self.badurls = {}, {}, {}, []
+        self.redirs, self.outside, self.badhtml, self.unhttp = {}, {}, {}, {}
         webwalk, good, self._robot = self._webwalk, self._good, self._rparser()
         uparse = self._uparse 
         # Use global base if present
             self.base, self._sb, good[base] = base, base.split('/'), 1
         # If URL is bad, abort and raise error
         else:
-            raise IOError, "URL does not exist."
-            return False
+            raise IOError, "URL is invalid"
         # Assign width
         if self.width and width == 200: width = self.width
         # Adjust depth to length of base URL