1. Jannis Leidel
  2. pep381client


oremj  committed a63957e

Filter out external links.

  • Participants
  • Parent commits 955cb5e
  • Branches default

Comments (0)

Files changed (1)

File pep381client/__init__.py

View file
 from __future__ import with_statement
 import cPickle, os, xmlrpclib, time, urllib2, httplib, socket
+import re
 from xml.etree import ElementTree
 import xml.parsers.expat
 import sqlite
         self.skip_file_contents = False
+        self.filter_external_hrefs = True
     def defaults(self):
         # Fill fields that may not exist in the pickle
         for field, value in (('quiet', False),):
         with open(os.path.join(self.homedir, "web", "last-modified"), "wb") as f:
             f.write(time.strftime("%Y%m%dT%H:%M:%S\n", time.gmtime(when)))
+    def filter_external(self, html):
+        if self.filter_external_hrefs:
+            html = re.sub('<a\s*href="(?!\.\./\.\./.*).+">.*</a>', '', html)
+        return html
     def copy_simple_page(self, project):
         project = project.encode('utf-8')
         h = http()
         if not os.path.exists(project_simple_dir):
         with open(os.path.join(project_simple_dir, 'index.html'), "wb") as f:
-            f.write(html)
+            f.write(self.filter_external(html))
         h.putrequest('GET', '/serversig/'+urllib2.quote(project)+'/')
         h.putheader('User-Agent', UA)