Commits

ewdurbin committed 86cc8d9

use xmlrpc2 for connection to PyPI XMLRPC service

The goal here is primarily to validate SSL certificates when
obtaining metadata for the sync via XMLRPC.

In order to facilitate security for mirrors, this commit also
removes the choice to use `http` schemas for syncing, and raises
a ValueError and appropriate Error in the log

Comments (0)

Files changed (7)

           'pytest-timeout',
           'pytest-cache',
           'requests',
-          'pdbpp'
+          'pdbpp',
+          'xmlrpc2'
           ],
       entry_points="""
             [console_scripts]

src/bandersnatch/default.conf

 
 ; The PyPI server which will be mirrored.
 ; master = https://testpypi.python.org
+; scheme for PyPI server MUST be https
 master = https://pypi.python.org
 
 ; The network socket timeout to use for all connections. This is set to a

src/bandersnatch/master.py

 from .utils import USER_AGENT
-import httplib
 import logging
 import requests
-import sys
-import xmlrpclib
-
+import xmlrpc2
 
 logger = logging.getLogger(__name__)
 
 
-def _get_connector(ssl=False):
-    if ssl:
-        try:
-            if sys.version_info < (2, 7):
-                httplib.HTTPS
-            else:
-                httplib.HTTPSConnection
-        except AttributeError:
-            raise NotImplementedError(
-                "your version of httplib doesn't support HTTPS")
-
-        if sys.version_info < (2, 7):
-            return httplib.HTTPS
-        else:
-            return httplib.HTTPSConnection
-
-    else:
-        if sys.version_info < (2, 7):
-            return httplib.HTTP
-        else:
-            return httplib.HTTPConnection
-
-
-class CustomTransport(xmlrpclib.Transport):
+class CustomTransport(xmlrpc2.client.HTTPSTransport):
     """This transport adds a custom user agent string and timeout handling."""
 
-    def __init__(self, ssl=False, timeout=10.0):
-        xmlrpclib.Transport.__init__(self)
+    def __init__(self, timeout=10.0):
+        xmlrpc2.client.HTTPSTransport.__init__(self)
         self.timeout = timeout
-        self.ssl = ssl
-        if not hasattr(self, '_connection'):
-            self._connection = None
+        self.session.headers.update({"User-Agent": USER_AGENT,
+                                     "Content-Type": "text/xml"})
 
-    def make_connection(self, host):
-        # Partially copied from xmlrpclib.py because its inheritance model is
-        # inconvenient.
-
-        # return an existing connection if possible.  This allows
-        # HTTP/1.1 keep-alive.
-        if self._connection and host == self._connection[0]:
-            return self._connection[1]
-
-        # create an HTTP connection object from a host descriptor
-        chost, self._extra_headers, x509 = self.get_host_info(host)
-        self._extra_headers = [('User-Agent', USER_AGENT)]
-
-        # store the host argument along with the connection object
-        self._connection = host, _get_connector(ssl=self.ssl)(
-            chost, None, **(x509 or {}))
-
-        return self._connection[1]
+    def request(self, uri, body):
+        resp = self.session.post(uri, body, timeout=self.timeout)
+        resp.raise_for_status()
+        return resp.content
 
 
 class StalePage(Exception):
 
     def __init__(self, url, timeout=10.0):
         self.url = url
+        if self.url.startswith('http://'):
+            logger.error("Master URL {0} is not https scheme".format(url))
+            raise ValueError("Master URL {0} is not https scheme".format(url))
         self.timeout = timeout
         self.session = requests.Session()
+        self.session.headers.update({'User-Agent': USER_AGENT})
 
     def get(self, path, required_serial, **kw):
         logger.debug('Getting {0} (serial {1})'.format(path, required_serial))
         if not path.startswith(self.url):
             path = self.url + path
-        headers = {'User-Agent': USER_AGENT}
-        r = self.session.get(path, timeout=self.timeout,
-                             headers=headers, **kw)
+        r = self.session.get(path, timeout=self.timeout, **kw)
         r.raise_for_status()
         # The PYPI-LAST-SERIAL header allows us to identify cached entries,
         # e.g. via the public CDN or private, transparent mirrors and avoid us
         return r
 
     def rpc(self):
-        # This is a function as a wrapper to make it thread-safe.
-        use_ssl = self.xmlrpc_url.startswith('https:')
-        t = CustomTransport(ssl=use_ssl, timeout=self.timeout)
-        return xmlrpclib.ServerProxy(self.xmlrpc_url, transport=t)
+        transports = [CustomTransport(timeout=self.timeout)]
+        return xmlrpc2.client.Client(uri=self.xmlrpc_url,
+                                     transports=transports)
 
     @property
     def xmlrpc_url(self):
         return packages
 
     def package_releases(self, package):
-        return self.rpc().package_releases(package, True)
+        return self.rpc().package_releases(unicode(package), True)
 
     def release_urls(self, package, version):
-        return self.rpc().release_urls(package, version)
+        return self.rpc().release_urls(unicode(package), version)

src/bandersnatch/tests/conftest.py

 @pytest.fixture
 def master(requests):
     from bandersnatch.master import Master
-    master = Master('http://pypi.example.com')
+    master = Master('https://pypi.example.com')
     master.rpc = mock.Mock()
     master.session = mock.Mock()
     master.session.get = requests
 @pytest.fixture
 def master_mock():
     master = mock.Mock()
-    master.url = 'http://pypi.example.com'
+    master.url = 'https://pypi.example.com'
     return master
 
 
         patcher.stop()
     request.addfinalizer(tearDown)
     return mirror
-
-
-@pytest.fixture
-def httplib(request):
-    to_patch = ['httplib.HTTPConnection', 'httplib.HTTPSConnection',
-                'httplib.HTTP', 'httplib.HTTPS']
-    mocks = {}
-    patchers = {}
-    for p in to_patch:
-        patchers[p] = mock.patch(p)
-        mocks[p] = patchers[p].start()
-
-    def tearDown():
-        for p in patchers.values():
-            p.stop()
-    request.addfinalizer(tearDown)
-    return mocks
-
-
-@pytest.fixture
-def no_https(request):
-    import httplib
-    httpsconn = httplib.HTTPSConnection
-    https = httplib.HTTPS
-    del httplib.HTTPSConnection
-    del httplib.HTTPS
-
-    def tearDown():
-        httplib.HTTPSConnection = httpsconn
-        httplib.HTTPS = https
-    request.addfinalizer(tearDown)

src/bandersnatch/tests/test_master.py

 from bandersnatch.master import Master, StalePage
+import xmlrpc2
 import pytest
-import sys
-import xmlrpclib
 
 
 def test_rpc_factory():
-    master = Master('http://pypi.example.com')
-    assert isinstance(master.rpc(), xmlrpclib.ServerProxy)
+    master = Master('https://pypi.example.com')
+    assert isinstance(master.rpc(), xmlrpc2.client.Client)
+
+
+def test_disallow_http():
+    with pytest.raises(ValueError):
+        Master('http://pypi.example.com')
 
 
 def test_rpc_url(master):
-    assert master.xmlrpc_url == 'http://pypi.example.com/pypi/'
+    assert master.xmlrpc_url == 'https://pypi.example.com/pypi/'
 
 
 def test_all_packages(master):
     master.release_urls('foobar', '0.1')
 
 
-def test_transport_reuses_connection():
-    from bandersnatch.master import CustomTransport
-    t = CustomTransport()
-    t._connection = ('localhost', 'existing-connection')
-    assert t.make_connection('localhost') == 'existing-connection'
-
-
-def test_transport_creates_new_http_connection(httplib):
-    from bandersnatch.master import CustomTransport
-    t = CustomTransport()
-    t.make_connection('localhost')
-    if sys.version_info < (2, 7):
-        assert (t.make_connection('localhost') is
-                httplib['httplib.HTTP']())
-    else:
-        assert (t.make_connection('localhost') is
-                httplib['httplib.HTTPConnection']())
-
-
-def test_transport_creates_new_https_connection(httplib):
-    from bandersnatch.master import CustomTransport
-    t = CustomTransport(ssl=True)
-    t.make_connection('localhost')
-    if sys.version_info < (2, 7):
-        assert (t.make_connection('localhost') is
-                httplib['httplib.HTTPS']())
-    else:
-        assert (t.make_connection('localhost') is
-                httplib['httplib.HTTPSConnection']())
-
-
-def test_transport_raises_on_missing_https_implementation(no_https):
-    from bandersnatch.master import CustomTransport
-    t = CustomTransport(ssl=True)
-    with pytest.raises(NotImplementedError):
-        t.make_connection('localhost')
-
-
 def test_master_raises_if_serial_too_small(master, requests):
     requests.prepare('foo', 1)
     with pytest.raises(StalePage):

src/bandersnatch/tests/test_mirror.py

     mirror.master.all_packages.return_value = {'foo': 1}
     mirror.master.package_releases.return_value = ['0.1']
     mirror.master.release_urls.return_value = [
-        {'url': 'http://pypi.example.com/packages/any/f/foo/foo.zip',
+        {'url': 'https://pypi.example.com/packages/any/f/foo/foo.zip',
          'md5_digest': 'b6bcb391b040c4468262706faf9d3cce'}]
 
     release_download = mock.Mock()
     mirror.master.all_packages.return_value = {'foo': 1}
     mirror.master.package_releases.return_value = ['0.1']
     mirror.master.release_urls.return_value = [
-        {'url': 'http://pypi.example.com/packages/any/f/foo/foo.zip',
+        {'url': 'https://pypi.example.com/packages/any/f/foo/foo.zip',
          'md5_digest': 'b6bcb391b040c4468262706faf9d3cce'}]
 
     release_download_stale = mock.Mock()
     mirror.master.all_packages.return_value = {'foo': 1}
     mirror.master.package_releases.return_value = ['0.1']
     mirror.master.release_urls.return_value = [
-        {'url': 'http://pypi.example.com/packages/any/f/foo/foo.zip',
+        {'url': 'https://pypi.example.com/packages/any/f/foo/foo.zip',
          'md5_digest': 'b6bcb391b040c4468262706faf9d3cce'}]
 
     release_download = mock.Mock()
     mirror.master.all_packages.return_value = {'foo': 1}
     mirror.master.package_releases.return_value = ['0.1']
     mirror.master.release_urls.return_value = [
-        {'url': 'http://pypi.example.com/packages/any/f/foo/foo.zip',
+        {'url': 'https://pypi.example.com/packages/any/f/foo/foo.zip',
          'md5_digest': 'b6bcb391b040c4468262706faf9d3cce'}]
 
     release_download = mock.Mock()

src/bandersnatch/tests/test_package.py

     mirror.master.package_releases.return_value = ['0.1']
     mirror.master.release_urls = mock.Mock()
     mirror.master.release_urls.return_value = [
-        {'url': 'http://pypi.example.com/packages/any/f/foo/foo.zip',
+        {'url': 'https://pypi.example.com/packages/any/f/foo/foo.zip',
          'md5_digest': 'b6bcb391b040c4468262706faf9d3cce'}]
 
     requests.prepare('the release content', 10)
     mirror.master.package_releases.return_value = ['0.1']
     mirror.master.release_urls = mock.Mock()
     mirror.master.release_urls.return_value = [
-        {'url': 'http://pypi.example.com/foo/bar/foo/foo.zip',
+        {'url': 'https://pypi.example.com/foo/bar/foo/foo.zip',
          'md5_digest': 'b6bcb391b040c4468262706faf9d3cce'}]
 
     mirror.packages_to_sync = set(['foo'])
     mirror.master.package_releases.return_value = ['0.1']
     mirror.master.release_urls = mock.Mock()
     mirror.master.release_urls.return_value = [
-        {'url': 'http://pypi.example.com/packages/any/f/foo/foo.zip',
+        {'url': 'https://pypi.example.com/packages/any/f/foo/foo.zip',
          'md5_digest': 'b6bcb391b040c4468262706faf9d3cce'}]
 
     requests.prepare('the release content', 10)
     mirror.master.package_releases.return_value = ['0.1']
     mirror.master.release_urls = mock.Mock()
     mirror.master.release_urls.return_value = [
-        {'url': 'http://pypi.example.com/packages/any/f/foo/foo.zip',
+        {'url': 'https://pypi.example.com/packages/any/f/foo/foo.zip',
          'md5_digest': 'b6bcb391b040c4468262706faf9d3cce'}]
 
     requests.prepare('the release content', 10)
     mirror.master.package_releases.return_value = ['0.1']
     mirror.master.release_urls = mock.Mock()
     mirror.master.release_urls.return_value = [
-        {'url': 'http://pypi.example.com/packages/any/f/foo/foo.zip',
+        {'url': 'https://pypi.example.com/packages/any/f/foo/foo.zip',
          'md5_digest': 'b6bcb391b040c4468262706faf9d3cce'}]
 
     requests.prepare('not release content', 10)
     mirror.master.package_releases.return_value = ['0.1']
     mirror.master.release_urls = mock.Mock()
     mirror.master.release_urls.return_value = [
-        {'url': 'http://pypi.example.com/packages/any/f/foo/foo.zip',
+        {'url': 'https://pypi.example.com/packages/any/f/foo/foo.zip',
          'md5_digest': 'b6bcb391b040c4468262706faf9d3cce'}]
 
     requests.prepare('not release content', 9)
     mirror.master.package_releases.return_value = ['0.1']
     mirror.master.release_urls = mock.Mock()
     mirror.master.release_urls.return_value = [
-        {'url': 'http://pypi.example.com/packages/any/f/foo/foo.zip',
+        {'url': 'https://pypi.example.com/packages/any/f/foo/foo.zip',
          'md5_digest': 'b6bcb391b040c4468262706faf9d3cce'}]
 
     requests.prepare('not release content', 11)