Sebastian Rahlf avatar Sebastian Rahlf committed 5fc1260

Make ElementTree processors testable (which should be the final nail closing #28).

Comments (0)

Files changed (5)

amazonproduct/processors/__init__.py

 
 class BaseProcessor (object):
 
+    """
+    Skeleton class for processors.
+
+    If you would like to implement your own result processing, subclass
+    :class:`BaseProcessor` and override the methods.
+    """
+
     def parse(self, fp):
-        raise NotImplementedError
+        """
+        Parses a file-like XML source returned from Amazon. This is the most
+        important method of this class!
+
+        :return: parsed XML node
+        """
+        raise NotImplementedError # pragma: no cover
 
     @classmethod
     def load_paginator(cls, paginator_type):
-        return None
+        """
+        Returns a result paginator for operations like ItemSearch.
+
+        :param paginator_type: will be one of :const:`ITEMS_PAGINATOR` or
+          :const:`RELATEDITEMS_PAGINATOR`.
+        :return: a subclass of :class:`BaseResultPaginator`
+        """
+        return None # pragma: no cover
 
     @classmethod
     def parse_cart(cls, node):
 
         Obviously, this has to be implemented in each subclass of
         :class:`BaseProcessor`.
+
+        :param node: parsed XML node (as returned by :meth:`parse`).
+        :return: a :class:`~amazonproduct.contrib.Cart` instance
         """
-        raise NotImplementedError
+        raise NotImplementedError # pragma: no cover
 
 
 class BaseResultPaginator (object):
 
     A result paginator has the following attributes:
 
-    ``pages``
+    ``pages`` (same as ``len(<paginator>)``)
         Number of *total* pages. This may differ from the number of pages
         actually iterated over because of limits either imposed by Amazon or
-        yourself (using ``limit`).
+        yourself (using ``limit``).
 
     ``results``
         Number of total results. This may differ from the number of results
 
     def __init__(self, fun, *args, **kwargs):
         """
-        :param limit: limit fetched pages to this amount (restricted to a
-        maximum of 10 pages by API itself).
+        :param fun: original API method which will be called repeatedly with
+        ``args`` and ``kwargs``.
         """
         self.fun = fun
         self.args, self.kwargs = args, kwargs
 
     def paginator_data(self, node):
         """
-        Extracts pagination data from XML node.
+        Extracts pagination data from XML node, i.e.
+
+        * current page
+        * total number of pages
+        * total number of results
+
+        .. note:: *Number of pages* and *number of results* may differ
+           from the ones that Amazon is actually willing to return!
+
+        :return: ``(current page, total pages, total results)``
         """
-        raise NotImplementedError
+        raise NotImplementedError # pragma: no cover
 
     def iterate(self, node):
         """
         Returns iterable over XML item nodes.
         """
-        raise NotImplementedError
-
+        raise NotImplementedError # pragma: no cover

amazonproduct/processors/etree.py

         "Couldn't find any of the ElementTree implementations in %s!" % (
             list(modules), ))
 
-etree = load_elementtree_module()
 
 
 _nsreg = re.compile('^({.+?})')
 
 class Processor (BaseProcessor):
 
+    def __init__(self, *args, **kwargs):
+        # processor can be told which etree module to use in order to have
+        # multiple processors each using a different implementation 
+        etree_mod = kwargs.pop('module', None)
+        try:
+            if etree_mod:
+                self.etree = load_elementtree_module(etree_mod)
+            else:
+                self.etree = load_elementtree_module()
+        except (AttributeError, ImportError):
+            self.etree = None
+
     def parse(self, fp):
-        root = etree.parse(fp).getroot()
+        root = self.etree.parse(fp).getroot()
         ns = extract_nspace(root)
         errors = root.findall('.//%sError' % ns)
         for error in errors:
                 msg=error.findtext('./%sMessage' % ns), xml=root)
         return root
 
-    def __repr__(self):
+    def __repr__(self): # pragma: no cover
         return '<%s using %s at %s>' % (
-            self.__class__.__name__, etree.__name__, hex(id(self)))
+            self.__class__.__name__, getattr(self.etree, '__name__', '???'), hex(id(self)))
 
     @classmethod
     def parse_cart(cls, node):

docs/source/processors.rst

+
+.. _parsers:
+
+Result processing
+=================
+
+By default this module uses `lxml.objectify`_ to parse all XML responses it receives from Amazon.
+However, this will only work if ``lxml`` is actually installed.
+
+On some systems, such as Google App Engine, lxml cannot be installed. Therefore, there are a number of fallbacks which will be tried in the following order:
+
+* :class:`amazonproduct.processors.objectify.Processor`
+* :class:`amazonproduct.processors.etree.Processor`
+
+There is also a processor using ``minidom``.
+
+* :class:`amazonproduct.processors.minidom.Processor`
+
+
+.. note:: If you want to use your own parser, have a look at :class:`amazonproduct.processors.BaseProcessor` and :class:`amazonproduct.processors.BaseResultPaginator`.
+
+
+.. _lxml.objectify: http://lxml.de/objectify.html

tests/parser-performance.py

 import sys
 sys.path.insert(0, '..')
 
-from amazonproduct import API
-from amazonproduct import AWSError
-from config import AWS_KEY, SECRET_KEY
-
-# xml.minidom
-#
-def minidom_response_parser(fp):
-    root = xml.dom.minidom.parse(fp)
-    # parse errors
-    for error in root.getElementsByTagName('Error'):
-        code = error.getElementsByTagName('Code')[0].firstChild.nodeValue
-        msg = error.getElementsByTagName('Message')[0].firstChild.nodeValue
-        raise AWSError(code, msg)
-    return root
-
-# lxml.objectify
-#
-def objectify_response_parser(fp):
-    root = lxml.objectify.parse(fp).getroot()
-    nspace = root.nsmap.get(None, '')
-    errors = root.xpath('//aws:Request/aws:Errors/aws:Error', 
-                        namespaces={'aws' : nspace})
-    for error in errors:
-        raise AWSError(error.Code.text, error.Message.text)
-    return root
-
-# lxml.etree
-#
-def etree_response_parser(fp):
-    root = lxml.etree.parse(fp).getroot()
-    error = root.find('Error')
-    if error is not None:
-        raise AWSError(error.Code.text, error.Message.text)
-    return root
+from amazonproduct import API, AWSError
+from amazonproduct.processors import objectify, etree, minidom
 
 if __name__ == '__main__':
 
     RUNS = 10
 
     custom_parsers = {
-        'lxml.objectify' : objectify_response_parser, 
-        'lxml.etree' : etree_response_parser, 
-        'minidom' : minidom_response_parser, 
+        'lxml.objectify': objectify.Processor(), 
+        'lxml.etree': etree.Processor(module='lxml.etree'), 
+        'xml.etree.cElementTree': etree.Processor(module='xml.etree.cElementTree'),
+        'xml.etree.ElementTree': etree.Processor(module='xml.etree.ElementTree'),
+        'cElementTree': etree.Processor(module='cElementTree'),
+        'elementtree.ElementTree': etree.Processor(module='elementtree.ElementTree'),
+        'minidom': minidom.Processor(), 
     }
 
     print "Collecting test files..."
     print "Parsing %i XML files..." % (len(xml_files)*RUNS, )
     for label, parser in custom_parsers.items():
         print label, 
+        if getattr(parser, 'etree', '') is None:
+            print 'not installed!'
+            continue
         start = time.clock()
-        api = API(AWS_KEY, SECRET_KEY, 'de', processor=parser)
+        api = API(locale='de', processor=parser)
         for i in range(RUNS):
             for path in xml_files:
                 try:

tests/test_xml_responses.py

 import re
 import urllib2
 
-from tests import utils
+from tests import utils, ELEMENTTREE_IMPLEMENTATIONS
 from tests import XML_TEST_DIR
 from tests import TESTABLE_API_VERSIONS, TESTABLE_LOCALES, TESTABLE_PROCESSORS
 
     if 'api' in metafunc.funcargnames:
         processors = getattr(metafunc.function, 'processors',
             getattr(metafunc.cls, 'processors', TESTABLE_PROCESSORS))
+        # replace etree with all known implementations
+        if 'etree' in processors:
+            processors.extend(ELEMENTTREE_IMPLEMENTATIONS)
+            processors = set(processors)
+            processors.remove('etree')
         # if --processor is used get intersecting values
         if metafunc.config.option.processors:
             is_specified = lambda x: x in metafunc.config.option.processors
     locale = request.param['locale']
     version = request.param['version']
     xml_response = request.param['xml_response']
-    processor = TESTABLE_PROCESSORS[request.param['processor']]()
+
+    processor = TESTABLE_PROCESSORS[request.param['processor']]
+    if isinstance(processor, type):
+        processor = processor()
 
     api = API(locale=locale, processor=processor)
     api.VERSION = version
     Check that each requested API version is also really used.
     """
 
-    processors = ['objectify', 'etree']
+    processors = ['objectify']
 
     def test_correct_version(self, api):
         # any operation will do here
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.