Commits

Luke Plant committed 570028e

Use a StructureItem class instead of a 5 tuple (which was getting unwieldy)

Comments (0)

Files changed (3)

semanticeditor/utils/datastructures.py

+"""
+Use the 'struct' function (see below), or use the __metaclass__
+attribute with class definition syntax.
+
+>>> class Animal(object):
+...    __metaclass__ = struct
+...    name = ""
+...    weight = 0
+...    can_fly = False
+>>> Animal(name="Pig")
+<Animal can_fly=False name='Pig' weight=0>
+
+Subclass to add more attributes or override
+existing ones.  Multiple bases can be used.
+
+>>> class Bird(Animal):
+...    __metaclass__ = struct
+...    can_fly = True
+...    sings = None
+>>> Bird(sings=True)
+<Bird can_fly=True name='' sings=True weight=0>
+
+You can subclass to add methods like this:
+
+>>> class SelfDescribingBird(Bird):
+...     def describe(self):
+...         if self.sings:
+...             print "I can sing"
+>>> SelfDescribingBird(sings=True).describe()
+I can sing
+
+"""
+
+from semanticeditor.utils.mixins import StandardReprMixin
+
+
+def struct(name, bases, attrdict):
+    """
+    Returns a class that can be used as a simple struct, with default
+    attributes defined in 'attrdict'.
+
+    The signature matches the metaclass calling convention, so that
+    '__metaclass__ = struct' can be used in a class definition.
+
+    name     -- name of the class to create; also used in the TypeError
+                message raised for unknown attributes.
+    bases    -- tuple of base classes.  Only their '_allowed_attrs'
+                dictionaries (where present) are consulted; the returned
+                class does not inherit from them.
+    attrdict -- dict mapping attribute names to default values.  It is
+                modified in place: the '__metaclass__' and '__module__'
+                keys are removed.
+
+    The generated __init__ accepts keyword arguments only, and raises
+    TypeError if any keyword is not a known attribute.
+
+    >>> Person = struct("Person", (), dict(age=0, name=""))
+    >>> Person
+    <class '__main__.Person'>
+    >>> Person()
+    <Person age=0 name=''>
+    >>> Person(name="Joe")
+    <Person age=0 name='Joe'>
+    >>> Person(name="Fred", age=1)
+    <Person age=1 name='Fred'>
+    >>> Person(name="Fred", value=0, stupidity=1000)
+    Traceback (most recent call last):
+        ...
+    TypeError: Person does not have attribute(s): stupidity, value
+
+    >>> GenderedObject = struct("GenderedObject", (), dict(sex=None))
+
+    If the default attribute is mutable, use a callable (e.g. a
+    lambda, a class or a factory function) to provide it, otherwise it
+    will be shared between instances.  Use the bases argument to
+    inherit attribute definitions from other structs.
+
+    >>> Parent = struct("Parent", (Person, GenderedObject), dict(children=list))
+    >>> Parent(name="Fred", sex="male")
+    <Parent age=0 children=[] name='Fred' sex='male'>
+    """
+    # In case '__metaclass__ = struct' is set directly, need these:
+    attrdict.pop('__metaclass__', None)
+    attrdict.pop('__module__', None)
+
+    # Find allowed attributes of all bases
+    # (bases without '_allowed_attrs' are skipped; later bases override
+    # earlier ones, and 'attrdict' overrides all of them)
+    attrs = {}
+    for d in filter(lambda x: x is not None,
+                    (getattr(b, '_allowed_attrs', None) for b in bases)):
+        attrs.update(d)
+    attrs.update(attrdict)
+
+    def __init__(self, **kwargs):
+        # Set every allowed attribute: from kwargs if supplied, otherwise
+        # from its default.
+        for k, v in attrs.items():
+            try:
+                val = kwargs.pop(k)
+            except KeyError, e:
+                val = v
+                # Any callable default is treated as a factory and called
+                # with no arguments, so each instance gets its own value.
+                if callable(val):
+                    val = val()
+            setattr(self, k, val)
+        # Whatever is left in kwargs did not match an allowed attribute.
+        if kwargs:
+            raise TypeError("%s does not have attribute(s): %s" % \
+                                (name, ", ".join(sorted(kwargs.keys()))))
+
+    # we have no need for 'real' bases, and they only confuse things:
+    bases = (StandardReprMixin,)
+    # '_allowed_attrs' is stored on the class so that structs using this
+    # one as a base can pick up its attribute definitions (see above).
+    return type(name, bases, {'__init__': __init__,
+                              '_allowed_attrs': attrs})
+
+# We could also define a 'Struct' class which has '__metaclass__ = StructBase'.
+# Defining StructBase, however, is more tricky, and can't reuse
+# the existing 'struct' function. (You end up needing to do:
+#   s = struct(...)
+#   s.__class__ = struct
+# which does not work.)
+# It also makes it harder to define normal subclasses that are just
+# using the __init__ functionality provided by the 'struct' type.
+
+
+def _test():
+    # Run the doctests found in this module's docstrings.
+    import doctest
+    doctest.testmod()
+
+# Running the module as a script executes its doctests.
+if __name__ == "__main__":
+    _test()

semanticeditor/utils/mixins.py

+# -*- coding: utf-8 -*-
+
+class StandardReprMixin(object):
+    u"""
+    Used to add an implementation of '__repr__' that is generally
+    useful for debugging.
+
+    The output has the form <ClassName attr=value ...>, listing the
+    instance attributes (taken from __dict__) sorted by name, with each
+    value shown using its repr().
+
+    >>> class Foo(StandardReprMixin):
+    ...     def __init__(self):
+    ...         self.name = "blé"
+    >>>
+    >>> f = Foo()
+    >>> f.frobble = 1
+    >>> print f
+    <Foo frobble=1 name='bl\\xc3\\xa9'>
+    """
+    def __repr__(self):
+        # Sorting the (name, value) pairs gives a stable, alphabetical
+        # attribute order regardless of dict ordering.
+        return u"<%s %s>" % (self.__class__.__name__,
+                             u' '.join(u"%s=%r" % (k,v) for (k,v) in sorted(self.__dict__.iteritems())))
+
+def _test():
+    # Run the doctests found in this module's docstrings.
+    import doctest
+    doctest.testmod()
+
+# Running the module as a script executes its doctests.
+if __name__ == "__main__":
+    _test()

semanticeditor/utils/presentation.py

 
 from elementtree import ElementTree as ET
 from semanticeditor.utils.etree import cleanup, flatten, get_parent, get_index, wrap_elements_in_tag, indent
+from semanticeditor.utils.datastructures import struct
 from xml.parsers import expat
 import re
 
     # TODO - make configurable
     return LayoutDetails()
 
+
+class StructureItem(object):
+    # One entry in the list returned by get_structure().  The 'struct'
+    # metaclass turns the attributes below into keyword arguments of the
+    # constructor, with the values given here as defaults.
+    # NOTE: do not add a docstring to this class -- it would be passed to
+    # struct() as '__doc__' and treated as just another struct attribute.
+    __metaclass__ = struct
+    level = 0     #    level is the 'outline level' in the document i.e. an integer
+    sect_id = ''  #    sect_id is a unique ID used for storing presentation information against
+    name = ''     #    name is a user presentable name for the section
+    tag = ''      #    tag is the HTML element e.g. H1
+    node = None   #    node is the ElementTree node
+
+
 def get_structure(root, assert_structure=False):
     """
-    Return the structure nodes, as (level, sect_id, name, tag, node) tuples
-
-    level is the 'outline level' in the document i.e. an integer
-    sect_id is a unique ID used for storing presentation information against
-    name is a user presentable name for the section
-    tag is the HTML element e.g. H1
-    node is the ElementTree node
+    Return the structure nodes, as a list of StructureItems
     """
     retval = []
     names = set()
             names.add(name)
             # Level is adjusted so that e.g. H3 is level 1, if it is
             # the first to appear in the document.
-            retval.append((level - first_heading_level + 1, sect_id, name, n.tag.upper(), n))
+            retval.append(StructureItem(level=level - first_heading_level + 1,
+                                        sect_id=sect_id,
+                                        name=name,
+                                        tag=n.tag.upper(),
+                                        node=n))
 
     return retval
 
     # Parse
     tree = parse(content)
     structure = get_structure(tree, assert_structure=True)
-    return [(l,name,tag) for (l,sect_id,name,tag,node) in structure]
+    return [(s.level, s.name, s.tag) for s in structure]
 
 # == Formatting HTML ==
 #
     layout_strategy = get_layout_details_strategy()
     root = parse(html)
     structure = get_structure(root, assert_structure=True)
-    sect_ids = [sect_id for (level, sect_id, name, tag, node) in structure]
+    sect_ids = [s.sect_id for s in structure]
     styleinfo = _sanitise_styleinfo(styleinfo, sect_ids)
 
     # Strip existing divs, otherwise we cannot format properly.  If
     _assert_sane_sections(root, structure)
 
     section_nodes = {}
-    headers = [(level,sect_id,tag,h) for (level,sect_id,name,tag,h) in structure
-               if tag.lower() in headingdef]
+    headers = [si for si in structure
+               if si.tag.lower() in headingdef]
 
     # Cut the HTML up into sections
 
     # as headers always produce nested structures, and the
     # indexes passed to wrap_elements_in_tag don't need
     # adjusting for the changes we have made.
-    for idx, (level, sect_id, tag, node) in enumerate(headers):
+    for idx, si in enumerate(headers):
         # We can no longer assume that parent = root, because the divs
         # we insert will change that.  However, the divs we insert
         # will keep sub-section headings on the same level.
-        parent = get_parent(root, node)
+        parent = get_parent(root, si.node)
 
-        thisidx = get_index(parent, node)
+        thisidx = get_index(parent, si.node)
         first_elem = thisidx
 
         # if a heading, then the 'scope' of each section is from
         # heading node to before the next heading with a level the
         # same or higher
-        nextnodes = [(l,n) for (l,_sect_id,t,n) in headers[idx+1:] if l <= level]
+        nextnodes = [(si2.level, si2.node) for si2 in headers[idx+1:] if si2.level <= si.level]
         if not nextnodes:
             # scope extends to end
             # Bug in elementtree - throws AssertionError if we try
                 last_elem = len(parent)
 
         newdiv = wrap_elements_in_tag(parent, first_elem, last_elem, "div")
-        section_nodes[sect_id] = newdiv
+        section_nodes[si.sect_id] = newdiv
 
     # Now deal with everything else
-    for idx, (level, sect_id, name, tag, node) in enumerate(structure):
-        if tag.lower() not in headingdef:
+    for idx, si in enumerate(structure):
+        if si.tag.lower() not in headingdef:
             # Normal block level - these simply get a div that wraps
             # them.
-            parent = get_parent(root, node)
-            thisidx = get_index(parent, node)
+            parent = get_parent(root, si.node)
+            thisidx = get_index(parent, si.node)
             newdiv = wrap_elements_in_tag(parent, thisidx, thisidx + 1, "div")
-            section_nodes[sect_id] = newdiv
+            section_nodes[si.sect_id] = newdiv
 
     # Apply normal CSS classes.
     for sect_id, newdiv in section_nodes.items():
     # First, all h1, h2 etc tags will be children of the root.
     # remove_tag should have ensured that, otherwise we will be unable
     # to cut the HTML into sections.
-    for level, sect_id, name, tag, node in structure:
-        parent = get_parent(root, node)
-        if tag.lower() in headingdef and parent is not root:
+    for si in structure:
+        parent = get_parent(root, si.node)
+        if si.tag.lower() in headingdef and parent is not root:
             raise BadStructure("Section heading \"%(name)s\" is not at the top level of "
                                "the document. This interferes with the ability to "
                                "format the sections and apply columns. "
                                "Please move the heading out of the '%(element)s'"
-                               " element that contains it." % dict(name=name, element=parent.tag.upper()))
+                               " element that contains it." % dict(name=si.name, element=parent.tag.upper()))
 
 def _apply_commands(root, section_nodes, styleinfo, structure, layout_strategy=None):
     # Rules:
     #  - No columns allowed if newrow has not been started.
 
     # 'structure' has the sections in document order
-    sections = [(level, sect_id, section_nodes[sect_id])
-                for level, sect_id, name, tag, n in structure]
+    sections = [(si.level, si.sect_id, section_nodes[si.sect_id])
+                for si in structure]
 
     # Inverted dict
     known_nodes = _invert_dict(section_nodes)
 
     # Preprocess:
     #  - insert 'newcolumn' on everything that has 'newrow'
-    for level, sect_id, name, tag, hn in structure:
-        if NEWROW in styleinfo[sect_id]:
-            styleinfo[sect_id].add(NEWCOL)
+    for si in structure:
+        if NEWROW in styleinfo[si.sect_id]:
+            styleinfo[si.sect_id].add(NEWCOL)
 
     _add_rows_and_columns(root, known_nodes, styleinfo, layout_strategy=layout_strategy)
     # Due to HTML/CSS quirks, we add an empty <div
     root = parse(html)
     structure = get_structure(root)
     pres = {}
-    for level, sect_id, name, tag, node in structure:
-        pres[sect_id] = set()
-        section_node = get_parent(root, node)
+    for si in structure:
+        pres[si.sect_id] = set()
+        section_node = get_parent(root, si.node)
         if section_node is None or section_node.tag != 'div':
             # Not in standard format, we can't say anything about it
             continue
 
         # Section - extract classes
         for c in _get_classes_for_node(section_node):
-            pres[sect_id].add(PresentationClass(c))
+            pres[si.sect_id].add(PresentationClass(c))
 
         # Parent/grandparent of section - newcol/newrow
         p = get_parent(root, section_node)
             if get_index(p, section_node) == 0:
                 classes = _get_classes_for_node(p)
                 if layout_strategy.is_column_class(classes):
-                    pres[sect_id].add(NEWCOL)
+                    pres[si.sect_id].add(NEWCOL)
                 gp = get_parent(root, p)
                 if gp is not None and gp.tag == 'div':
                     if layout_strategy.is_row_class(_get_classes_for_node(gp)) \
                             and get_index(gp, p) == 0:
-                        pres[sect_id].add(NEWROW)
-                        pres[sect_id].discard(NEWCOL) # for tidiness, not technically necessary
+                        pres[si.sect_id].add(NEWROW)
+                        pres[si.sect_id].discard(NEWCOL) # for tidiness, not technically necessary
 
     _strip_presentation(root)
     out_html = _html_extract(root)