Commits

Luke Plant committed f2dd044

Use a machine-friendly section ID, instead of the user-friendly 'name', as the key that style information is stored against

This will allow style information to stay with its section more easily,
provided we label sections with these IDs when extracting the presentation.

Comments (0)

Files changed (2)

semanticeditor/tests.py

     def test_add_css_classes(self):
         html = "<h1>Hello <em>you</em></h1><h2>Hi</h2>"
         outh = "<div class=\"myclass\"><h1>Hello <em>you</em></h1><div class=\"c1 c2\"><h2>Hi</h2></div></div>"
-        self.assertEqual(outh, format_html(html, {'Hello you':[PC('myclass')],
-                                                  'Hi':[PC('c1'), PC('c2')]}))
+        self.assertEqual(outh, format_html(html, {'h1_1':[PC('myclass')],
+                                                  'h2_1':[PC('c1'), PC('c2')]}))
 
     def test_sanity_check_sections(self):
         html = "<h1>Hello</h1><blockquote><h2>Hi</h2></blockquote>"
     def test_columns_1(self):
         html = "<h1>1</h1><p>para 1</p><h1>2</h1><h1>3</h1>"
         outh = "<div class=\"row columns2\"><div class=\"column firstcolumn\"><div><h1>1</h1><div><p>para 1</p></div></div></div><div class=\"column lastcolumn\"><div><h1>2</h1></div><div><h1>3</h1></div></div></div>"
-        self.assertEqual(outh, format_html(html, {'1':[NEWROW],
-                                                  '2':[NEWCOL]}))
+        self.assertEqual(outh, format_html(html, {'h1_1':[NEWROW],
+                                                  'h1_2':[NEWCOL]}))
 
     def test_max_cols(self):
         html = "<h1>1</h1><h1>2</h1><h1>3</h1><h1>4</h1><h1>5</h1>"
-        self.assertRaises(TooManyColumns, format_html, html, {'1':[NEWROW],
-                                                              '2':[NEWCOL],
-                                                              '3':[NEWCOL],
-                                                              '4':[NEWCOL],
-                                                              '5':[NEWCOL]
+        self.assertRaises(TooManyColumns, format_html, html, {'h1_1':[NEWROW],
+                                                              'h1_2':[NEWCOL],
+                                                              'h1_3':[NEWCOL],
+                                                              'h1_4':[NEWCOL],
+                                                              'h1_5':[NEWCOL]
                                                             })
 
     def test_creates_section_divs_2(self):
             "<div><h1>4</h1></div>" \
             "</div>" \
             "</div>"
-        self.assertEqual(outh, format_html(html, {'2.1':[NEWROW],
-                                                  '2.2':[NEWCOL],
-                                                  '2.3':[NEWROW],
-                                                  '2.4':[NEWCOL],
-                                                  '3':[NEWROW],
-                                                  '4':[NEWCOL],
+        self.assertEqual(outh, format_html(html, {'h2_1':[NEWROW],
+                                                  'h2_2':[NEWCOL],
+                                                  'h2_3':[NEWROW],
+                                                  'h2_4':[NEWCOL],
+                                                  'h1_3':[NEWROW],
+                                                  'h1_4':[NEWCOL],
                                                   }))
 
     def test_columns_3(self):
             "<p>P3</p>" \
             "<h1>4</h1>"
         pres = {
-            '1':[NEWROW],
-            '2':[NEWCOL],
-            '3':[NEWROW],
-            'P1...':[NEWROW],
-            'P2...':[NEWCOL],
-            'P3...':[NEWCOL],
-            '4':[NEWROW],
+            'h1_1':[NEWROW],
+            'h1_2':[NEWCOL],
+            'h1_3':[NEWROW],
+            'p_1':[NEWROW],
+            'p_2':[NEWCOL],
+            'p_3':[NEWCOL],
+            'h1_4':[NEWROW],
             }
 
         outh = \
 
         # Check that if we add NEWCOL to 'h1_4' (the heading '4'), we get BadStructure
         pres_bad1 = pres.copy()
-        pres_bad1.update({'4':[NEWCOL]})
+        pres_bad1.update({'h1_4':[NEWCOL]})
         self.assertRaises(BadStructure, format_html, html, pres_bad1)
 
 
     def test_columns_missing_newrow(self):
         html = "<h1>1</h1><p>para 1</p><h1>2</h1><h1>3</h1>"
-        self.assertRaises(BadStructure, format_html, html, {'2':[NEWCOL]})
+        self.assertRaises(BadStructure, format_html, html, {'h1_2':[NEWCOL]})
 
     def test_columns_nested_newcols(self):
         """
         will generate an error
         """
         html = "<h1>1</h1><h1>2</h1><h2>1.1</h2><h1>3</h1>"
-        self.assertRaises(BadStructure, format_html, html, {'1':[NEWROW],
-                                                            '2':[NEWCOL],
-                                                            '1.1':[NEWCOL]})
+        self.assertRaises(BadStructure, format_html, html, {'h1_1':[NEWROW],
+                                                            'h1_2':[NEWCOL],
+                                                            'h2_1':[NEWCOL]})
     def test_columns_nested_newrow(self):
         """
         Check that attempting to add new row at a different level
         will generate an error
         """
         html = "<h1>1</h1><h1>2</h1><h2>1.1</h2>"
-        self.assertRaises(BadStructure, format_html, html, {'1':[NEWROW],
-                                                            '2':[NEWCOL],
-                                                            '1.1':[NEWROW]})
+        self.assertRaises(BadStructure, format_html, html, {'h1_1':[NEWROW],
+                                                            'h1_2':[NEWCOL],
+                                                            'h2_1':[NEWROW]})
 
 
 class TestElementTreeUtils(TestCase):
     def test_extract_presentation(self):
         html = "<div class=\"foo\"><h1>Heading 1</h1><div class=\"bar baz\"><h2>Heading 2</h2><div class=\"whatsit\"><p>Some paragraph</p></div></div></div>"
         pres, html2 = extract_presentation(html)
-        self.assertEqual({'Heading 1':set([PC('foo')]),
-                          'Heading 2':set([PC('bar'), PC('baz')]),
-                          'Some paragraph...':set([PC('whatsit')]),
+        self.assertEqual({'h1_1':set([PC('foo')]),
+                          'h2_1':set([PC('bar'), PC('baz')]),
+                          'p_1':set([PC('whatsit')]),
                           }, pres)
         self.assertEqual("<h1>Heading 1</h1><h2>Heading 2</h2><p>Some paragraph</p>", html2)
 
             "<h1>3</h1>" \
             "<h1>4</h1>"
 
-        presentation = {'1':set([PC('myclass1')]),
-                        '2':set([]),
-                        '2.1':set([NEWROW]),
-                        '2.2':set([NEWCOL]),
-                        '2.3':set([NEWROW]),
-                        '2.4':set([NEWCOL, PC('myclass2')]),
-                        '3':set([NEWROW]),
-                        '4':set([NEWCOL]),
+        presentation = {'h1_1':set([PC('myclass1')]),
+                        'h1_2':set([]),
+                        'h2_1':set([NEWROW]),
+                        'h2_2':set([NEWCOL]),
+                        'h2_3':set([NEWROW]),
+                        'h2_4':set([NEWCOL, PC('myclass2')]),
+                        'h1_3':set([NEWROW]),
+                        'h1_4':set([NEWCOL]),
                         }
         combined = format_html(html, presentation)
         pres2, html2 = extract_presentation(combined)
         html = """
 <div class="row columns3"><div class="column firstcolumn"><div><h1>Hello Jane</h1><div><p>Some fancy content, entered using WYMeditor</p></div><div><p>Another paragraph</p></div><div><p>Hello</p></div></div></div><div class="column"><div><h1>Another &lt;heading&gt;</h1><div><h2>this is a test</h2></div><div><h2>hello1</h2><div><h3>hello2</h3></div><div><h3>hello3</h3></div><div><h3>hello4</h3></div></div></div></div><div class="column lastcolumn"><div><h1>hello5</h1><div><h2>hello6</h2><p>asdasd</p><p>asdxx</p></div></div></div></div>
 """
-        pres = {'Hello Jane':set([NEWROW]),
-                'Some fancy content, ...': set(),
-                'Another paragraph...': set(),
-                'Hello...': set(),
-                'Another <heading>':set([NEWCOL]),
-                'this is a test':set(),
-                'hello1':set(),
-                'hello2':set(),
-                'hello3':set(),
-                'hello4':set(),
-                'hello5':set([NEWCOL]),
-                'hello6':set(),
-                'asdasd...': set(),
-                'asdxx...': set(),
+        pres = {'h1_1':set([NEWROW]),
+                'p_1': set(),
+                'p_2': set(),
+                'p_3': set(),
+                'h1_2':set([NEWCOL]),
+                'h2_1':set(),
+                'h2_2':set(),
+                'h3_1':set(),
+                'h3_2':set(),
+                'h3_3':set(),
+                'h1_3':set([NEWCOL]),
+                'h2_3':set(),
+                'p_4': set(),
+                'p_5': set(),
                 }
 
         pres2, html2 = extract_presentation(html)
         html = """
 <div><h1>1</h1><div class="row columns1"><div class="column firstcolumn lastcolumn"><div><h2>1.1</h2></div><div><h2>1.2</h2></div></div></div></div>
 """
-        pres = {'1': set(),
-                '1.1':set([NEWROW]),
-                '1.2': set(),
+        pres = {'h1_1': set(),
+                'h2_1':set([NEWROW]),
+                'h2_2': set(),
                 }
         pres2, html2 = extract_presentation(html)
         self.assertEqual(pres, pres2)

semanticeditor/utils/presentation.py

         else:
             i += 1
 
+def make_sect_id(tag, used_ids):
+    i = 1
+    while True:
+        attempt = tag + "_" + str(i)
+        if attempt not in used_ids:
+            return attempt
+        else:
+            i += 1
+
 def get_layout_details_strategy():
     # TODO - make configurable
     return LayoutDetails()
 
 def get_structure(root, assert_structure=False):
     """
-    Return the heading nodes, as (level, name, tag, node) tuples
+    Return the structure nodes, as (level, sect_id, name, tag, node) tuples
+
+    level is the 'outline level' in the document i.e. an integer
+    sect_id is a unique ID used for storing presentation information against
+    name is a user presentable name for the section
+    tag is the HTML element e.g. H1
+    node is the ElementTree node
     """
     retval = []
     names = set()
+    sect_ids = set()
     heading_names = set()
     cur_level = 1
     last_heading_num = 0
     for n in root.getiterator():
         if n.tag in blockdef:
             text = flatten(n)
+            sect_id = make_sect_id(n.tag, sect_ids)
+            sect_ids.add(sect_id)
             if n.tag in headingdef:
                 name = text
                 level = int(n.tag[1])
             names.add(name)
             # Level is adjusted so that e.g. H3 is level 1, if it is
             # the first to appear in the document.
-            retval.append((level - first_heading_level + 1, name, n.tag.upper(), n))
+            retval.append((level - first_heading_level + 1, sect_id, name, n.tag.upper(), n))
 
     return retval
 
     # Parse
     tree = parse(content)
     structure = get_structure(tree, assert_structure=True)
-    return [(l,name,tag) for (l,name,tag,node) in structure]
+    return [(l,name,tag) for (l,sect_id,name,tag,node) in structure]
 
 # == Formatting HTML ==
 #
     layout_strategy = get_layout_details_strategy()
     root = parse(html)
     structure = get_structure(root, assert_structure=True)
-    sectionnames = [name for (level, name, tag, node) in structure]
-    styleinfo = _sanitise_styleinfo(styleinfo, sectionnames)
+    sect_ids = [sect_id for (level, sect_id, name, tag, node) in structure]
+    styleinfo = _sanitise_styleinfo(styleinfo, sect_ids)
 
     # Strip existing divs, otherwise we cannot format properly.  If
     # there are other block level elements that mess things up, we
     _assert_sane_sections(root, structure)
 
     section_nodes = {}
-    headers = [(level,name,tag,h) for (level,name,tag,h) in structure
+    headers = [(level,sect_id,tag,h) for (level,sect_id,name,tag,h) in structure
                if tag.lower() in headingdef]
 
     # Cut the HTML up into sections
     # as headers always produce nested structures, and the
     # indexes passed to wrap_elements_in_tag don't need
     # adjusting for the changes we have made.
-    for idx, (level, name, tag, node) in enumerate(headers):
+    for idx, (level, sect_id, tag, node) in enumerate(headers):
         # We can no longer assume that parent = root, because the divs
         # we insert will change that.  However, the divs we insert
         # will keep sub-section headings on the same level.
         # if a heading, then the 'scope' of each section is from
         # heading node to before the next heading with a level the
         # same or higher
-        nextnodes = [(l,n) for (l,nname,t,n) in headers[idx+1:] if l <= level]
+        nextnodes = [(l,n) for (l,_sect_id,t,n) in headers[idx+1:] if l <= level]
         if not nextnodes:
             # scope extends to end
             # Bug in elementtree - throws AssertionError if we try
                 last_elem = len(parent)
 
         newdiv = wrap_elements_in_tag(parent, first_elem, last_elem, "div")
-        section_nodes[name] = newdiv
+        section_nodes[sect_id] = newdiv
 
     # Now deal with everything else
-    for idx, (level, name, tag, node) in enumerate(structure):
+    for idx, (level, sect_id, name, tag, node) in enumerate(structure):
         if tag.lower() not in headingdef:
             # Normal block level - these simply get a div that wraps
             # them.
             parent = get_parent(root, node)
             thisidx = get_index(parent, node)
             newdiv = wrap_elements_in_tag(parent, thisidx, thisidx + 1, "div")
-            section_nodes[name] = newdiv
+            section_nodes[sect_id] = newdiv
 
     # Apply normal CSS classes.
-    for name, newdiv in section_nodes.items():
+    for sect_id, newdiv in section_nodes.items():
         # Apply css styles
-        classes = [s.name for s in styleinfo[name] if s.prestype == "class"]
+        classes = [s.name for s in styleinfo[sect_id] if s.prestype == "class"]
         classes.sort()
         if classes:
             newdiv.set("class", " ".join(classes))
     cleanup(tree, lambda t: t.tag != 'div')
 
 
-def _sanitise_styleinfo(styleinfo, sectionnames):
+def _sanitise_styleinfo(styleinfo, sect_ids):
     # Replace lists with sets
     out = {}
     for k, v in styleinfo.items():
         out[k] = set(v)
 
     # Ensure that all sections have an entry in styleinfo
-    for name in sectionnames:
-        if not name in out:
-            out[name] = set()
+    for sect_id in sect_ids:
+        if not sect_id in out:
+            out[sect_id] = set()
 
     return out
 
     # First, all h1, h2 etc tags will be children of the root.
     # remove_tag should have ensured that, otherwise we will be unable
     # to cut the HTML into sections.
-    for level, name, tag, node in structure:
+    for level, sect_id, name, tag, node in structure:
         parent = get_parent(root, node)
         if tag.lower() in headingdef and parent is not root:
             raise BadStructure("Section heading \"%(name)s\" is not at the top level of "
     #  - No columns allowed if newrow has not been started.
 
     # 'structure' has the sections in document order
-    sections = [(level, name, section_nodes[name])
-                for level, name, tag, n in structure]
+    sections = [(level, sect_id, section_nodes[sect_id])
+                for level, sect_id, name, tag, n in structure]
 
     # Inverted dict
     known_nodes = _invert_dict(section_nodes)
 
     # Preprocess:
     #  - insert 'newcolumn' on everything that has 'newrow'
-    for level, name, tag, hn in structure:
-        if NEWROW in styleinfo[name]:
-            styleinfo[name].add(NEWCOL)
+    for level, sect_id, name, tag, hn in structure:
+        if NEWROW in styleinfo[sect_id]:
+            styleinfo[sect_id].add(NEWCOL)
 
     _add_rows_and_columns(root, known_nodes, styleinfo, layout_strategy=layout_strategy)
     # Due to HTML/CSS quirks, we add an empty <div
     for n in node.getiterator():
         if n == node:
             continue # ignore root
-        name = known_nodes.get(n)
-        if name is not None:
-            commands = styleinfo[name]
+        sect_id = known_nodes.get(n)
+        if sect_id is not None:
+            commands = styleinfo[sect_id]
             if NEWROW in commands or NEWCOL in commands:
-                return (name, n)
+                return (sect_id, n)
     return None
 
 def _get_next_section_node(nodelist, known_nodes):
     for n in nodelist:
-        name = known_nodes.get(n)
-        if name is not None:
-            return name
+        sect_id = known_nodes.get(n)
+        if sect_id is not None:
+            return sect_id
     return None
 
 def _add_rows_and_columns(topnode, known_nodes, styleinfo, layout_strategy=None):
     # out of topnode as we go along.
     idx_offset = 0
     for idx, node in enumerate(children):
-        name = known_nodes.get(node)
-        if name is None:
+        sect_id = known_nodes.get(node)
+        if sect_id is None:
             # If not a section node, it cannot contain sections.
             # or have commands
             continue
-        commands = styleinfo[name]
+        commands = styleinfo[sect_id]
 
         if NEWROW in commands:
             if cur_row_start is not None:
 
         if NEWCOL in commands:
             if cur_row_start is None:
+                # TODO - need name, not sect_id
                 raise BadStructure("'New column' command was found on section "
                                    "'%(name)s' without an appropriate 'new row' "
-                                   "command before it. " % dict(name=name))
+                                   "command before it. " % dict(name=sect_id))
             else:
-                columns.append((idx + idx_offset, name))
+                columns.append((idx + idx_offset, sect_id))
 
         if cur_row_start:
             # Rows/columns can only be added within the same level of
             if child is not None:
                 if len(columns) > 1:
                     # Can't do it.
-                    cname, cnode = child
-                    raise BadStructure("Item '%(tag)s: %(name)s' has a 'New row' or 'New column' command applied to "
+                    csect_id, cnode = child
+                    # TODO - names not sect_ids
+                    raise BadStructure("A '%(tag)s' item has a 'New row' or 'New column' command applied to "
                                        "it, but it is a subsection of '%(ptag)s: %(pname)s' which is already in a column. "
                                        "This would create a nested column structure, which is not allowed." %
-                                       dict(tag=cnode[0].tag.upper(), name=cname, ptag=cur_row_start[0].tag.upper(), pname=name))
+                                       dict(tag=cnode[0].tag.upper(), ptag=cur_row_start[0].tag.upper(), pname=sect_id))
                 else:
                     # Allow it, but next section on this level must
                     # not be NEWCOL (unless it is also NEWROW)
-                    nextnodename = _get_next_section_node(children[idx+1:], known_nodes)
-                    if nextnodename is not None:
-                        nextnode_commands = styleinfo[nextnodename]
+                    nextnode_sect_id = _get_next_section_node(children[idx+1:], known_nodes)
+                    if nextnode_sect_id is not None:
+                        nextnode_commands = styleinfo[nextnode_sect_id]
                         if NEWCOL in nextnode_commands and (NEWROW not in nextnode_commands):
+                            # TODO - need name not sect_id
                             raise BadStructure("Item '%(ptag)s: %(pname)s' has a column structure within it "
                                                "but section '%(name)s' has a 'New column' command applied to "
                                                "it.  This would create a nested column structure, which is "
-                                               "not allowed." % (dict(name=nextnodename, ptag=cur_row_start[0].tag.upper(), pname=name)))
+                                               "not allowed." % (dict(name=nextnode_sect_id, ptag=cur_row_start[0].tag.upper(), pname=sect_id)))
                     _add_rows_and_columns(node, known_nodes, styleinfo, layout_strategy=layout_strategy)
 
         else:
 
     # Add the columns
     if total_columns > MAXCOLS:
+        # TODO need name not sect_id
         raise TooManyColumns("The maximum number of columns is %(max)d. "
                              "Please move section '%(name)s' into a new "
                              "row." % dict(max=MAXCOLS, name=columns[MAXCOLS][1]))
             _create_preview(n, structure, known_nodes)
         else:
             parent = node
+            # TODO - need to get the name, known_nodes uses sect_id as value
             name = known_nodes.get(parent)
             if name is not None and (n.tag in blockdef):
                 n.set('class', 'structural ' + "tag" + n.tag.lower() )
     root = parse(html)
     structure = get_structure(root)
     pres = {}
-    for level, name, tag, node in structure:
-        pres[name] = set()
+    for level, sect_id, name, tag, node in structure:
+        pres[sect_id] = set()
         section_node = get_parent(root, node)
         if section_node is None or section_node.tag != 'div':
             # Not in standard format, we can't say anything about it
 
         # Section - extract classes
         for c in _get_classes_for_node(section_node):
-            pres[name].add(PresentationClass(c))
+            pres[sect_id].add(PresentationClass(c))
 
         # Parent/grandparent of section - newcol/newrow
         p = get_parent(root, section_node)
             if get_index(p, section_node) == 0:
                 classes = _get_classes_for_node(p)
                 if layout_strategy.is_column_class(classes):
-                    pres[name].add(NEWCOL)
+                    pres[sect_id].add(NEWCOL)
                 gp = get_parent(root, p)
                 if gp is not None and gp.tag == 'div':
                     if layout_strategy.is_row_class(_get_classes_for_node(gp)) \
                             and get_index(gp, p) == 0:
-                        pres[name].add(NEWROW)
-                        pres[name].discard(NEWCOL) # for tidiness, not technically necessary
+                        pres[sect_id].add(NEWROW)
+                        pres[sect_id].discard(NEWCOL) # for tidiness, not technically necessary
 
     _strip_presentation(root)
     out_html = _html_extract(root)