Source

namedentities / namedentities / ne3.py

Diff from to

File namedentities/ne3.py

 """Namedentities workhorse for Python 3."""
 
-
 from html.entities import codepoint2name, name2codepoint
 import re
 import codecs
     """Convert from HTML entities (named or numeric) to Unicode characters."""
     
     def fixup(m):
         """Return the Unicode equivalent of a matched HTML entity.

         Handles named entities (``&eacute;``) and numeric entities in
         decimal (``&#233;``) or hexadecimal (``&#xe9;`` / ``&#XE9;``) form.

         Does not, however, unescape < > and & (decimal 60, 62, and 38).
         Those are 'special' in that they are often escaped for very
         important, specific reasons (e.g. to describe HTML within HTML).
         Any messing with them is likely to break things badly.

         Args:
             m: a regex match object whose ``group(0)`` is the full entity
                text, including the leading ``&`` and trailing ``;``.

         Returns:
             The single decoded character, or the original entity text
             unchanged when the entity is unknown, malformed, out of the
             Unicode range, or one of the three protected characters.
         """
         text = m.group(0)
         try:
             if text[:2] == "&#":            # numeric entity
                 # HTML permits both "&#x" and "&#X" as the hex prefix.
                 if text[2:3] in ("x", "X"):
                     codepoint = int(text[3:-1], 16)
                 else:
                     codepoint = int(text[2:-1])
             else:                           # named entity
                 codepoint = name2codepoint[text[1:-1]]
             if codepoint not in (38, 60, 62):
                 return chr(codepoint)
         except (KeyError, ValueError, OverflowError):
             # KeyError: unknown entity name.
             # ValueError: unparsable digits or codepoint outside Unicode.
             # OverflowError: chr() on an int too large for a C int.
             pass
         return text # leave as is
     else:
         raise TypeError("Can't handle {}".format(text.__name__))
 
+
 # Register named_entities_codec (defined elsewhere in this module) as a codec
 # error handler under the name 'named_entities', so that name can be passed
 # as the ``errors`` argument of encode/decode calls.
 codecs.register_error('named_entities', named_entities_codec)