Jonathan Eunice avatar Jonathan Eunice committed 9b93595

updated so no longer encodes < > & to avoid interfering with HTML escaping

Comments (0)

Files changed (4)

 ::
 
     pip install namedentities
-    
-Failing that, fall back to the older approach::
-
-    easy_install namedentities
-    
-(You may need to begin these with "sudo " to authorize installation.)
+        
+(You may need to prefix this with "sudo " to authorize installation.)
 
 **NOTA BENE** Code runs successfully under Python 3, but packaging
 seemingly doesn't work as yet.
     print(named_entities(u))
 
     
-Credits
+History
 =======
 
-This is basically a packaging of Ian Beck's work
-(described in http://beckism.com/2009/03/named_entities_python/)
+ * 1.0.8 No longer attempts to encode ``<``, ``>``, or 
+   ``&`` (or their numerical equivalents) to avoid
+   interfering with HTML escaping.
 
-Thank you, Ian!
+ * This is basically a packaging of Ian Beck's work
+   (described in http://beckism.com/2009/03/named_entities_python/)
+
+   Thank you, Ian!
+
+

namedentities/ne2.py

 """Namedentities workhorse for Python 2."""
 
-
 from htmlentitydefs import codepoint2name, name2codepoint
 import re
 import codecs
     """Convert from HTML entities (named or numeric) to Unicode characters."""
     
     def fixup(m):
-        """Given a matched entity, return its Unicode equivalent.  NB this maps
-        existing named entities as well."""
+        """Given an HTML entity (named or numeric), return its Unicode
+        equivalent. Does not, however, unescape < > and & (decimal 60,
+        62, and 38). Those are 'special' in that they are often escaped for very
+        important, specific reasons (e.g. to describe HTML within HTML). Any
+        messing with them is likely to break things badly."""
         
         text = m.group(0)
-        if text[:2] == "&#":
-            # character reference
+        if text[:2] == "&#":            # numeric entity
             try:
-                if text[:3] == "&#x":
-                    return unichr(int(text[3:-1], 16))
-                else:
-                    return unichr(int(text[2:-1]))
+                codepoint = int(text[3:-1], 16) if text[:3] == "&#x" else int(text[2:-1])
+                if codepoint != 38 and codepoint != 60 and codepoint != 62:
+                    return unichr(codepoint)
             except ValueError:
                 pass
-        else:
-            # named entity
+        else:                           # named entity
             try:
-                text = unichr(name2codepoint[text[1:-1]])
+                codepoint = name2codepoint[text[1:-1]]
+                if codepoint != 38 and codepoint != 60 and codepoint != 62:
+                    return unichr(codepoint)
             except KeyError:
                 pass
         return text # leave as is

namedentities/ne3.py

 """Namedentities workhorse for Python 3."""
 
-
 from html.entities import codepoint2name, name2codepoint
 import re
 import codecs
     """Convert from HTML entities (named or numeric) to Unicode characters."""
     
     def fixup(m):
-        """Given a matched entity, return its Unicode equivalent.  NB this maps
-        existing named entities as well."""
+        """Given an HTML entity (named or numeric), return its Unicode
+        equivalent. Does not, however, unescape < > and & (decimal 60,
+        62, and 38). Those are 'special' in that they are often escaped for very
+        important, specific reasons (e.g. to describe HTML within HTML). Any
+        messing with them is likely to break things badly."""
         
         text = m.group(0)
-        if text[:2] == "&#":
-            # character reference
+        if text[:2] == "&#":            # numeric entity
             try:
-                if text[:3] == "&#x":
-                    return chr(int(text[3:-1], 16))
-                else:
-                    return chr(int(text[2:-1]))
+                codepoint = int(text[3:-1], 16) if text[:3] == "&#x" else int(text[2:-1])
+                if codepoint != 38 and codepoint != 60 and codepoint != 62:
+                    return chr(codepoint)
             except ValueError:
                 pass
-        else:
-            # named entity
+        else:                           # named entity
             try:
-                text = chr(name2codepoint[text[1:-1]])
+                codepoint = name2codepoint[text[1:-1]]
+                if codepoint != 38 and codepoint != 60 and codepoint != 62:
+                    return chr(codepoint)
             except KeyError:
                 pass
         return text # leave as is
     else:
         raise TypeError("Can't handle {}".format(text.__name__))
 
+
 codecs.register_error('named_entities', named_entities_codec)
     
 
 
 setup(
     name='namedentities',
-    version='1.0.6',
+    version='1.0.8',
     author='Jonathan Eunice',
     author_email='jonathan.eunice@gmail.com',
     description='Simple way to convert numeric HTML entities to far more readable named entities.',
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.