Commits

Jonathan Eunice committed e824279

updating for package distribution

  • Participants
  • Parent commits 6a41f20

Comments (0)

Files changed (10)

 
 Python 2::
   
-    import namedentities
+    from namedentities import named_entities
     
     u = u'both em\u2014and–dashes…'
     print named_entities(u)
     
 Python 3::
 
-    import namedentities
+    from namedentities import named_entities
     
     u = 'both em\u2014and–dashes…'
     print(named_entities(u))
+from namedentities import named_entities
+ 
+u = u'both em\u2014and–dashes…'
+print named_entities(u)
+from namedentities import named_entities
+
+u = 'both em\u2014and–dashes…'
+print(named_entities(u))

File namedentities.py

-
-"""Named HTML entities are much easier to comprehend than numeric entities. This
-module helps convert between the more typical numerical entiies and the more
-attractive named entities. """
-
-# Primarily a packaging of Ian Beck's work from
-# http://beckism.com/2009/03/named_entities_python/
-
-# There are too many little differences in Python 2 and Python 3 string
-# handling syntax and symantics to have just one implementation. So there are
-# two parallel implementations, multiplexed here.
-
-import sys
-if sys.version >= '3':
-    from namedentities3 import named_entities, encode_ampersands
-else:
-    from namedentities2 import named_entities, encode_ampersands
-
-
-def test_named_entities():
-    """Give it a run."""
-    
-    num_html   = " this &#8212;is&#8212;an&#8212; ok?"
-    named_html = " this &mdash;is&mdash;an&mdash; ok?"
-   
-    assert named_html == named_entities(num_html)
-  
-  
-if __name__ == '__main__':
-    test_named_entities()

File namedentities/__init__.py

+"""Named HTML entities are much easier to comprehend than numeric entities. This
+module helps convert between the more typical numerical entities and the more
+attractive named entities. """
+
+# Primarily a packaging of Ian Beck's work from
+# http://beckism.com/2009/03/named_entities_python/
+
+# There are too many little differences in Python 2 and Python 3 string
+# handling syntax and semantics to have just one implementation. So there are
+# two parallel implementations, multiplexed here.
+
+import sys
+# Choose the implementation by the interpreter's numeric major version;
+# comparing sys.version as a string would misorder a two-digit major
+# version such as '10'.
+if sys.version_info[0] >= 3:
+    # Explicit relative imports: Python 3 does not resolve sibling modules
+    # implicitly from inside a package. NOTE: this means the file must be
+    # imported as the package ``namedentities`` rather than run as a script.
+    from .ne3 import named_entities, encode_ampersands
+else:
+    from .ne2 import named_entities, encode_ampersands
+
+
+def test_named_entities():
+    """Check that numeric em-dash references come back as ``&mdash;``."""
+
+    # Both literals are plain ASCII entity references, so the check does not
+    # depend on the encoding of this source file.
+    num_html = " this &#8212;is&#8212;an&#8212; ok?"
+    named_html = " this &mdash;is&mdash;an&mdash; ok?"
+
+    assert named_entities(num_html) == named_html
+  
+  
+# Run the self-test when this file is executed directly.
+if __name__ == '__main__':
+    test_named_entities()

File namedentities/ne2.py

+"""Namedentities workhorse for Python 2."""
+
+
+from htmlentitydefs import codepoint2name, name2codepoint
+import re
+import codecs
+
+def unescape(text):
+    """Convert HTML entities (named or numeric) to Unicode characters.
+
+    Every ``&name;``, ``&#NNN;`` or ``&#xHHH;`` reference found in *text* is
+    replaced by the character it denotes. A reference that cannot be
+    resolved (unknown name, malformed or out-of-range number) is left as is.
+    """
+
+    def fixup(m):
+        """Return the Unicode equivalent of one matched entity, or the
+        entity text itself when it cannot be resolved.  NB this maps
+        existing named entities as well."""
+
+        entity = m.group(0)
+        try:
+            if entity[:3] == "&#x":
+                # hexadecimal character reference
+                return unichr(int(entity[3:-1], 16))
+            if entity[:2] == "&#":
+                # decimal character reference
+                return unichr(int(entity[2:-1]))
+            # named entity
+            return unichr(name2codepoint[entity[1:-1]])
+        except (KeyError, ValueError, OverflowError):
+            # Unknown name, non-numeric digits, or a number too large for
+            # unichr(): leave the reference untouched.
+            return entity
+
+    return re.sub(r"&#?\w+;", fixup, text)
+    
+    
+def named_entities_codec(text):
+    """Codec error handler that emits HTML entities for unencodable text.
+
+    *text* is the UnicodeEncodeError or UnicodeTranslateError handed over by
+    the codec machinery. Each offending character becomes a named entity
+    when ``codepoint2name`` knows its code point, and a decimal numeric
+    entity otherwise. Returns ``(replacement, position_to_resume_at)``;
+    raises TypeError for any other kind of object.
+    """
+
+    if not isinstance(text, (UnicodeEncodeError, UnicodeTranslateError)):
+        # Instances have no __name__ attribute, so report the class name.
+        raise TypeError("Can't handle %s" % type(text).__name__)
+
+    s = []
+    for c in text.object[text.start:text.end]:
+        code = ord(c)
+        if code in codepoint2name:
+            s.append(u'&%s;' % codepoint2name[code])
+        else:
+            s.append(u'&#%s;' % code)
+    return ''.join(s), text.end
+
+
+# Make the handler selectable as errors='named_entities'.
+codecs.register_error('named_entities', named_entities_codec)
+    
+
+def named_entities(text):
+    """Return *text* with its numerical HTML entities rewritten as named
+    HTML entities.
+
+    The whole string is first unescaped into Unicode characters; encoding
+    the result to ASCII with the 'named_entities' error handler then turns
+    each non-ASCII character back into a named entity where a name is
+    known, or a numerical entity otherwise.
+    """
+
+    return unescape(text).encode('ascii', 'named_entities')
+    
+    
+def encode_ampersands(text):
+    """Encode bare ampersands in *text* as ``&amp;``.
+
+    An ampersand that already begins a named, decimal or hexadecimal entity
+    reference (``&name;``, ``&#NNN;``, ``&#xHHH;``) is left alone.
+    """
+
+    # The negative lookahead skips ampersands that start an entity reference.
+    return re.sub('&(?!([a-zA-Z0-9]+|#[0-9]+|#x[0-9a-fA-F]+);)', '&amp;', text)
+

File namedentities/ne3.py

+"""Namedentities workhorse for Python 3."""
+
+
+from html.entities import codepoint2name, name2codepoint
+import re
+import codecs
+
+def unescape(text):
+    """Convert HTML entities (named or numeric) to Unicode characters.
+
+    Every ``&name;``, ``&#NNN;`` or ``&#xHHH;`` reference found in *text* is
+    replaced by the character it denotes. A reference that cannot be
+    resolved (unknown name, malformed or out-of-range number) is left as is.
+    """
+
+    def fixup(m):
+        """Return the Unicode equivalent of one matched entity, or the
+        entity text itself when it cannot be resolved.  NB this maps
+        existing named entities as well."""
+
+        entity = m.group(0)
+        try:
+            if entity[:3] == "&#x":
+                # hexadecimal character reference
+                return chr(int(entity[3:-1], 16))
+            if entity[:2] == "&#":
+                # decimal character reference
+                return chr(int(entity[2:-1]))
+            # named entity
+            return chr(name2codepoint[entity[1:-1]])
+        except (KeyError, ValueError, OverflowError):
+            # Unknown name, non-numeric digits, or a number too large for
+            # chr(): leave the reference untouched.
+            return entity
+
+    return re.sub(r"&#?\w+;", fixup, text)
+    
+    
+def named_entities_codec(text):
+    """Codec error handler that emits HTML entities for unencodable text.
+
+    *text* is the UnicodeEncodeError or UnicodeTranslateError handed over by
+    the codec machinery. Each offending character becomes a named entity
+    when ``codepoint2name`` knows its code point, and a decimal numeric
+    entity otherwise. Returns ``(replacement, position_to_resume_at)``;
+    raises TypeError for any other kind of object.
+    """
+
+    if not isinstance(text, (UnicodeEncodeError, UnicodeTranslateError)):
+        # Instances have no __name__ attribute, so report the class name.
+        raise TypeError("Can't handle {}".format(type(text).__name__))
+
+    s = []
+    for c in text.object[text.start:text.end]:
+        code = ord(c)
+        if code in codepoint2name:
+            s.append('&{};'.format(codepoint2name[code]))
+        else:
+            s.append('&#{};'.format(code))
+    return ''.join(s), text.end
+
+# Make the handler selectable as errors='named_entities'.
+codecs.register_error('named_entities', named_entities_codec)
+    
+
+def named_entities(text):
+    """Return *text* with its numerical HTML entities rewritten as named
+    HTML entities.
+
+    The whole string is first unescaped into Unicode characters; encoding
+    the result to ASCII with the 'named_entities' error handler then turns
+    each non-ASCII character back into a named entity where a name is
+    known, or a numerical entity otherwise.
+    """
+
+    ascii_bytes = unescape(text).encode('ascii', 'named_entities')
+    # Decode the ASCII bytes so the caller gets a str back.
+    return ascii_bytes.decode("ascii", "strict")
+    
+    
+def encode_ampersands(text):
+    """Encode bare ampersands in *text* as ``&amp;``.
+
+    An ampersand that already begins a named, decimal or hexadecimal entity
+    reference (``&name;``, ``&#NNN;``, ``&#xHHH;``) is left alone.
+    """
+
+    # The negative lookahead skips ampersands that start an entity reference.
+    return re.sub('&(?!([a-zA-Z0-9]+|#[0-9]+|#x[0-9a-fA-F]+);)', '&amp;', text)
+

File namedentities2.py

-"""Namedentities workhorse for Python 2."""
-
-
-from htmlentitydefs import codepoint2name, name2codepoint
-import re
-import codecs
-
-def unescape(text):
-    """Convert from HTML entities (named or numeric) to Unicode characters."""
-    
-    def fixup(m):
-        """Given a matched entity, return its Unicode equivalent.  NB this maps
-        existing named entities as well."""
-        
-        text = m.group(0)
-        if text[:2] == "&#":
-            # character reference
-            try:
-                if text[:3] == "&#x":
-                    return unichr(int(text[3:-1], 16))
-                else:
-                    return unichr(int(text[2:-1]))
-            except ValueError:
-                pass
-        else:
-            # named entity
-            try:
-                text = unichr(name2codepoint[text[1:-1]])
-            except KeyError:
-                pass
-        return text # leave as is
-    return re.sub("&#?\w+;", fixup, text)
-    
-    
-def named_entities_codec(text):
-    """Encode codec that converts Unicode characters into named entities (where
-    the names are known), or failing that, numerical entities."""
-    
-    if isinstance(text, (UnicodeEncodeError, UnicodeTranslateError)):
-        s = []
-        for c in text.object[text.start:text.end]:
-            if ord(c) in codepoint2name:
-                s.append(u'&%s;' % codepoint2name[ord(c)])
-            else:
-                s.append(u'&#%s;' % ord(c))
-        return ''.join(s), text.end
-    else:
-        raise TypeError("Can't handle %s" % text.__name__)
-
-
-codecs.register_error('named_entities', named_entities_codec)
-    
-
-def named_entities(text):
-    """Given a string, convert its numerical HTML entities to named HTML
-    entities. Works by converting the entire string to Unicode characters, then
-    re-encoding Unicode characters into named entities (where the names are
-    known), or failing that, numerical entities."""
-    
-    unescaped_text = unescape(text)
-    return unescaped_text.encode('ascii', 'named_entities')
-    
-    
-def encode_ampersands(text):
-    """Encode ampersands into &amp;"""
-    
-    text = re.sub('&(?!([a-zA-Z0-9]+|#[0-9]+|#x[0-9a-fA-F]+);)', '&amp;', text)
-    return text
-

File namedentities3.py

-"""Namedentities workhorse for Python 3."""
-
-
-from html.entities import codepoint2name, name2codepoint
-import re
-import codecs
-
-def unescape(text):
-    """Convert from HTML entities (named or numeric) to Unicode characters."""
-    
-    def fixup(m):
-        """Given a matched entity, return its Unicode equivalent.  NB this maps
-        existing named entities as well."""
-        
-        text = m.group(0)
-        if text[:2] == "&#":
-            # character reference
-            try:
-                if text[:3] == "&#x":
-                    return chr(int(text[3:-1], 16))
-                else:
-                    return chr(int(text[2:-1]))
-            except ValueError:
-                pass
-        else:
-            # named entity
-            try:
-                text = chr(name2codepoint[text[1:-1]])
-            except KeyError:
-                pass
-        return text # leave as is
-    return re.sub("&#?\w+;", fixup, text)
-    
-    
-def named_entities_codec(text):
-    """Encode codec that converts Unicode characters into named entities (where
-    the names are known), or failing that, numerical entities."""
-    
-    if isinstance(text, (UnicodeEncodeError, UnicodeTranslateError)):
-        s = []
-        for c in text.object[text.start:text.end]:
-            if ord(c) in codepoint2name:
-                s.append('&{};'.format(codepoint2name[ord(c)]))
-            else:
-                s.append('&#{};'.format(ord(c)))
-        return ''.join(s), text.end
-    else:
-        raise TypeError("Can't handle {}".format(text.__name__))
-
-codecs.register_error('named_entities', named_entities_codec)
-    
-
-def named_entities(text):
-    """Given a string, convert its numerical HTML entities to named HTML
-    entities. Works by converting the entire string to Unicode characters, then
-    re-encoding Unicode characters into named entities (where the names are
-    known), or failing that, numerical entities."""
-    
-    unescaped_text = unescape(text)
-    return unescaped_text.encode('ascii', 'named_entities')
-    
-    
-def encode_ampersands(text):
-    """Encode ampersands into &amp;"""
-    
-    text = re.sub('&(?!([a-zA-Z0-9]+|#[0-9]+|#x[0-9a-fA-F]+);)', '&amp;', text)
-    return text
-
     version='1.0',
     author='Jonathan Eunice',
     author_email='jonathan.eunice@gmail.com',
-    description='Simple way to convert numeric HTML entites to far more readable named entities.',
+    description='Simple way to convert numeric HTML entities to far more readable named entities.',
     long_description=README_TEXT,
     url='http://bitbucket.org/jeunice/namedentities',
-    py_modules=['namedentities', 'named'],
+    py_modules=['namedentities', 'namedentities2.py', 'namedentities3.py'],
     install_requires=[],
     classifiers=[
         'Development Status :: 4 - Beta',