Commits

Jonathan Eunice  committed 05d611c

restructured into package

  • Participants
  • Parent commits 20918ff

Comments (0)

Files changed (7)

File namedentities.py

-"""
-Named HTML entities are much easier to comprehend than numeric entities. This
-module helps convert between the more typical numerical entiies and the more
-attractive named entities.
-"""
-
-# Primarily a packaging of Ian Beck's work from
-# http://beckism.com/2009/03/named_entities_python/
-
-# There are too many little differences in Python 2 and Python 3 string handling
-# syntax and symantics to easily have just one implementation. So there are two
-# (very similar) parallel implementations, multiplexed here.
-
-import sys
-if sys.version_info[0] >= 3:
-    from namedentities3 import named_entities, encode_ampersands
-else:
-    from namedentities2 import named_entities, encode_ampersands

File namedentities/__init__.py

+"""
+Named HTML entities are much easier to comprehend than numeric entities. This
+module helps convert between the more typical numerical entities and the more
+attractive named entities.
+"""
+
+# Primarily a packaging of Ian Beck's work from
+# http://beckism.com/2009/03/named_entities_python/
+
+# Given the many little differences between Python 2 and Python 3 string handling
+# syntax and semantics, it is easier to have two very similar, parallel
+# implementations. They are multiplexed here into one logical ``namedentities``
+# package.
+
+import sys
+if sys.version_info[0] >= 3:
+    from namedentities.namedentities3 import named_entities, encode_ampersands
+else:
+    from namedentities.namedentities2 import named_entities, encode_ampersands

File namedentities/namedentities2.py

+"""Namedentities workhorse for Python 2."""
+
+from htmlentitydefs import codepoint2name, name2codepoint
+import re
+import codecs
+
+def unescape(text):
+    """
+    Convert from HTML entities (named or numeric) to Unicode characters.
+    """
+    
+    def fixup(m):
+        """
+        Given an HTML entity (named or numeric), return its Unicode
+        equivalent. Does not, however, unescape < > and & (decimal 60,
+        62, and 38). Those are 'special' in that they are often escaped for very
+        important, specific reasons (e.g. to describe HTML within HTML). Any
+        messing with them is likely to break things badly.
+        """
+        
+        text = m.group(0)
+        if text[:2] == "&#":            # numeric entity
+            try:
+                codepoint = int(text[3:-1], 16) if text[:3] == "&#x" else int(text[2:-1])
+                if codepoint != 38 and codepoint != 60 and codepoint != 62:
+                    return unichr(codepoint)
+            except ValueError:
+                pass
+        else:                           # named entity
+            try:
+                codepoint = name2codepoint[text[1:-1]]
+                if codepoint != 38 and codepoint != 60 and codepoint != 62:
+                    return unichr(codepoint)
+            except KeyError:
+                pass
+        return text # leave as is
+    return re.sub("&#?\w+;", fixup, text)
+    
+    
+def named_entities_codec(text):
+    """
+    Encode codec that converts Unicode characters into named entities (where
+    the names are known), or failing that, numerical entities.
+    """
+    
+    if isinstance(text, (UnicodeEncodeError, UnicodeTranslateError)):
+        s = []
+        for c in text.object[text.start:text.end]:
+            if ord(c) in codepoint2name:
+                s.append(u'&%s;' % codepoint2name[ord(c)])
+            else:
+                s.append(u'&#%s;' % ord(c))
+        return ''.join(s), text.end
+    else:
+        raise TypeError("Can't handle %s" % text.__name__)
+
+
+codecs.register_error('named_entities', named_entities_codec)
+    
+
+def named_entities(text):
+    """
+    Given a string, convert its numerical HTML entities to named HTML
+    entities. Works by converting the entire string to Unicode characters, then
+    re-encoding Unicode characters into named entities (where the names are
+    known), or failing that, numerical entities.
+    """
+    
+    unescaped_text = unescape(text)
+    return unescaped_text.encode('ascii', 'named_entities')
+    
+    
+def encode_ampersands(text):
+    """Encode ampersands into &amp;"""
+    
+    text = re.sub('&(?!([a-zA-Z0-9]+|#[0-9]+|#x[0-9a-fA-F]+);)', '&amp;', text)
+    return text
+

File namedentities/namedentities3.py

+"""Namedentities workhorse for Python 3."""
+
+from html.entities import codepoint2name, name2codepoint
+import re
+import codecs
+
+def unescape(text):
+    """
+    Convert from HTML entities (named or numeric) to Unicode characters.
+    """
+    
+    def fixup(m):
+        """
+        Given an HTML entity (named or numeric), return its Unicode
+        equivalent. Does not, however, unescape < > and & (decimal 60,
+        62, and 38). Those are 'special' in that they are often escaped for very
+        important, specific reasons (e.g. to describe HTML within HTML). Any
+        messing with them is likely to break things badly.
+        """
+        
+        text = m.group(0)
+        if text[:2] == "&#":            # numeric entity
+            try:
+                codepoint = int(text[3:-1], 16) if text[:3] == "&#x" else int(text[2:-1])
+                if codepoint != 38 and codepoint != 60 and codepoint != 62:
+                    return chr(codepoint)
+            except ValueError:
+                pass
+        else:                           # named entity
+            try:
+                codepoint = name2codepoint[text[1:-1]]
+                if codepoint != 38 and codepoint != 60 and codepoint != 62:
+                    return chr(codepoint)
+            except KeyError:
+                pass
+        return text # leave as is
+    return re.sub("&#?\w+;", fixup, text)
+    
+    
+def named_entities_codec(text):
+    """
+    Encode codec that converts Unicode characters into named entities (where
+    the names are known), or failing that, numerical entities.
+    """
+    
+    if isinstance(text, (UnicodeEncodeError, UnicodeTranslateError)):
+        s = []
+        for c in text.object[text.start:text.end]:
+            if ord(c) in codepoint2name:
+                s.append('&{};'.format(codepoint2name[ord(c)]))
+            else:
+                s.append('&#{};'.format(ord(c)))
+        return ''.join(s), text.end
+    else:
+        raise TypeError("Can't handle {}".format(text.__name__))
+
+
+codecs.register_error('named_entities', named_entities_codec)
+    
+
+def named_entities(text):
+    """
+    Given a string, convert its numerical HTML entities to named HTML
+    entities. Works by converting the entire string to Unicode characters, then
+    re-encoding Unicode characters into named entities (where the names are
+    known), or failing that, numerical entities.
+    """
+    
+    unescaped_text = unescape(text)
+    entities_text = unescaped_text.encode('ascii', 'named_entities')
+    return entities_text.decode("ascii", "strict")
+    
+    
+def encode_ampersands(text):
+    """
+    Encode ampersands into &amp;
+    """
+    
+    text = re.sub('&(?!([a-zA-Z0-9]+|#[0-9]+|#x[0-9a-fA-F]+);)', '&amp;', text)
+    return text
+

File namedentities2.py

-"""Namedentities workhorse for Python 2."""
-
-from htmlentitydefs import codepoint2name, name2codepoint
-import re
-import codecs
-
-def unescape(text):
-    """
-    Convert from HTML entities (named or numeric) to Unicode characters.
-    """
-    
-    def fixup(m):
-        """
-        Given an HTML entity (named or numeric), return its Unicode
-        equivalent. Does not, however, unescape < > and & (decimal 60,
-        62, and 38). Those are 'special' in that they are often escaped for very
-        important, specific reasons (e.g. to describe HTML within HTML). Any
-        messing with them is likely to break things badly.
-        """
-        
-        text = m.group(0)
-        if text[:2] == "&#":            # numeric entity
-            try:
-                codepoint = int(text[3:-1], 16) if text[:3] == "&#x" else int(text[2:-1])
-                if codepoint != 38 and codepoint != 60 and codepoint != 62:
-                    return unichr(codepoint)
-            except ValueError:
-                pass
-        else:                           # named entity
-            try:
-                codepoint = name2codepoint[text[1:-1]]
-                if codepoint != 38 and codepoint != 60 and codepoint != 62:
-                    return unichr(codepoint)
-            except KeyError:
-                pass
-        return text # leave as is
-    return re.sub("&#?\w+;", fixup, text)
-    
-    
-def named_entities_codec(text):
-    """
-    Encode codec that converts Unicode characters into named entities (where
-    the names are known), or failing that, numerical entities.
-    """
-    
-    if isinstance(text, (UnicodeEncodeError, UnicodeTranslateError)):
-        s = []
-        for c in text.object[text.start:text.end]:
-            if ord(c) in codepoint2name:
-                s.append(u'&%s;' % codepoint2name[ord(c)])
-            else:
-                s.append(u'&#%s;' % ord(c))
-        return ''.join(s), text.end
-    else:
-        raise TypeError("Can't handle %s" % text.__name__)
-
-
-codecs.register_error('named_entities', named_entities_codec)
-    
-
-def named_entities(text):
-    """
-    Given a string, convert its numerical HTML entities to named HTML
-    entities. Works by converting the entire string to Unicode characters, then
-    re-encoding Unicode characters into named entities (where the names are
-    known), or failing that, numerical entities.
-    """
-    
-    unescaped_text = unescape(text)
-    return unescaped_text.encode('ascii', 'named_entities')
-    
-    
-def encode_ampersands(text):
-    """Encode ampersands into &amp;"""
-    
-    text = re.sub('&(?!([a-zA-Z0-9]+|#[0-9]+|#x[0-9a-fA-F]+);)', '&amp;', text)
-    return text
-

File namedentities3.py

-"""Namedentities workhorse for Python 3."""
-
-from html.entities import codepoint2name, name2codepoint
-import re
-import codecs
-
-def unescape(text):
-    """
-    Convert from HTML entities (named or numeric) to Unicode characters.
-    """
-    
-    def fixup(m):
-        """
-        Given an HTML entity (named or numeric), return its Unicode
-        equivalent. Does not, however, unescape < > and & (decimal 60,
-        62, and 38). Those are 'special' in that they are often escaped for very
-        important, specific reasons (e.g. to describe HTML within HTML). Any
-        messing with them is likely to break things badly.
-        """
-        
-        text = m.group(0)
-        if text[:2] == "&#":            # numeric entity
-            try:
-                codepoint = int(text[3:-1], 16) if text[:3] == "&#x" else int(text[2:-1])
-                if codepoint != 38 and codepoint != 60 and codepoint != 62:
-                    return chr(codepoint)
-            except ValueError:
-                pass
-        else:                           # named entity
-            try:
-                codepoint = name2codepoint[text[1:-1]]
-                if codepoint != 38 and codepoint != 60 and codepoint != 62:
-                    return chr(codepoint)
-            except KeyError:
-                pass
-        return text # leave as is
-    return re.sub("&#?\w+;", fixup, text)
-    
-    
-def named_entities_codec(text):
-    """
-    Encode codec that converts Unicode characters into named entities (where
-    the names are known), or failing that, numerical entities.
-    """
-    
-    if isinstance(text, (UnicodeEncodeError, UnicodeTranslateError)):
-        s = []
-        for c in text.object[text.start:text.end]:
-            if ord(c) in codepoint2name:
-                s.append('&{};'.format(codepoint2name[ord(c)]))
-            else:
-                s.append('&#{};'.format(ord(c)))
-        return ''.join(s), text.end
-    else:
-        raise TypeError("Can't handle {}".format(text.__name__))
-
-
-codecs.register_error('named_entities', named_entities_codec)
-    
-
-def named_entities(text):
-    """
-    Given a string, convert its numerical HTML entities to named HTML
-    entities. Works by converting the entire string to Unicode characters, then
-    re-encoding Unicode characters into named entities (where the names are
-    known), or failing that, numerical entities.
-    """
-    
-    unescaped_text = unescape(text)
-    entities_text = unescaped_text.encode('ascii', 'named_entities')
-    return entities_text.decode("ascii", "strict")
-    
-    
-def encode_ampersands(text):
-    """
-    Encode ampersands into &amp;
-    """
-    
-    text = re.sub('&(?!([a-zA-Z0-9]+|#[0-9]+|#x[0-9a-fA-F]+);)', '&amp;', text)
-    return text
-
     description='Converts numeric HTML entities and Unicode characters to nice, neat named HTML entities',
     long_description=open('README.rst').read(),
     url='http://bitbucket.org/jeunice/namedentities',
-    py_modules=['namedentities', 'namedentities2', 'namedentities3'],
+    packages=['namedentities'],
     install_requires=[],
     tests_require = ['tox', 'pytest','six'],
     zip_safe = True,