Commits

Waylan Limberg committed 37ed7a9

Adjustments to match unicode policy as discussed on list and other minor
cleanup in preparation for release 1.7.

  • Participants
  • Parent commits 39bd928

Comments (0)

Files changed (6)

 #!/usr/bin/env python
 
-version = "1.6b"
-version_info = (1,6,2,"rc-2")
+version = "1.7"
+version_info = (1,7,0,"rc-1")
 __revision__ = "$Rev$"
 
 """
 file.)
 
 Started by [Manfred Stienstra](http://www.dwerg.net/).  Continued and
-maintained  by [Yuri Takhteyev](http://www.freewisdom.org).
+maintained  by [Yuri Takhteyev](http://www.freewisdom.org) and [Waylan
+Limberg](http://achinghead.com/).
 
 Contact: yuri [at] freewisdom.org
+         waylan [at] gmail.com
 
 License: GPL 2 (http://www.gnu.org/copyleft/gpl.html) or BSD
 
         Markdown text """
 
 
-    def __init__(self, source=None,  # deprecated
+    def __init__(self, source=None,  # depreciated
                  extensions=[],
                  extension_configs=None,
-                 encoding="utf-8",
                  safe_mode = False):
         """Creates a new Markdown instance.
 
-           @param source: The text in Markdown format.
-           @param encoding: The character encoding of <text>. """
+           @param source: The text in Markdown format. Deprecated!
+           @param extensions: A list of extensions.
+           @param extension_configs: Configuration settings for extensions.
+           @param safe_mode: Disallow raw html. """
 
+        self.source = source
+        if source is not None:
+            message(WARN, "The `source` arg of Markdown.__init__() is depreciated and will be removed in the future. Use `instance.convert(source)` instead.")
         self.safeMode = safe_mode
-        self.encoding = encoding
-        self.source = source
         self.blockGuru = BlockGuru()
         self.registeredExtensions = []
         self.stripTopLevelTags = 1
                                LINK_ANGLED_PATTERN,
                                LINK_PATTERN,
                                IMAGE_LINK_PATTERN,
-			       IMAGE_REFERENCE_PATTERN,
-			       AUTOLINK_PATTERN,
+			                   IMAGE_REFERENCE_PATTERN,
+			                   AUTOLINK_PATTERN,
                                AUTOMAIL_PATTERN,
                                LINE_BREAK_PATTERN_2,
                                LINE_BREAK_PATTERN,
         """Return the document in XHTML format.
 
         @returns: A serialized XHTML body."""
-        #try :
 
         if source is not None: #Allow blank string
             self.source = source
 
         if not self.source:
-            return ""
+            return u""
 
-        self.source = removeBOM(self.source, self.encoding)
+        try:
+            self.source = unicode(self.source)
+        except UnicodeDecodeError:
+            message(CRITICAL, 'UnicodeDecodeError: Markdown only accepts unicode or ascii  input.')
+            return u""
 
         for pp in self.textPreprocessors:
             self.source = pp.run(self.source)
         doc = self._transform()
         xml = doc.toxml()
 
-        #finally:
-        #    doc.unlink()
 
         # Return everything but the top level tag
 
         return (self.docType + xml).strip()
 
 
-    __str__ = convert   # deprecated - will be changed in 1.7 to report
-                        # information about the MD instance
-    
-    toString = __str__  # toString() method is deprecated
+    def __str__(self):
+        ''' Report info about instance. Markdown always returns unicode. '''
+        if self.source is None:
+            status = 'in which no source text has been assinged.'
+        else:
+            status = 'which contains %d chars and %d line(s) of source.'%\
+                     (len(self.source), self.source.count('\n')+1)
+        return 'An instance of "%s" %s'% (self.__class__, status)
 
+    __unicode__ = convert # markdown should always return a unicode string
 
-    def __unicode__(self):
-        """Return the document in XHTML format as a Unicode object.
-        """
-        return str(self)#.decode(self.encoding)
-
-
-    toUnicode = __unicode__  # deprecated - will be removed in 1.7
 
 
 
     text = input_file.read()
     input_file.close()
 
-    new_text = markdown(text, extensions, encoding, safe_mode = safe)
+    text = removeBOM(text, encoding)
+
+    new_text = markdown(text, extensions, safe_mode = safe)
 
     if output:
         output_file = codecs.open(output, "w", encoding=encoding)
 
 def markdown(text,
              extensions = [],
-             encoding = None,
              safe_mode = False):
     
     message(DEBUG, "in markdown.markdown(), received text:\n%s" % text)
 
 setup(
     name = 'markdown',
-    version = '1.3',
+    version = '1.7',
     description = "Python implementation of Markdown.",
     author = "Manfred Stienstra and Yuri takhteyev",
     maintainer = "Yuri Takhteyev",

tests/markdown-test/benchmark.dat.tmp

 construction:0.000000:0.000000
-amps-and-angle-encoding:0.020000:0.000000
-auto-links:0.020000:0.000000
-backlash-escapes:0.110000:131072.000000
-blockquotes-with-dode-blocks:0.000000:0.000000
-hard-wrapped:0.000000:0.000000
-horizontal-rules:0.070000:0.000000
-inline-html-advanced:0.020000:0.000000
-inline-html-comments:0.030000:0.000000
-inline-html-simple:0.100000:0.000000
-links-inline:0.050000:0.000000
-links-reference:0.090000:0.000000
-literal-quotes:0.040000:0.000000
-markdown-documentation-basics:0.350000:770048.000000
-markdown-syntax:1.790000:1540096.000000
-nested-blockquotes:0.040000:0.000000
-ordered-and-unordered-list:0.240000:-16384.000000
-strong-and-em-together:0.050000:0.000000
-tabs:0.060000:0.000000
-tidyness:0.050000:0.000000
+amps-and-angle-encoding:0.250000:266240.000000
+auto-links:0.230000:0.000000
+backlash-escapes:0.950000:245760.000000
+blockquotes-with-dode-blocks:0.070000:0.000000
+hard-wrapped:0.060000:0.000000
+horizontal-rules:0.700000:0.000000
+inline-html-advanced:0.280000:0.000000
+inline-html-comments:0.340000:0.000000
+inline-html-simple:0.970000:0.000000
+links-inline:0.500000:0.000000
+links-reference:0.650000:0.000000
+literal-quotes:0.390000:0.000000
+markdown-documentation-basics:3.390000:1437696.000000
+markdown-syntax:14.780000:2035712.000000
+nested-blockquotes:0.520000:-110592.000000
+ordered-and-unordered-list:2.210000:0.000000
+strong-and-em-together:0.640000:0.000000
+tabs:0.690000:0.000000
+tidyness:0.590000:0.000000

tests/misc/benchmark.dat.tmp

 construction:0.000000:0.000000
-adjacent-headers:0.010000:0.000000
-amp-in-url:0.010000:0.000000
-ampersand:0.000000:0.000000
-arabic:0.040000:0.000000
-attributes2:0.010000:0.000000
-bidi:0.080000:0.000000
+adjacent-headers:0.050000:0.000000
+amp-in-url:0.040000:0.000000
+ampersand:0.040000:0.000000
+arabic:0.330000:0.000000
+attributes2:0.090000:0.000000
+bidi:0.910000:0.000000
 blank:0.000000:0.000000
-blank-block-quote:0.000000:0.000000
-blockquote-hr:0.020000:0.000000
-br:0.020000:0.000000
-bracket_re:1.230000:0.000000
-code-first-line:0.010000:0.000000
-comments:0.010000:0.000000
-div:0.010000:0.000000
-email:0.010000:0.000000
-funky-list:0.020000:0.000000
-h1:0.010000:0.000000
-hash:0.020000:0.000000
-headers:0.020000:0.000000
-hline:0.020000:0.000000
-html:0.030000:0.000000
-image:0.010000:0.000000
-image-2:0.010000:0.000000
-image_in_links:0.020000:0.000000
-inside_html:0.020000:0.000000
-japanese:0.050000:0.000000
-lazy-block-quote:0.020000:0.000000
-lists:0.050000:0.000000
-lists2:0.020000:0.000000
-lists3:0.010000:0.000000
-lists4:0.020000:0.000000
-lists5:0.010000:0.000000
-markup-inside-p:0.020000:0.000000
-mismatched-tags:0.020000:0.000000
-more_comments:0.010000:0.000000
-multi-line-tags:0.030000:0.000000
-multi-paragraph-block-quote:0.020000:0.000000
-multi-test:0.050000:0.000000
-multiline-comments:0.020000:0.000000
-normalize:0.020000:0.000000
-numeric-entity:0.030000:0.000000
-php:0.030000:0.000000
-pre:0.030000:0.000000
-russian:0.070000:-12288.000000
-some-test:0.090000:0.000000
-span:0.040000:0.000000
-stronintags:0.050000:0.000000
-tabs-in-lists:0.070000:0.000000
-two-spaces:0.060000:0.000000
-uche:0.050000:0.000000
-underscores:0.040000:0.000000
-url_spaces:0.030000:0.000000
-utfbom:0.020000:0.000000
+blank-block-quote:0.050000:0.000000
+blockquote-hr:0.210000:0.000000
+br:0.170000:0.000000
+bracket_re:21.910000:0.000000
+code-first-line:0.050000:0.000000
+comments:0.110000:0.000000
+div:0.130000:0.000000
+email:0.130000:0.000000
+funky-list:0.220000:0.000000
+h1:0.110000:0.000000
+hash:0.160000:0.000000
+headers:0.180000:0.000000
+hline:0.110000:0.000000
+html:0.310000:0.000000
+image:0.150000:0.000000
+image-2:0.220000:0.000000
+image_in_links:0.190000:0.000000
+inside_html:0.180000:0.000000
+japanese:0.540000:0.000000
+lazy-block-quote:0.190000:0.000000
+lists:0.450000:0.000000
+lists2:0.170000:0.000000
+lists3:0.170000:0.000000
+lists4:0.210000:0.000000
+lists5:0.260000:0.000000
+markup-inside-p:0.270000:0.000000
+mismatched-tags:0.180000:0.000000
+more_comments:0.210000:0.000000
+multi-line-tags:0.260000:0.000000
+multi-paragraph-block-quote:0.280000:0.000000
+multi-test:0.540000:0.000000
+multiline-comments:0.340000:0.000000
+normalize:0.270000:0.000000
+numeric-entity:0.310000:0.000000
+php:0.350000:0.000000
+pre:0.310000:0.000000
+russian:0.760000:-172032.000000
+some-test:0.850000:0.000000
+span:0.500000:0.000000
+stronintags:0.500000:0.000000
+tabs-in-lists:0.670000:0.000000
+two-spaces:0.550000:0.000000
+uche:0.540000:0.000000
+underscores:0.490000:0.000000
+url_spaces:0.420000:0.000000

tests/misc/utfbom.html

-
-
-<h1>A heading.</h1>
-<p>text text text text text text.
-</p>
-
-

tests/misc/utfbom.txt

-A heading.
-==========
-
-text text text text text text.