Anonymous avatar Anonymous committed 1031f1f

[svn] upgrade markdown from 1.5 to 1.6a

Comments (0)

Files changed (1)

webhelpers/markdown.py

 #!/usr/bin/env python
 
+# The following constant specifies the name used in the usage
+# statement displayed for python versions lower than 2.3.  (With
+# python2.3 and higher the usage statement is generated by optparse
+# and uses the actual name of the executable called.)
+
+EXECUTABLE_NAME_FOR_USAGE = "python markdown.py"
+
 SPEED_TEST = 0
 
 """
 ====================================================================
-IF YOU ARE LOOKING TO EXTEND MARKDOWN, SEE THE "FOOTNOTES" SECTION
+IF YOU ARE LOOKING TO EXTEND MARKDOWN, SEE THE "FOOTNOTES" SECTION
 ====================================================================
 
 Python-Markdown
 
 License: GPL 2 (http://www.gnu.org/copyleft/gpl.html) or BSD
 
-Version: 1.5 (May 15, 2006)
+Version: 1.6a (October 12, 2006)
+
 
 For changelog, see end of file
 """
 
-import re, sys, os, random
+import re, sys, os, random, codecs
 
 # set debug level: 3 none, 2 critical, 1 informative, 0 all
 (VERBOSE, INFO, CRITICAL, NONE) = range(4)
 
 # --------------- CONSTANTS YOU _SHOULD NOT_ HAVE TO CHANGE ----------
 
-FN_BACKLINK_TEXT = "zz1337820767766393qq"
 # a template for html placeholders
 HTML_PLACEHOLDER_PREFIX = "qaodmasdkwaspemas"
 HTML_PLACEHOLDER = HTML_PLACEHOLDER_PREFIX + "%dajkqlsmdqpakldnzsdfls"
 BLOCK_LEVEL_ELEMENTS = ['p', 'div', 'blockquote', 'pre', 'table',
                         'dl', 'ol', 'ul', 'script', 'noscript',
                         'form', 'fieldset', 'iframe', 'math', 'ins',
-                        'del', 'hr', 'hr/']
+                        'del', 'hr', 'hr/', 'style']
 
 def is_block_level (tag) :
     return ( (tag in BLOCK_LEVEL_ELEMENTS) or
 
     def normalizeEntities(self, text) :
 
-        pairs = [ #("&", "&"),
+        pairs = [ ("&", "&"),
                   ("<", "&lt;"),
                   (">", "&gt;"),
                   ("\"", "&quot;")]
 
+
         for old, new in pairs :
             text = text.replace(old, new)
         return text
 Preprocessors munge source text before we start doing anything too
 complicated.
 
-Each preprocessor implements a "run" method that takes a pointer to
-a list of lines of the document, modifies it as necessary and
-returns either the same pointer or a pointer to a new list.
+Each preprocessor implements a "run" method that takes a reference
+to a list of lines of the document, modifies it as necessary and
+returns either the same list or a new one.  Preprocessors must
+extend markdown.Preprocessor.
+
 """
 
-class HeaderPreprocessor :
+
+class Preprocessor :
+    pass
+
+
+class HeaderPreprocessor (Preprocessor):
 
     """
        Replaces underlined headers with hashed headers to avoid
 
     def run (self, lines) :
 
-        for i in range(len(lines)) :
-            if not lines[i] :
+        i = -1
+        while i+1 < len(lines) :
+            i = i+1
+            if not lines[i].strip() :
                 continue
 
             if lines[i].startswith("#") :
                     lines[i] = "## " + lines[i].strip()
                     lines[i+1] = ""
 
+        #for l in lines :
+        #    print l.encode('utf8')
+        #sys.exit(0)
+
         return lines
 
 HEADER_PREPROCESSOR = HeaderPreprocessor()
 
-class LinePreprocessor :
+class LinePreprocessor (Preprocessor):
     """Deals with HR lines (needs to be done before processing lists)"""
 
     def run (self, lines) :
 LINE_PREPROCESSOR = LinePreprocessor()
 
 
-class LineBreaksPreprocessor :
+class LineBreaksPreprocessor (Preprocessor):
     """Replaces double spaces at the end of the lines with <br/ >."""
 
     def run (self, lines) :
 LINE_BREAKS_PREPROCESSOR = LineBreaksPreprocessor()
 
 
-class HtmlBlockPreprocessor :
+class HtmlBlockPreprocessor (Preprocessor):
     """Removes html blocks from self.lines"""
+    
+    def _get_left_tag(self, block):
+        return block[1:].replace(">", " ", 1).split()[0].lower()
 
+
+    def _get_right_tag(self, left_tag, block):
+        return block.rstrip()[-len(left_tag)-2:-1].lower()
+
+    def _equal_tags(self, left_tag, right_tag):
+        if left_tag in ['?', '?php', 'div'] : # handle PHP, etc.
+            return True
+        if ("/" + left_tag) == right_tag:
+            return True
+        elif left_tag == right_tag[1:] \
+            and right_tag[0] != "<":
+            return True
+        else:
+            return False
+
+    def _is_oneliner(self, tag):
+        return (tag in ['hr', 'hr/'])
+
+    
     def run (self, lines) :
         new_blocks = []
         text = "\n".join(lines)
-        for block in text.split("\n\n") :
+        text = text.split("\n\n")
+        
+        items = []
+        left_tag = ''
+        right_tag = ''
+        in_tag = False # flag
+        
+        for block in text:
             if block.startswith("\n") :
                 block = block[1:]
-            if ( (block.startswith("<") and block.rstrip().endswith(">"))
-                 and (block[1] in ["!", "?", "@", "%"]
-                      or is_block_level( block[1:].replace(">", " ")
-                                         .split()[0].lower()))) :
-                new_blocks.append(
-                    self.stash.store(block.strip()))
-            else :
+
+            if not in_tag:
+
+                if block.startswith("<"):
+                    
+                    left_tag = self._get_left_tag(block)
+                    right_tag = self._get_right_tag(left_tag, block)
+
+                    if not (is_block_level(left_tag) \
+                        or block[1] in ["!", "?", "@", "%"]):
+                        new_blocks.append(block)
+                        continue
+
+                    if self._is_oneliner(left_tag):
+                        new_blocks.append(block.strip())
+                        continue
+                        
+                    if block[1] == "!":
+                        # is a comment block
+                        left_tag = "--"
+                        right_tag = self._get_right_tag(left_tag, block)
+                        # keep checking conditions below and maybe just append
+                        
+                    if block.rstrip().endswith(">") \
+                        and self._equal_tags(left_tag, right_tag):
+                        new_blocks.append(
+                            self.stash.store(block.strip()))
+                        continue
+                    elif not block[1] == "!":
+                        # if is block level tag and is not complete
+                        items.append(block.strip())
+                        in_tag = True
+                        continue
+                    
                 new_blocks.append(block)
+
+            else:
+                items.append(block.strip())
+                
+                right_tag = self._get_right_tag(left_tag, block)
+                if self._equal_tags(left_tag, right_tag):
+                    # if find closing tag
+                    in_tag = False
+                    new_blocks.append(
+                        self.stash.store('\n\n'.join(items)))
+                    items = []
+                    
         return "\n\n".join(new_blocks).split("\n")
 
 HTML_BLOCK_PREPROCESSOR = HtmlBlockPreprocessor()
 
 
-class ReferencePreprocessor :
+class ReferencePreprocessor (Preprocessor):
 
     def run (self, lines) :
+
         new_text = [];
         for line in lines:
             m = RE.regExp['reference-def'].match(line)
             if m:
                 id = m.group(2).strip().lower()
-                title = dequote(m.group(4).strip()) #.replace('"', "&quot;")
-                self.references[id] = (m.group(3), title)
+                t = m.group(4).strip()  # potential title
+                if not t :
+                    self.references[id] = (m.group(3), t)
+                elif (len(t) >= 2
+                      and (t[0] == t[-1] == "\""
+                           or t[0] == t[-1] == "\'"
+                           or (t[0] == "(" and t[-1] == ")") ) ) :
+                    self.references[id] = (m.group(3), t[1:-1])
+                else :
+                    new_text.append(line)
             else:
                 new_text.append(line)
+
         return new_text #+ "\n"
 
 REFERENCE_PREPROCESSOR = ReferencePreprocessor()
 ======================================================================
 
 Inline patterns such as *emphasis* are handled by means of auxiliary
-objects, one per pattern.  Each pattern object uses a single regular
+objects, one per pattern.  Pattern objects must be instances of classes
+that extend markdown.Pattern.  Each pattern object uses a single regular
 expression and needs to support the following methods:
 
   pattern.getCompiledRegExp() - returns a regular expression
                                 a NanoDom node (as a part of the provided
                                 doc) or None
 
-All of python markdown's built-in patterns subclass from BasePatter,
+All of python markdown's built-in patterns subclass from Pattern,
 but you can add additional patterns that don't.
 
 Also note that all the regular expressions used by inline must
 capture the whole block.  For this reason, they all start with
 '^(.*)' and end with '(.*)!'.  In the case of built-in expressions,
-BasePattern takes care of adding the "^(.*)" and "(.*)!".
+Pattern takes care of adding the "^(.*)" and "(.*)!".
 
 Finally, the order in which regular expressions are applied is very
 important - e.g. if we first replace http://.../ links with <a> tags
 IMAGE_REFERENCE_RE = r'\!' + BRK + '\s*\[([^\]]*)\]' # ![alt text][2]
 NOT_STRONG_RE = r'( \* )'                        # stand-alone * or _
 AUTOLINK_RE = r'<(http://[^>]*)>'                # <http://www.123.com>
-AUTOMAIL_RE = r'<([^> ]*@[^> ]*)>'               # <me@example.com>
-HTML_RE = r'(\<[^\>]*\>)'                        # <...>
+AUTOMAIL_RE = r'<([^> \!]*@[^> ]*)>'               # <me@example.com>
+#HTML_RE = r'(\<[^\>]*\>)'                        # <...>
+HTML_RE = r'(\<[a-zA-Z/][^\>]*\>)'               # <...>
 ENTITY_RE = r'(&[\#a-zA-Z0-9]*;)'                # &amp;
 
-class BasePattern:
+class Pattern:
 
     def __init__ (self, pattern) :
         self.pattern = pattern
     def getCompiledRegExp (self) :
         return self.compiled_re
 
-class SimpleTextPattern (BasePattern) :
+BasePattern = Pattern # for backward compatibility
+
+class SimpleTextPattern (Pattern) :
 
     def handleMatch(self, m, doc) :
         return doc.createTextNode(m.group(2))
 
-class SimpleTagPattern (BasePattern):
+class SimpleTagPattern (Pattern):
 
     def __init__ (self, pattern, tag) :
-        BasePattern.__init__(self, pattern)
+        Pattern.__init__(self, pattern)
         self.tag = tag
 
     def handleMatch(self, m, doc) :
         el.appendChild(doc.createTextNode(m.group(2)))
         return el
 
-class BacktickPattern (BasePattern):
+class BacktickPattern (Pattern):
 
     def __init__ (self, pattern):
-        BasePattern.__init__(self, pattern)
+        Pattern.__init__(self, pattern)
         self.tag = "code"
 
     def handleMatch(self, m, doc) :
         el = doc.createElement(self.tag)
         text = m.group(2).strip()
-        text = text.replace("&", "&amp;")
+        #text = text.replace("&", "&amp;")
         el.appendChild(doc.createTextNode(text))
         return el
 
         return el1
 
 
-class HtmlPattern (BasePattern):
+class HtmlPattern (Pattern):
 
     def handleMatch (self, m, doc) :
         place_holder = self.stash.store(m.group(2))
         return doc.createTextNode(place_holder)
 
 
-class LinkPattern (BasePattern):
+class LinkPattern (Pattern):
 
     def handleMatch(self, m, doc) :
         el = doc.createElement('a')
         return el
 
 
-class ImagePattern (BasePattern):
+class ImagePattern (Pattern):
 
     def handleMatch(self, m, doc):
         el = doc.createElement('img')
         el.setAttribute('alt', truealt)
         return el
 
-class ReferencePattern (BasePattern):
+class ReferencePattern (Pattern):
 
     def handleMatch(self, m, doc):
         if m.group(9) :
         return el
 
 
-class AutolinkPattern (BasePattern):
+class AutolinkPattern (Pattern):
 
     def handleMatch(self, m, doc):
         el = doc.createElement('a')
         el.appendChild(doc.createTextNode(m.group(2)))
         return el
 
-class AutomailPattern (BasePattern):
+class AutomailPattern (Pattern):
 
     def handleMatch(self, m, doc) :
         el = doc.createElement('a')
 preprocessors in that they need to implement a "run" method.  Unlike
 pre-processors, they take a NanoDom document as a parameter and work
 with that.
-#
+
+Post-processors should extend markdown.Postprocessor.
+
 There are currently no standard post-processors, but the footnote
 extension below uses one.
 """
+
+class Postprocessor :
+    pass
+
+
 """
 ======================================================================
 ========================== MISC AUXILIARY CLASSES ====================
         Markdown text """
 
 
-    def __init__(self, source=None):
+    def __init__(self, source=None,
+                 extensions=[],
+                 extension_configs=None,
+                 encoding=None,
+                 safe_mode = True):
         """Creates a new Markdown instance.
 
-           @param source: The text in Markdown format. """
-        
-        if isinstance(source, unicode):
-            source = source.encode('utf8')
+           @param source: The text in Markdown format.
+           @param encoding: The character encoding of <text>. """
+
+        self.safeMode = safe_mode
+        self.encoding = encoding
         self.source = source
         self.blockGuru = BlockGuru()
         self.registeredExtensions = []
         self.stripTopLevelTags = 1
+        self.docType = ""
 
         self.preprocessors = [ HEADER_PREPROCESSOR,
                                LINE_PREPROCESSOR,
         self.postprocessors = [] # a footnote postprocessor will get
                                  # inserted later
 
+        self.textPostprocessors = [] # a footnote postprocessor will get
+                                     # inserted later                                 
+
         self.prePatterns = []
         
 
                                 # The order of the handlers matters!!!
                                 ]
 
+        self.registerExtensions(extensions = extensions,
+                                configs = extension_configs)
+
         self.reset()
 
+
+    def registerExtensions(self, extensions, configs) :
+
+        if not configs :
+            configs = {}
+
+        for ext in extensions :
+
+            extension_module_name = "mdx_" + ext
+
+            try :
+                module = __import__(extension_module_name)
+
+            except :
+                message(CRITICAL,
+                        "couldn't load extension %s (looking for %s module)"
+                        % (ext, extension_module_name) )
+            else :
+
+                if configs.has_key(ext) :
+                    configs_for_ext = configs[ext]
+                else :
+                    configs_for_ext = []
+                extension = module.makeExtension(configs_for_ext)    
+                extension.extendMarkdown(self, globals())
+
+
+
+
     def registerExtension(self, extension) :
+        """ This gets called by the extension """
         self.registeredExtensions.append(extension)
 
     def reset(self) :
         parent_elem.appendChild(pre)
         pre.appendChild(code)
         text = "\n".join(detabbed).rstrip()+"\n"
-        text = text.replace("&", "&amp;")
+        #text = text.replace("&", "&amp;")
         code.appendChild(self.doc.createTextNode(text))
         self._processSection(parent_elem, theRest, inList)
 
         else :
             return None
 
-    def __str__(self):
+    def __str__(self, source = None):
         """Return the document in XHTML format.
 
         @returns: A serialized XHTML body."""
         #try :
+
+        if source :
+            self.source = source
+        
         doc = self._transform()
         xml = doc.toxml()
+
         #finally:
         #    doc.unlink()
 
         # Let's stick in all the raw html pieces
 
         for i in range(self.htmlStash.html_counter) :
+            html = self.htmlStash.rawHtmlBlocks[i]
+            if self.safeMode :
+                html = "[HTML_REMOVED]"
+                
             xml = xml.replace("<p>%s\n</p>" % (HTML_PLACEHOLDER % i),
-                              self.htmlStash.rawHtmlBlocks[i] + "\n")
+                              html + "\n")
             xml = xml.replace(HTML_PLACEHOLDER % i,
-                              self.htmlStash.rawHtmlBlocks[i])
-
-        xml = xml.replace(FN_BACKLINK_TEXT, "&#8617;")
+                              html)
 
         # And return everything but the top level tag
 
         if self.stripTopLevelTags :
-            xml = xml.strip()[23:-7]
+            xml = xml.strip()[23:-7] + "\n"
 
-        if isinstance(xml, unicode) :
-            xml = xml.encode("utf8")
+        for pp in self.textPostprocessors :
+            xml = pp.run(xml)
 
-        return xml
+        return self.docType + xml
 
 
     toString = __str__
 
 
-"""
-========================= FOOTNOTES =================================
+    def __unicode__(self):
+        """Return the document in XHTML format as a Unicode object.
+        """
+        return str(self)#.decode(self.encoding)
 
-This section adds footnote handling to markdown.  It can be used as
-an example for extending python-markdown with relatively complex
-functionality.  While in this case the extension is included inside
-the module itself, it could just as easily be added from outside the
-module.  Not that all markdown classes above are ignorant about
-footnotes.  All footnote functionality is provided separately and
-then added to the markdown instance at the run time.
 
-Footnote functionality is attached by calling extendMarkdown()
-method of FootnoteExtension.  The method also registers the
-extension to allow it's state to be reset by a call to reset()
-method.
-"""
+    toUnicode = __unicode__
 
-class FootnoteExtension :
 
-    DEF_RE = re.compile(r'(\ ?\ ?\ ?)\[\^([^\]]*)\]:\s*(.*)')
-    SHORT_USE_RE = re.compile(r'\[\^([^\]]*)\]', re.M) # [^a]
 
-    FN_PLACE_MARKER = "///Footnotes Go Here///"
-
-    def __init__ (self) :
-        self.reset()
-
-    def extendMarkdown(self, md) :
-
-        self.md = md
-
-        # Stateless extensions do not need to be registered
-        md.registerExtension(self)
-
-        # Insert a preprocessor before ReferencePreprocessor
-        index = md.preprocessors.index(REFERENCE_PREPROCESSOR)
-        preprocessor = FootnotePreprocessor(self)
-        preprocessor.md = md
-        md.preprocessors.insert(index, preprocessor)
-
-        # Insert an inline pattern before ImageReferencePattern
-        FOOTNOTE_RE = r'\[\^([^\]]*)\]' # blah blah [^1] blah
-        index = md.inlinePatterns.index(IMAGE_REFERENCE_PATTERN)
-        md.inlinePatterns.insert(index, FootnotePattern(FOOTNOTE_RE, self))
-
-        # Insert a post-processor that would actually add the footnote div
-        postprocessor = FootnotePostprocessor(self)
-        postprocessor.extension = self
-        
-        md.postprocessors.append(postprocessor)
-
-
-    def reset(self) :
-        # May be called by Markdown is state reset is desired
-
-        self.footnote_suffix = "-" + str(int(random.random()*1000000000))
-        self.used_footnotes={}
-        self.footnotes = {}
-
-    def findFootnotesPlaceholder(self, doc) :
-        def findFootnotePlaceholderFn(node=None, indent=0):
-            if node.type == 'text':
-                if node.value.find(self.FN_PLACE_MARKER) > -1 :
-                    return True
-
-        fn_div_list = doc.find(findFootnotePlaceholderFn)
-        if fn_div_list :
-            return fn_div_list[0]
-
-
-    def setFootnote(self, id, text) :
-        self.footnotes[id] = text
-
-    def makeFootnoteId(self, num) :
-        return 'fn%d%s' % (num, self.footnote_suffix)
-
-    def makeFootnoteRefId(self, num) :
-        return 'fnr%d%s' % (num, self.footnote_suffix)
-
-    def makeFootnotesDiv (self, doc) :
-        """Creates the div with class='footnote' and populates it with
-           the text of the footnotes.
-
-           @returns: the footnote div as a dom element """
-
-        if not self.footnotes.keys() :
-            return None
-
-        div = doc.createElement("div")
-        div.setAttribute('class', 'footnote')
-        hr = doc.createElement("hr")
-        div.appendChild(hr)
-        ol = doc.createElement("ol")
-        div.appendChild(ol)
-
-        footnotes = [(self.used_footnotes[id], id)
-                     for id in self.footnotes.keys()]
-        footnotes.sort()
-
-        for i, id in footnotes :
-            li = doc.createElement('li')
-            li.setAttribute('id', self.makeFootnoteId(i))
-
-            self.md._processSection(li, self.footnotes[id].split("\n"))
-
-            #li.appendChild(doc.createTextNode(self.footnotes[id]))
-
-            backlink = doc.createElement('a')
-            backlink.setAttribute('href', '#' + self.makeFootnoteRefId(i))
-            backlink.setAttribute('class', 'footnoteBackLink')
-            backlink.setAttribute('title',
-                                  'Jump back to footnote %d in the text' % 1)
-            backlink.appendChild(doc.createTextNode(FN_BACKLINK_TEXT))
-
-            if li.childNodes :
-                node = li.childNodes[-1]
-                if node.type == "text" :
-                    node = li
-                node.appendChild(backlink)
-
-            ol.appendChild(li)
-
-        return div
-
-
-class FootnotePreprocessor :
-
-    def __init__ (self, footnotes) :
-        self.footnotes = footnotes
-
-    def run(self, lines) :
-
-        self.blockGuru = BlockGuru()
-        lines = self._handleFootnoteDefinitions (lines)
-
-        # Make a hash of all footnote marks in the text so that we
-        # know in what order they are supposed to appear.  (This
-        # function call doesn't really substitute anything - it's just
-        # a way to get a callback for each occurence.
-
-        text = "\n".join(lines)
-        self.footnotes.SHORT_USE_RE.sub(self.recordFootnoteUse, text)
-
-        return text.split("\n")
-
-
-    def recordFootnoteUse(self, match) :
-
-        id = match.group(1)
-        id = id.strip()
-        nextNum = len(self.footnotes.used_footnotes.keys()) + 1
-        self.footnotes.used_footnotes[id] = nextNum
-
-
-    def _handleFootnoteDefinitions(self, lines) :
-        """Recursively finds all footnote definitions in the lines.
-
-            @param lines: a list of lines of text
-            @returns: a string representing the text with footnote
-                      definitions removed """
-
-        i, id, footnote = self._findFootnoteDefinition(lines)
-
-        if id :
-
-            plain = lines[:i]
-
-            detabbed, theRest = self.blockGuru.detectTabbed(lines[i+1:])
-
-            self.footnotes.setFootnote(id,
-                                       footnote + "\n"
-                                       + "\n".join(detabbed))
-
-            more_plain = self._handleFootnoteDefinitions(theRest)
-            return plain + [""] + more_plain
-
-        else :
-            return lines
-
-    def _findFootnoteDefinition(self, lines) :
-        """Finds the first line of a footnote definition.
-
-            @param lines: a list of lines of text
-            @returns: the index of the line containing a footnote definition """
-
-        counter = 0
-        for line in lines :
-            m = self.footnotes.DEF_RE.match(line)
-            if m :
-                return counter, m.group(2), m.group(3)
-            counter += 1
-        return counter, None, None
-
-
-class FootnotePattern (BasePattern) :
-
-    def __init__ (self, pattern, footnotes) :
-
-        BasePattern.__init__(self, pattern)
-        self.footnotes = footnotes
-
-    def handleMatch(self, m, doc) :
-        sup = doc.createElement('sup')
-        a = doc.createElement('a')
-        sup.appendChild(a)
-        id = m.group(2)
-        num = self.footnotes.used_footnotes[id]
-        sup.setAttribute('id', self.footnotes.makeFootnoteRefId(num))
-        a.setAttribute('href', '#' + self.footnotes.makeFootnoteId(num))
-        a.appendChild(doc.createTextNode(str(num)))
-        return sup
-
-class FootnotePostprocessor :
-
-    def __init__ (self, footnotes) :
-        self.footnotes = footnotes
-
-    def run(self, doc) :
-        footnotesDiv = self.footnotes.makeFootnotesDiv(doc)
-        if footnotesDiv :
-            fnPlaceholder = self.extension.findFootnotesPlaceholder(doc)
-            if fnPlaceholder :
-                fnPlaceholder.parent.replaceChild(fnPlaceholder, footnotesDiv)
-            else :
-                doc.documentElement.appendChild(footnotesDiv)
 
 # ====================================================================
 
-def markdown(text) :
-    message(VERBOSE, "in markdown.py, received text:\n%s" % text)
-    return Markdown(text).toString()
+def markdownFromFile(input = None,
+                     output = None,
+                     extensions = [],
+                     encoding = None,
+                     message_threshold = CRITICAL,
+                     safe = False) :
 
-def markdownWithFootnotes(text):
-    message(VERBOSE, "Running markdown with footnotes, "
-            + "received text:\n%s" % text)
-    md = Markdown()
-    footnoteExtension = FootnoteExtension()
-    footnoteExtension.extendMarkdown(md)
-    md.source = text
+    global MESSAGE_THRESHOLD
+    MESSAGE_THRESHOLD = message_threshold
 
-    return str(md)
+    message(VERBOSE, "input file: %s" % input)
 
-def test_markdown(args):
-    """test markdown at the command line.
-        in each test, arg 0 is the module name"""
-    print "\nTEST 1: no arguments on command line"
-    cmd_line(["markdown.py"])
-    print "\nTEST 2a: 1 argument on command line: a good option"
-    cmd_line(["markdown.py","-footnotes"])
-    print "\nTEST 2b: 1 argument on command line: a bad option"
-    cmd_line(["markdown.py","-foodnotes"])
-    print "\nTEST 3: 1 argument on command line: non-existent input file"
-    cmd_line(["markdown.py","junk.txt"])
-    print "\nTEST 4: 1 argument on command line: existing input file"
-    lines = """
-Markdown text with[^1]:
 
-2. **bold text**,
-3. *italic text*.
+    if not encoding :
+        encoding = "utf-8"
 
-Then more:
+    input_file = codecs.open(input, mode="r", encoding="utf-8")
+    text = input_file.read()
+    input_file.close()
 
-    beginning of code block;
-    another line of code block.
+    new_text = markdown(text, extensions, encoding, safe_mode = safe)
+
+    if output :
+        output_file = codecs.open(output, "w", encoding=encoding)
+        output_file.write(new_text)
+        output_file.close()
+
+    else :
+        sys.stdout.write(new_text.encode(encoding))
+
+def markdown(text,
+             extensions = [],
+             encoding = None,
+             safe_mode = False) :
     
-    a second paragraph of code block.
+    message(VERBOSE, "in markdown.markdown(), received text:\n%s" % text)
 
-more text to end our file.
+    extension_names = []
+    extension_configs = {}
+    
+    for ext in extensions :
+        pos = ext.find("(") 
+        if pos == -1 :
+            extension_names.append(ext)
+        else :
+            name = ext[:pos]
+            extension_names.append(name)
+            pairs = [x.split("=") for x in ext[pos+1:-1].split(",")]
+            configs = [(x.strip(), y.strip()) for (x, y) in pairs]
+            extension_configs[name] = configs
+            #print configs
 
-[^1]: "italic" means emphasis.
-"""
-    fid = "markdown-test.txt"
-    f1 = open(fid, 'w+')
-    f1.write(lines)
-    f1.close()
-    cmd_line(["markdown.py",fid])
-    print "\nTEST 5: 2 arguments on command line: nofootnotes and input file"
-    cmd_line(["markdown.py","-nofootnotes", fid])
-    print "\nTEST 6: 2 arguments on command line: footnotes and input file"
-    cmd_line(["markdown.py","-footnotes", fid])
-    print "\nTEST 7: 3 arguments on command line: nofootnotes,inputfile, outputfile"
-    fidout = "markdown-test.html"
-    cmd_line(["markdown.py","-nofootnotes", fid, fidout])
+    md = Markdown(text, extensions=extension_names,
+                  extension_configs=extension_configs,
+                  safe_mode = safe_mode)
 
+    return md.toString()
+        
 
-def get_vars(args):
-    """process the command-line args received; return usable variables"""
-    #firstly get the variables
+class Extension :
 
-    message(VERBOSE, "in get_vars(), args: %s" % args) 
+    def __init__(self, configs = {}) :
+        self.config = configs
 
-    if len(args) <= 1:
-        option, inFile, outFile = (None, None, None)
-    elif len(args) >= 4:
-        option, inFile, outFile = args[1:4]
-    elif len(args) == 3:
-        temp1, temp2 = args[1:3]
-        if temp1[0] == '-':
-            #then we have an option and inFile
-            option, inFile, outFile = temp1, temp2, None
-        else:
-            #we have no option, so we must have inFile and outFile
-            option, inFile, outFile = None, temp1, temp2
-    else:
-        #len(args) = 2
-        #we have only one usable arg: might be an option or a file
-        temp1 = args[1]
-        
-        message(VERBOSE, "our single arg is: %s" % str(temp1))
+    def getConfig(self, key) :
+        if self.config.has_key(key) :
+            #print self.config[key][0]
+            return self.config[key][0]
+        else :
+            return ""
 
-        if temp1[0] == '-':
-            #then we have an option 
-            option, inFile, outFile = temp1, None, None
-        else:
-            #we have no option, so we must have inFile
-            option, inFile, outFile = None, temp1, None
+    def getConfigInfo(self) :
+        return [(key, self.config[key][1]) for key in self.config.keys()]
+
+    def setConfig(self, key, value) :
+        self.config[key][0] = value
+
+
+OPTPARSE_WARNING = """
+Python 2.3 or higher required for advanced command line options.
+For lower versions of Python use:
+
+      %s INPUT_FILE > OUTPUT_FILE
     
-    message(VERBOSE,
-            "prior to validation, option: %s, inFile: %s, outFile: %s" %
-            (str(option), str(inFile), str(outFile),))
-    
-    return option, inFile, outFile
+""" % EXECUTABLE_NAME_FOR_USAGE
 
+def parse_options() :
+    """ Parse sys.argv and return the settings for markdownFromFile.
+
+        When optparse cannot be imported, only the plain form with
+        exactly one argument (the input file) is accepted; any other
+        invocation prints OPTPARSE_WARNING.
+
+        @return: a dictionary with the keys 'input', 'output',
+                 'message_threshold', 'safe', 'extensions' and
+                 'encoding', or None if the arguments are unusable
+                 (a usage or help message has been printed by then)
+    """
 
-USAGE = """
-\nUsing markdown.py:
+    try :
+        optparse = __import__("optparse")
+    except ImportError :
+        # Only a missing optparse module selects the fallback; any
+        # other exception is allowed to propagate.
+        if len(sys.argv) == 2 :
+            return {'input' : sys.argv[1],
+                    'output' : None,
+                    'message_threshold' : CRITICAL,
+                    'safe' : False,
+                    'extensions' : [],
+                    'encoding' : None }
 
-    python markdown.py [option] input_file_with_markdown.txt [output_file.html]
-
-Options:
-
-    -footnotes or -fn   : generate markdown with footnotes
-    -test or -t         : run a self-test
-    -help or -h         : print this message
-
-"""
-    
-VALID_OPTIONS = ['footnotes','nofootnotes', 'fn', 'test', 't', 'f',
-                 'help', 'h']
-
-EXPANDED_OPTIONS =  { "fn" : "footnotes",
-                      "t"  : "test",
-                      "h"  : "help" }
-
-
-def validate_option(option) :
-
-    """ Check if the option makes sense and print an appropriate message
-        if it isn't.
-        
-        @return: valid option string or None
-    """
-
-    #now validate the variables
-    if (option is not None):
-        if (len(option) > 1 and option[1:] in VALID_OPTIONS) :
-            option = option[1:]
-
-            if option in EXPANDED_OPTIONS.keys() :
-                option = EXPANDED_OPTIONS[option]
-            return option
-        else:
-            message(CRITICAL,
-                    "\nSorry, I don't understand option %s" % option)
-            message(CRITICAL, USAGE)
+        else :
+            print OPTPARSE_WARNING
             return None
 
+    parser = optparse.OptionParser(usage="%prog INPUTFILE [options]")
 
-def validate_input_file(inFile) :        
-    """ Check if the input file is specified and exists.
+    parser.add_option("-f", "--file", dest="filename",
+                      help="write output to OUTPUT_FILE",
+                      metavar="OUTPUT_FILE")
+    parser.add_option("-e", "--encoding", dest="encoding",
+                      help="encoding for input and output files",)
+    # -q, -v and --noisy all store into the same destination
+    # ("verbose"); its default is declared once, on -q.
+    parser.add_option("-q", "--quiet", default = CRITICAL,
+                      action="store_const", const=NONE, dest="verbose",
+                      help="suppress all messages")
+    parser.add_option("-v", "--verbose",
+                      action="store_const", const=INFO, dest="verbose",
+                      help="print info messages")
+    # default=False keeps 'safe' a boolean, matching the fallback
+    # dictionary returned when optparse is unavailable.
+    parser.add_option("-s", "--safe", default=False,
+                      action="store_const", const=True, dest="safe",
+                      help="safe mode (strip user's HTML tag)")
+    
+    parser.add_option("--noisy",
+                      action="store_const", const=VERBOSE, dest="verbose",
+                      help="print debug messages")
+    parser.add_option("-x", "--extension", action="append", dest="extensions",
+                      help = "load extension EXTENSION", metavar="EXTENSION")
 
-        @return: valid input file path or None
-    """
+    (options, args) = parser.parse_args()
 
-    if not inFile :
-        message(CRITICAL,
-                "\nI need an input filename.\n")
-        message(CRITICAL, USAGE)
+    if len(args) != 1 :
+        parser.print_help()
         return None
-    
-        
-    if os.access(inFile, os.R_OK):
-        return inFile
     else :
-        message(CRITICAL, "Sorry, I can't find input file %s" % str(inFile))
-        return None
+        input_file = args[0]
 
-    
-            
+    if not options.extensions :
+        options.extensions = []
 
-def cmd_line(args):
-
-    message(VERBOSE, "in cmd_line with args: %s" % args)
-
-    option, inFile, outFile = get_vars(args)
-
-    if option :
-        option = validate_option(option)
-        if not option : return
-
-    if option == "help" :
-        message(CRITICAL, USAGE)
-        return
-    elif option == "test" :
-        test_markdown(None)
-        return
-
-    inFile = validate_input_file(inFile)
-    if not inFile :
-        return
-    else :
-        input = file(inFile).read()
-
-    message(VERBOSE, "Validated command line parameters:" +             
-             "\n\toption: %s, \n\tinFile: %s, \n\toutFile: %s" % (
-             str(option), str(inFile), str(outFile),))
-
-    if option == "footnotes" :
-        md_function = markdownWithFootnotes
-    else :
-        md_function = markdown
-
-    if outFile is None:
-        print md_function(input)
-    else:
-        output = md_function(input)
-        f1 = open(outFile, "w+")
-        f1.write(output)
-        f1.close()
-        
-        if os.access(outFile, os.F_OK):
-            message(INFO, "Successfully wrote %s" % outFile)
-        else:
-            message(INFO, "Failed to write %s" % outFile)
-
+    return {'input' : input_file,
+            'output' : options.filename,
+            'message_threshold' : options.verbose,
+            'safe' : options.safe,
+            'extensions' : options.extensions,
+            'encoding' : options.encoding }
 
 if __name__ == '__main__':
-    """ Run Markdown from the command line.
-        Set debug = 3 at top of file to get diagnostic output"""
-    args = sys.argv
-        
-    #set testing=1 to test the command-line response of markdown.py
-    testing = 0
-    if testing:
-        test_markdown(args)
-    else:
-        import time
-        t0 = time.time()
-        #for x in range(10) :
-        cmd_line(args)
-        #import profile
-        #profile.run('cmd_line(args)', 'profile')
-        t1 = time.time()
-        #print "Time: %f - %f = %f" % (t1, t0, t1-t0)
+    """ Run Markdown from the command line. """
 
-"""
-CHANGELOG
-=========
+    options = parse_options()  # settings dictionary, or None when the arguments were unusable
 
-May 15, 2006: A bug with lists, recursion on block-level elements,
-run-in headers, spaces before headers, unicode input (thanks to Aaron
-Swartz). Sourceforge tracker #s: 1489313, 1489312, 1489311, 1488370,
-1485178, 1485176. (v. 1.5)
+    # NOTE(review): the former readability check, "if os.access(inFile, os.R_OK):", is disabled here -- TODO confirm markdownFromFile copes with an unreadable input file
 
-Mar. 24, 2006: Switched to a not-so-recursive algorithm with
-_handleInline.  (Version 1.4)
+    if not options :
+        sys.exit(0)  # parse_options() has already printed its usage/help text
+    
+    markdownFromFile(**options)  # the dictionary entries are passed as keyword arguments
 
-Mar. 15, 2006: Replaced some instance variables with class variables
-(a patch from Stelios Xanthakis).  Chris Clark's new regexps that do
-not trigger midword underlining.
 
-Feb. 28, 2006: Clean-up and command-line handling by Stewart
-Midwinter. (Version 1.3)
 
-Feb. 24, 2006: Fixed a bug with the last line of the list appearing
-again as a separate paragraph.  Incorporated Chris Clark's "mailto"
-patch.  Added support for <br /> at the end of lines ending in two or
-more spaces.  Fixed a crashing bug when using ImageReferencePattern.
-Added several utility methods to Nanodom.  (Version 1.2)
 
-Jan. 31, 2006: Added "hr" and "hr/" to BLOCK_LEVEL_ELEMENTS and
-changed <hr/> to <hr />.  (Thanks to Sergej Chodarev.)
 
-Nov. 26, 2005: Fixed a bug with certain tabbed lines inside lists
-getting wrapped in <pre><code>.  (v. 1.1)
 
-Nov. 19, 2005: Made "<!...", "<?...", etc. behave like block-level
-HTML tags.
 
-Nov. 14, 2005: Added entity code and email autolink fix by Tiago
-Cogumbreiro.  Fixed some small issues with backticks to get 100%
-compliance with John's test suite.  (v. 1.0)
 
-Nov. 7, 2005: Added an unlink method for documents to aid with memory
-collection (per Doug Sauder's suggestion).
 
-Oct. 29, 2005: Restricted a set of html tags that get treated as
-block-level elements.
 
-Sept. 18, 2005: Refactored the whole script to make it easier to
-customize it and made footnote functionality into an extension.
-(v. 0.9)
-
-Sept. 5, 2005: Fixed a bug with multi-paragraph footnotes.  Added
-attribute support.
-
-Sept. 1, 2005: Changed the way headers are handled to allow inline
-syntax in headers (e.g. links) and got the lists to use p-tags
-correctly (v. 0.8)
-
-Aug. 29, 2005: Added flexible tabs, fixed a few small issues, added
-basic support for footnotes.  Got rid of xml.dom.minidom and added
-pretty-printing. (v. 0.7)
-
-Aug. 13, 2005: Fixed a number of small bugs in order to conform to the
-test suite.  (v. 0.6)
-
-Aug. 11, 2005: Added support for inline html and entities, inline
-images, autolinks, underscore emphasis. Cleaned up and refactored the
-code, added some more comments.
-
-Feb. 19, 2005: Rewrote the handling of high-level elements to allow
-multi-line list items and all sorts of nesting.
-
-Feb. 3, 2005: Reference-style links, single-line lists, backticks,
-escape, emphasis in the beginning of the paragraph.
-
-Nov. 2004: Added links, blockquotes, html blocks to Manfred
-Stienstra's code
-
-Apr. 2004: Manfred's version at http://www.dwerg.net/projects/markdown/
-
-"""
-
-
-
-
-
-
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.