Commits

Anonymous committed 255dd8b

lists; image caption fix; documented parts of the parser source

Comments (0)

Files changed (2)

formatters/acceptor_f-mediawiki.lua

   end
 end
 
+--[[ldx--
+<source>Lists</source>
+<p>Lists work <em>very</em> differently in Wikitext from the way they work in
+<logo label="context"/>. This is, presumably, mainly due to the target backend
+being a browser, not a typesetting engine. In principle, Wikitext notation for
+lists is nothing more than a short notation for HTML list markup, and a very
+limited notation for that matter. As more complex things are recommended
+(see http://en.wikipedia.org/wiki/Help:List) to be expressed using HTML
+directly, there’s probably no way to correctly parse Wikitext lists without
+implementing at least a partial HTML renderer. Therefore, the following will
+work only with pure Wikitext lists.</p>
+--ldx]]--
+
+local utfcharacters = string.utfcharacters
+do
+  --local parse_prefix = function (pfx)
+    --local result = { }
+    --for char in utfcharacters(pfx) do
+      --result[#result+1] = char
+    --end
+    --return result
+  --end
+
+  --- Output templates. The first %s of each one receives the indentation
+  --- string of the current nesting depth.
+  local item_template         = "\n%s\\item %s"
+  local stopitemize_template  = "\n%s\\stopitemize"
+  local startitemize_template = "\n%s\\startitemize[%s]"
+  local definition_template   = "\n%s\\item {\\bold %s}\\par\\hskip2em %s"
+  --- Indentation string for nesting depth n (two spaces per level).
+  local item_indent           = function (n) return rep("  ", n) end
+  --- Stack holding the marker character of every list level that is
+  --- currently open; its length is the current nesting depth.
+  local liststack             = { }
+  --- Map a marker character onto the option string handed to \startitemize.
+  --- Returns nil for any marker other than "#", "*" and ";".
+  local itemize_formats       = function (f)
+    if     f == "#" then return "n"
+    elseif f == "*" then return tostring(#liststack)
+    elseif f == ";" then return ""
+    end
+  end
+
+  --- Convert one parsed list block into nested \startitemize/\stopitemize
+  --- markup and append the resulting string to docmain.
+  ---
+  --- raw: array of { prefix, content } pairs, one per list line, where
+  ---      prefix is the marker string (e.g. "*", "*#", ";") and content is
+  ---      the remainder of the line.
+  --- Returns nothing; the output is stored in docmain.
+  formatter.list_block = function (raw)
+    local result = { }
+    local item_cnt = tablemaxn(raw)
+    local last_prefix = ""
+    local pass -- true if the next line was already consumed as a definition
+    for n_item=1, item_cnt do
+      if pass == true then
+        pass = nil
+      else
+        local prefix, content = raw[n_item][1], raw[n_item][2]
+        if prefix ~= last_prefix then
+          local prefix_diff = #last_prefix - #prefix
+          if prefix_diff > 0 then -- pop
+            for i=1, prefix_diff do
+              result[#result+1] = fmt(stopitemize_template, item_indent(#liststack-1))
+              liststack[#liststack] = nil
+            end
+          elseif prefix_diff < 0 then -- push; only one level is opened per item
+            liststack[#liststack+1] = sub(prefix, #prefix)
+            local format = itemize_formats(liststack[#liststack])
+            result[#result+1] = fmt(startitemize_template, item_indent(#liststack-1), format)
+          else -- replace the current level, only one at a time
+            -- A ":" line directly following a ";" line of equal depth
+            -- belongs to the same definition list, so nothing is reopened.
+            if not (sub(last_prefix, #last_prefix) == ";" and
+                    sub(prefix,      #prefix     ) == ":") then
+              liststack[#liststack] = sub(prefix, #prefix)
+              local format = itemize_formats(liststack[#liststack])
+              result[#result+1] = fmt(stopitemize_template,  item_indent(#liststack-1))
+              result[#result+1] = fmt(startitemize_template, item_indent(#liststack-1), format)
+            end
+          end
+        end -- if prefix
+        if liststack[#liststack] == ";" then -- def list
+          local definiendum, definiens = match(content, "^([^:]+)[ \t\v]*:?[ \t\v]*(.*)$")
+          if not definiendum then
+            -- The pattern fails on empty content or content starting with a
+            -- colon; fall back to the raw content instead of passing nil on.
+            definiendum, definiens = content, ""
+          end
+          if definiens == "" then -- might be on next line
+            local nxt = raw[n_item+1] -- nil if this is the last item
+            if nxt then
+              local nxt_prefix, nxt_content = nxt[1], nxt[2]
+              if #prefix == #nxt_prefix and sub(nxt_prefix, #nxt_prefix) == ":" then -- continuation
+                definiens = nxt_content
+                pass      = true
+              end
+            end
+          end
+          result[#result+1] = fmt(definition_template,
+                                  item_indent(#liststack),
+                                  definiendum,
+                                  process_inline(definiens))
+        else
+          result[#result+1] = fmt(item_template, item_indent(#liststack), process_inline(content))
+        end
+        last_prefix = prefix
+      end -- if pass
+    end
+    -- Close every level that is still open, innermost first, so that a list
+    -- ending on a nested item yields balanced start/stop pairs. This also
+    -- leaves the shared stack empty for the next list block.
+    while #liststack > 0 do
+      result[#result+1] = fmt(stopitemize_template, item_indent(#liststack-1))
+      liststack[#liststack] = nil
+    end
+    docmain[#docmain+1] = tableconcat(result)
+  end
+end
+
+
 --======================================================================
 --  Inline Elements
 --======================================================================
   --table.print(data)
   local name       = gsub(data.name, " ", "_")
   local parameters = handle_image_parameters(data.parameters)
-  --local file_url   = media_wiki_url(name, parameters)
   local file_url   = media_wiki_url(name)
   local reference  = ""
   local position   = "here"

parsers/acceptor_p-mediawiki.lua

 --------------------------------------------------------------------------------
 --
 
+
+--[[ldx--
+<source>Parsers for WikiText markup.</source>
+
+<p>This file is part of the Acceptor module for <logo label="context"/>.</p>
+<p>As the processing of a Wiki markup file is done in multiple passes, we have
+a couple of different parsers for different stages.</p>
+--ldx]]--
+
 local parsers   = thirddata.acceptor.parsers
 local aux       = thirddata.acceptor.aux
 local formatter = thirddata.acceptor.formatters.wikimedia
 local stringlower = string.lower
 local utf8_char   = unicode.utf8.char
 
-local balanced_anything = P{
-  "balanced",
-  lbracket         = P"[",
-  rbracket         = P"]",
-  lbrace           = P"{",
-  rbrace           = P"}",
-  escaped          = P"\\" * (V"lbracket" + V"rbracket" + V"lbrace" + V"rbrace"),
-  balanced_inner   = (V"escaped"
-                   + V"balanced_brace"
-                   + V"balanced_bracket"
-                   + 1 - V"rbrace" - V"rbracket")^0
-                   ,
-  balanced_brace   = V"lbrace"   * V"balanced_inner" * V"rbrace",
-  balanced_bracket = V"lbracket" * V"balanced_inner" * V"rbracket",
-  balanced         = V"balanced_brace" + V"balanced_bracket",
-}
+--[[ldx--
+<p>First we define some terminal symbols and other low-level entities that are
+expected to occur in multiple parsers.</p>
+--ldx]]--
 
-------------------------------------------------------------------------
---  Fundamentals: Low-Level Rules and Terminals
-------------------------------------------------------------------------
 local newline               = P"\r\n" + P"\n\r" + P"\n" + P"\r"
 local eof                   = P(-1)
 local bol                   = newline
                             
 local character             = whitespace_char + non_whitespace_char + html_entity
 local characters            = character^1 -- not in the spec but referred to everywhere
-                            
+
 local lbrack                = P"["
+
+--[[ldx--
+<p>The <type>balanced</type> patterns are needed for distinguishing internal
+markup from ordinary characters in nested elements like captions. This can get
+messy, as double-brace terminated elements like templates may themselves
+contain other double-brace terminated stuff ...</p>
+--ldx]]--
+
 local rbrack                = P"]"
 local double_lbrack         = lbrack * lbrack
 local double_rbrack         = rbrack * rbrack
   balanced  = double_lbrace * V"_balanced"^-1 * double_rbrace,
 }
 
-                            
+local balanced_anything = P{ -- grammar for one brace- or bracket-delimited group, nesting allowed
+  "balanced", -- name of the start rule
+  escaped          = P"\\" * (lbrack + rbrack + lbrace + rbrace), -- backslash followed by one delimiter
+  balanced_inner   = (V"escaped" -- escapes and nested groups are consumed as units
+                   + V"balanced_brace"
+                   + V"balanced_bracket"
+                   + 1 - rbrace - rbrack)^0 -- `-` associates like `+`, so both exclusions guard the whole choice
+                   ,
+  balanced_brace   = lbrace * V"balanced_inner" * rbrace, -- lbrace/rbrace are defined outside this view; presumably "{" and "}"
+  balanced_bracket = lbrack * V"balanced_inner" * rbrack, -- "[" ... "]"
+  balanced         = V"balanced_brace" + V"balanced_bracket", -- either kind of group
+}
+
 --- No comment: „Harmless-characters mean characters that couldn't be anything
 ---              else.“
 ---              <http://www.mediawiki.org/wiki/Markup_spec/BNF/Inline_text#Text>
 --- Let’s give it a shot ...
-local  harmless_character   = letter + decimal_digit
+local harmless_character    = letter + decimal_digit
 local word_char             = ucase_letter + lcase_letter + decimal_digit
 local non_word_char         = 1 - word_char
                             
                             
 local goodies               = Cs(triple_dot / "\\dots ")
                             + Cs(underscore / "\\letterunderscore ")
-                            
+
+
+--[[ldx--
+<p>Special patterns for HTML-style elements.
+The Wikimedia parser would pass them straight through to the output, which is
+not quite feasible with a <logo label="tex"/> backend.
+Therefore, only a subset of the given CSS arguments will be recognized at all
+and even those will have to be mapped to some meaningful equivalent; it’s more
+a matter of interpretation.</p>
+--ldx]]--
+
 local url, CSS_parameter_list, permitted_HTML_tags
 do
   local CSS_parameter_pair    = C(harmless_character^1) / stringlower
   local CSS_parameter__list   = (space_tab^0 * Cg(CSS_parameter_pair))^0
   CSS_parameter_list          = Cf(Ct"" * CSS_parameter__list, rawset) * space_tab^0
 
-    --protocol                = ALLOWED_PROTOCOL_FROM_CONFIG (e.g. "http://", "mailto:")
   local protocol              = P"http://" -- what else?
                               + P"mailto:"
                               + P"ftp://"
                               + S"!*'();:@&=+$,/?#[]-_.~"
                               + escaped_url_char
   local url_path              = url_char^1
-    --url_char                = LEGAL_URL_ENTITY
   url                         = protocol * url_path
 
   local permitted_HTML_tags   = P"var"    + P"ul"      + P"u"      + P"tt"
 
 local list_item_marker      = colon + gartenzaun + asterisk
 
---category_namespace = NS_CATEGORY
---category_namespace = R("az", "AZ")^1,
 --- http://en.wikipedia.org/wiki/Help:Category
 --- TODO: What about other wikis??
 category_namespace          = P"Main"            + P"Talk"
                             + P"Book talk"       + P"Book"
 
 
---======================================================================
---  Inline Text
---======================================================================
+--[[ldx--
+<source>Inline Text</source>
+<p>The main parsing is split into two stages that correspond vaguely to <logo
+label="tex"/>’s <t>vertical</t> and <t>horizontal</t> modes.
+The latter stage is concerned with mapping a sequence of elements that can
+appear inside a paragraph onto their <logo label="context"/> counterparts.</p>
+<p>This element parser was initially based on the BNFs at
+http://www.mediawiki.org/wiki/Markup_spec/BNF
+and
+http://slps.sourceforge.net/zoo/wiki/mediawiki-bnf.html,
+neither of which is complete.
+As they describe ambiguous context-free grammars, they naturally had to be
+restructured for use with lpeg.
+Other parts had to be guessed or imported from elsewhere, e.g. CSS,
+html-attributes etc., which lacked a description (bottom non-terminals).</p>
+--ldx]]--
+
 parsers.p_wiki_inline = P{
   "inline_text",
   inline_text          = Ct((V"innocent_text" + V"inline_element")^1),
                        + V"nbsp_after"
                        + html_entity
                        + html_unsafe_symbol
-                       --+ V"text"
-                       --+ V"random_character"
                        ,
   text                 = (V"plain_text" - newline)
-                       --+ V"wiki_markup_characters"
                        ,
   plain_text           = P"<nowiki>" * S[[|[]<>{}]]^0 * P"</nowiki>"
                        + V"unicode_wiki" * space^0 * S[["*#:;]]
                        + P" '" * V"unicode_wiki"
                        + V"unicode_wiki"
                        + V"single_markup_char" -- hack
-                       --+ V"no_format_apostrophe"
                        ,
   no_format_apostrophe   = P"'" - V"formatting",
   unicode_wiki           = utf8char - V"wiki_markup_characters",
                        + V"bold_toggle"
                        + V"italic_toggle"
                        ,
-  --bold_italic_toggle   = P[[''''']] / function () return "\\toggle_bi " end,
-  --bold_toggle          = P[[''']]   / function () return "\\toggle_bi " end,
-  --italic_toggle        = P[['']]    / function () return "\\toggle_bi " end,
   bold_italic_toggle   = Cs(P[[''''']] / [=[\togglewiki[bolditalic]]=]),
   bold_toggle          = Cs(P[[''']]   / [=[\togglewiki[bold]]=]      ),
   italic_toggle        = Cs(P[['']]    / [=[\togglewiki[italic]]=]    ),
-  --- Those were undefined
 
-  ------------------------------------------------------------------------
-  --  References
-  ------------------------------------------------------------------------
+  --[[ldx--
+  <p><em>References</em>.</p>
+  --ldx]]--
+
 
   reference        = Ct(V"empty_reference" + V"normal_reference")
   --reference        = Ct(V"normal_reference" + V"empty_reference")
   reference_body   = Cg((1 - V"reference_stop")^0, "content")
                    ,
 
-  --- Inline HTML (argh!)
-  --- (incomplete!) tag list from
-  --- <http://meta.wikimedia.org/wiki/Help:HTML_in_wikitext#Permitted_HTML>
+  --[[ldx--
+  <p><em>Inline HTML</em></p>
+  <p>(<em>incomplete</em>!) tag list from
+  http://meta.wikimedia.org/wiki/Help:HTML_in_wikitext#Permitted_HTML
+  </p>
+  --ldx]]--
   inline_HTML            = V"permitted_HTML_empty" + V"permitted_HTML"
                          --+ V"HTML_comment"
                          ,
   behaviourswitch_notoc         = P"__NOTOC__",
   behaviourswitch_noeditsection = P"__NOEDITSECTION__",
   behaviourswitch_nogallery     = P"__NOGALLERY__",
-  --- Images (messy)
+
+  --[[ldx--
+  <p><em>Images</em>.</p>
+  --ldx]]--
+
   image_inline           = Ct(V"image_start"
                             * V"image_name"
                             * Cg(V"image_elms", "parameters")
                          + P"middle"   + P"bottom" + P"text-bottom"
                          ,
   
-  caption                = balanced_anything^1,
+  caption                = (balanced_double_brack + 1 - double_rbrack)^1,
   --- Media
   media_inline        = V"media_inline_start"
                       * C((1 - double_rbrack)^1)
                         * space_tab^0
                         * Cg((1 - whitespace_rest)^0, "caption")
                         ,
-  ------------------------------------------------------------------------
-  --  Noparse-Block
-  ------------------------------------------------------------------------
+
+  --[[ldx--
+  <p><em>Noparse block</em>.</p>
+  --ldx]]--
+
   noparse_block = V"nowiki_block"
                 + V"HTML_block"
                 --+ V"math_block" -- not in spec
   --nowiki_body        = characters,
 
   pre_block       = V"pre_opening_tag"
-                  --* whitespace^0
-                  --* V"pre_body"
-                  --* whitespace^0
                   * (1 - V"pre_closing_tag")^0
                   * V"pre_closing_tag"^-1
                   ,
   pre_opening_tag = P"<pre" * (whitespace * characters)^0 * P">",
   pre_closing_tag = P"</pre" * whitespace * P">",
-  --pre_body        = characters,
 
   HTML_block       = V"HTML_opening_tag"
                    * (1 - V"HTML_closing_tag")^0
   HTML_comment_start = P"<!--",
   HTML_comment_stop  = P"-->",
 
-  ------------------------------------------------------------------------
-  --  Links
-  ------------------------------------------------------------------------
+  --[[ldx--
+  <source>Links</source>
+  <p><em>Internal links</em>.</p>
+  --ldx]]--
 
-  --- Internal links
   internal_link       = Ct(V"_internal_link")
                       / formatter.internal_link
                       ,
                       * C(V"article_link")
                       * (gartenzaun * Cg(V"section_id", "section_id"))^-1
                       * (bar * Cg(V"internal_link_description", "description"))^-1
-                      * V"internal_link_end"
+                      * V"internal_link_stop"
                       * Cg(V"extra_description", "extra_description")^-1 -- ?
                       ,
-  --article_link        = (V"interwiki_prefix" + colon)^-1
-                      --* V"namespace_prefix"^-1 * V"article_title"
-                      --,
   article_link        = (V"interwiki_prefix" + colon)^-1 * V"namespace_prefix"^-1 * V"article_title"
                       + P"/"     * V"article_title"
                       + P"../"^0 * V"article_title"^-1
   namespace_prefix    = V"namespace"^-1 * colon,
   namespace           = letter^1,
   
-  --internal_link_description = Ct(Cs((goodies
-                                   --+ 1 - V"inline_element" - double_rbrack)^1
-                                  --+ V"inline_element")^0)
-                            --,
   ignore_formatting   = (V"formatting" / "" + 1),
   internal_link_description = Ct(Cs((goodies -- formatting in links drives ConTeXt mad ...
                                    + V"ignore_formatting" - double_rbrack)^1)^0)
   extra_description   = letter^1,
   
   internal_link_start = double_lbrack,
-  internal_link_end   = double_rbrack,
+  internal_link_stop  = double_rbrack,
   
   section_id          = (title_legal_chars - P"|" + P"%" + P"#")^1,
 
-  --- Categories
+  --[[ldx--
+  <p><em>Category links</em>.</p>
+  --ldx]]--
+
   category_link      = Ct(V"internal_link_start"
                         * Cg(category_namespace, "namespace")
                         * colon
-                        * Cg((1 - bar - V"internal_link_end")^0, "category")
+                        * Cg((1 - bar - V"internal_link_stop")^0, "category")
                         * (bar * Cg(V"sort_key", "sortkey"))^-1
-                        * V"internal_link_end")
+                        * V"internal_link_stop")
                      / formatter.category_link
                      ,
   sort_key           = harmless_character^1,  -- improvised (utf8char would match everything ...
 
-  --- External links
+  --[[ldx--
+  <p><em>External links</em>.</p>
+  --ldx]]--
+
   external_link       = Ct(V"external_link_start"
                          * C(url)
                          --* whitespace^-1 -- actually, it doesn’t accept newlines here!
   external_link_start = lbrack,
   external_link_end   = rbrack,
 
-  ------------------------------------------------------------------------
-  --  Magic Links
-  ------------------------------------------------------------------------
+  --[[ldx--
+  <p><em>Magic links</em>.</p>
+  --ldx]]--
+  --- TODO: find a use for magic links
+
   magic_link    = V"isbn" + V"rfc_number" + V"pmid_number",
   isbn          = P"ISBN" * P" "^0 * V"isbn_number" * non_word_char^-1,
   isbn_number   = (P"97" * S"89" * S" -"^-1)
 } --[[ 6018 Rules ]]
 
 
-------------------------------------------------------------------------
---  Block Scanner
-------------------------------------------------------------------------
+
+--[[ldx--
+<source>Block Scanner</source>
+<p>Corresponding to <logo label="tex"/>’s vertical mode, the block scanner
+breaks down the input into successive blocks and passes them to their
+respective handlers.</p>
+--ldx]]--
 
 parsers.p_wiki_block = P{ -- Block scanner (no inline).
   "wiki_page",
 
-  ------------------------------------------------------------------------
-  -- Titles
-  ------------------------------------------------------------------------
-  canonical_article_title   = V"canonical_page" * V"canonical_sub_pages"^-1,
-  
-  canonical_sub_pages       = V"canonical_sub_page" * V"canonical_sub_pages"^-1,
-  canonical_sub_page        = V"sub_page_separator" * V"canonical_page_chars",
-  
-  canonical_page            = V"canonical_page_first_char" * V"canonical_page_chars"^-1,
-  canonical_page_chars      = V"canonical_page_char" * V"canonical_page_chars"^-1,
-  
-  canonical_page_first_char = ucase_letter + decimal_digit + underscore,
-  canonical_page_char       = letter + decimal_digit + underscore,
-  
-  sub_page_separator        = P"/",
+  --[[ldx--
+  <p><em>Article</em>.
+  The top level rule is <t>wiki_page</t>, referring either to a redirect (not
+  implemented) or a valid article.</p>
+  --ldx]]--
 
-  --article_title        = V"page" * V"sub_pages"^-1,
-  sub_pages            = V"sub_page" * V"sub_pages"^-1,
-  sub_page             = V"sub_page_separator" * V"page_chars",
-  page                 = V"page_first_char" * V"page_chars"^-1,
-  page_chars           = V"page_char" * V"page_chars"^-1,
-  page_first_char      = V"canonical_page_first_char" + lcase_letter,
-  page_char            = V"canonical_page_char" + space,
-  page_name            = title_character^1,
-  bad_title_characters = S"[]{}<>_|#",
-  title_character      = utf8char - bad_title_characters,
-
-  ------------------------------------------------------------------------
-  --  Article
-  ------------------------------------------------------------------------
-  wiki_page    = Ct(V"redirect" * V"article"^-1
-                  + V"article"^-1)
-               ,
+  wiki_page           = Ct(V"redirect" * V"article"^-1
+                         + V"article"^-1)
+                      ,
                            
-  redirect     = V"redirect_tag"
-               * characters
-               * V"internal_link_start"
-               * (1 - V"redirect_stop")^1
-               * V"redirect_stop"
-               ,
-  redirect_stop       = V"internal_link_end" + bar + newline,
+  redirect            = V"redirect_tag"
+                      * characters
+                      * V"internal_link_start"
+                      * (1 - V"redirect_stop")^1
+                      * V"redirect_stop"
+                      ,
+  redirect_stop       = V"internal_link_stop" + bar + newline,
   internal_link_start = double_lbrack,
-  internal_link_end   = double_rbrack,
+  internal_link_stop  = double_rbrack,
   
   --redirect_tag = FROM_LANGUAGE_FILE
-  redirect_tag = P"#redirect",
+  redirect_tag        = P"#redirect",
 
   article                = (whitespace_rest^0 * V"block")^1
                          ,
   paragraph_and_more     = V"paragraph"
                          ,
 
+
+  --[[ldx--
+  <p><em>Paragraphs</em>. A paragraph is just a bunch of consecutive lines whose
+  first characters don’t match any of the special block markers.</p>
+  --ldx]]--
+
   paragraph     = C(V"lines_of_text")
                 / formatter.paragraph
                 ,
                  --+ V"space_block")
                 + whitespace_rest
                 ,
-  ------------------------------------------------------------------------
-  --  Start Markers
-  ------------------------------------------------------------------------
+
+  --[[ldx--
+  <p><em>Start Markers</em>, easing the recognition of a new type of block.</p>
+  --ldx]]--
+
   list_start    = list_item_marker,
   table_start   = table_start_marker,
 
-  ------------------------------------------------------------------------
-  --  Special Blocks
-  ------------------------------------------------------------------------
+  --[[ldx--
+  <p><em>Special Blocks</em>.</p>
+  --ldx]]--
+
   special_block    = V"horizontal_rule"
                    + V"heading" 
-                   + V"list_item"
+                   + V"list_block"
                    + Ct(V"table")
                    + V"space_block"
                    + V"template_block"
   template_content = C(balanced_double_brace)
                    / formatter.template_block
                    ,
-  --- Rules
+  --[[ldx--
+  <p><em>Rules</em>. Wikitext seems to support only very thin horizontal rules.</p>
+  --ldx]]--
+
   horizontal_rule  = dash^4 * (1 - newline)^1 * newline,
-  --dashes           = P"-" * V"dashes",
-  --- Headings
-  --- The following exception renders any definition by means of fixed amounts
-  --- of equal signs incomplete:
-  ---   “Unbalanced tags are treated as the shorter of the two tags”
-  ---   <http://www.mediawiki.org/wiki/Markup_spec/BNF/Special_block#Heading>
-  --- Therefore, the associated function will have to determine the heading
-  --- level based on the shorter marker string.
+
+
+  --[[ldx--
+  <p><em>Headings</em>.</p>
+  <p>
+  The following exception renders any definition by means of fixed amounts
+  of equal signs incomplete:
+    “Unbalanced tags are treated as the shorter of the two tags”
+    http://www.mediawiki.org/wiki/Markup_spec/BNF/Special_block#Heading
+  Therefore, the associated function will have to determine the heading
+  level based on the shorter marker string.
+  </p>
+  --ldx]]--
   heading          = C(V"heading_marker")
                    --* C((1 - V"heading_marker" * whitespace_rest)^1)
                    * C((1 - V"heading_marker")^1)
                    / formatter.heading
                    ,
   heading_marker   = equals^1,
-  --- Lists
-  --- “indent_item” -> definition list
-  --list_item        = V"indent_item" +  V"enumerated_item" + V"bullet_item",
-  --indent_item      = colon      * (V"list_item" + V"item_body")^-1,
-  --enumerated_item  = gartenzaun * (V"list_item" + V"item_body")^-1,
-  --bullet_item      = asterisk   * (V"list_item" + V"item_body")^-1,
-  --- rewritten to avoid recursion
-  list_item        = (V"indent_item" +  V"enumerated_item" + V"bullet_item")^1 * V"item_body"^-1,
-  indent_item      = colon,
+
+  --[[ldx--
+  <p><em>Lists</em>, unordered, numbered and definition lists. The latter
+  turn out to be a minor hassle, because they are permitted to extend
+  onto the following line, if its marker string ends with a colon instead of a
+  semicolon.</p>
+  --ldx]]--
+
+  list_block       = Ct(V"first_item" * V"list_item"^0) / formatter.list_block,
+  first_item       = Ct(C((V"def_item" + V"enumerated_item" + V"bullet_item")^1)                    * space_tab^0 * V"item_rest"),
+  list_item        = Ct(C((V"def_item" + V"enumerated_item" + V"bullet_item" + V"def_item_cont")^1) * space_tab^0 * V"item_rest"),
+  item_rest        = C(V"item_body"^-1) * newline,
+  def_item         = semicolon,
+  def_item_cont    = colon,
   enumerated_item  = gartenzaun,
   bullet_item      = asterisk,
   item_body        = V"defined_term"
                    ,
   defined_term     = semicolon * (1 - newline)^1 * V"definition"^-1,
   definition       = colon     * (1 - newline)^1,
-  --- Table
+
+  --[[ldx--
+  <p><em>Tables</em>.</p>
+  --ldx]]--
+
   table                    = Ct(V"_table")
                            / formatter.table
                            ,
                                  * V"table__caption")
                               , "caption")
                            ,
-  --table_header             = (space * V"table_parameters")^-1
-                           --* newline
-                           --* V"table_caption"
-                           --,
-  --- Space Block          
+
+  --[[ldx--
+  <p><em>Space Blocks</em>. Those have yet to be implemented.</p>
+  --ldx]]--
+
   space_block              = space * (1 - newline)^1 * newline * V"space_block_2"^0,
   space_block_2            = space * (1 - newline)^0 * newline,
 } --[[ 3519 Rules ]]
 
-------------------------------------------------------------------------
---  Infoboxes
-------------------------------------------------------------------------
+
+--[[ldx--
+<source>Infoboxes</source>
+<p>Infoboxes are fancy markup exceptions that are often encountered at the top
+of WP articles. Basically, they consist of a series of bar-delimited key-value
+statements, and as such get parsed into a hash table.
+As the rendering of an infobox is highly dependent on its respective
+<t>subtype</t>, there will probably never be a meaningful <logo
+label="context"/> mapping for all of them.</p>
+--ldx]]--
 
 parsers.p_wiki_infobox = P{
   "infobox",
   whitespace_rest   = space_tab^0 * (eol + eof),
 }
 
+
+--[[ldx--
+<source>Escaping</source>
+<p>Escaping is done in two steps.
+First, some characters are replaced that are not assigned a markup purpose in
+Wikitext, but would interfere with <logo label="context"/>. This normally
+happens before the inline parser is applied.
+The second step handles elements that have a meaning in both markups and is
+applied only after the conversion.</p>
+--ldx]]--
+
+-- TODO: find out which of the following could safely be moved to the “goodies” pattern
 do
   local pre_escape_chars = {
     ["$"] = [[{\letterdollar}]],