Commits

Anonymous committed 6229ce3

SmartQuotes transform: language-dependent quote characters.

  • Participants
  • Parent commits 01a31b9

Comments (0)

Files changed (7)

docutils/HISTORY.txt

 ==================
 
 :Author: David Goodger; open to all Docutils developers
-:Contact: goodger@python.org
+:Contact: docutils-develop@lists.sourceforge.net
 :Date: $Date$
 :Revision: $Revision$
 :Web site: http://docutils.sourceforge.net/
 
 * docutils/transforms/universal.py
 
-  - Add SmartQuotes transform for typographic quotes and dashes.
+  - SmartQuotes transform for typographic quotes and dashes.
 
 * docutils/utils/__init__.py
 

docutils/RELEASE-NOTES.txt

 ========================
 
 :Contact: grubert@users.sourceforge.net
+:Maintainer: docutils-develop@lists.sourceforge.net
 :Date: $Date$
 :Revision: $Revision$
 :Web site: http://docutils.sourceforge.net/
 
 * General:
 
+  - SmartQuotes transform for typographic quotes and dashes.
+
   - ``docutils/math``, ``docutils/error_reporting.py``, and
     ``docutils/urischemes.py`` moved to the utils package.
     Code importing these modules needs to adapt, e.g.::

docutils/docs/user/config.txt

 interchangeably in entry names; hyphens are automatically converted to
 underscores.
 
-For on/off switch settings (booleans), the following values are
+For on/off switch settings (_`booleans`), the following values are
 recognized:
 
 :On: "true", "yes", "on", "1"
 
 List values can be comma- or colon-delimited.
 
-strip_classes_, strip_elements_with_classes_, stylesheet_, and
-stylesheet_path_ use the comma as delimiter,
+strip_classes_, strip_elements_with_classes_, stylesheet, and
+stylesheet_path use the comma as delimiter,
 whitespace around list values is stripped. ::
 
     strip-classes: ham,eggs,
 
 
 _`syntax_highlight`
-
     Token type names used by Pygments_ when parsing contents of the code_
     directive and role.
 
 
     Default: "long".  Option: ``--syntax-highlight``.
 
+    New in Docutils 0.9.
+
 .. _Pygments: http://pygments.org/
 .. _code: ../ref/rst/directives.html#code
 .. _Pygments-generated stylesheets:
    http://pygments.org/docs/cmdline/#generating-styles
 
+_`smart_quotes`
+    Change straight quotation marks to typographic form. `Quote characters`_
+    are selected according to the language of the current block element (see
+    language_code_). Also changes consecutive runs of hyphen-minus and full
+    stops (``---``, ``--``, ``...``) to em-dash, en-dash and ellipsis
+    Unicode characters respectively.
+
+    Supported values:
+
+    booleans_ (yes/no)
+      Use smart quotes?
+
+    alt (or "alternative")
+      Use alternative quote set (if defined for the language).
+
+    Default: "no". Option: ``--smart-quotes``.
+
+    New in Docutils 0.10.
+
+.. _quote characters:
+   http://en.wikipedia.org/wiki/Non-English_usage_of_quotation_marks
+
 
 [readers]
 ---------

docutils/docutils/nodes.py

                 index = node.parent.index(node)
                 for sibling in node.parent[index+1:]:
                     r.extend(sibling.traverse(include_self=True,
-                                              descend=descend, 
+                                              descend=descend,
                                               siblings=False, ascend=False,
                                               condition=condition))
                 if not ascend:
 
     has_key = hasattr
 
-    # support operator in
+    # support operator ``in``
     __contains__ = hasattr
 
+    def get_language_code(self, fallback=''):
+        """Return node's language tag.
+
+        Look iteratively in self and parents for a class argument
+        starting with ``language-`` and return the remainder of it
+        (which should be a `BCP 47` language tag) or the `fallback`.
+        """
+        for cls in self.get('classes', []):
+            if cls.startswith('language-'):
+                return cls[9:]
+        try:
+            return self.parent.get_language(fallback)
+        except AttributeError:
+            return fallback
+
     def append(self, item):
         self.setup_child(item)
         self.children.append(item)

docutils/docutils/transforms/universal.py

 # $Id$
-# Authors: David Goodger <goodger@python.org>; Ueli Schlaepfer
+# -*- coding: utf8 -*-
+# Authors: David Goodger <goodger@python.org>; Ueli Schlaepfer; Günter Milde
+# Maintainer: docutils-develop@lists.sourceforge.net
 # Copyright: This module has been placed in the public domain.
 
 """
 
     default_priority = 850
 
-    texttype = {True: 'literal',
-                False: 'plain'}
+    def __init__(self, document, startnode):
+        Transform.__init__(self, document, startnode=startnode)
+        self.unsupported_languages = set()
+
+    def get_tokens(self, txtnodes):
+        # A generator that yields ``(texttype, nodetext)`` tuples for a list
+        # of "Text" nodes (interface to ``smartquotes.educate_tokens()``).
+
+        texttype = {True: 'literal', # "literal" text is not changed:
+                    False: 'plain'}
+        for txtnode in txtnodes:
+            nodetype = texttype[isinstance(txtnode.parent,
+                                           (nodes.literal,
+                                            nodes.math,
+                                            nodes.image,
+                                            nodes.raw,
+                                            nodes.problematic))]
+            yield (nodetype, txtnode.astext())
+
 
     def apply(self):
-        if self.document.settings.smart_quotes is False:
+        smart_quotes = self.document.settings.smart_quotes
+        if not smart_quotes:
             return
-        
+        try:
+            alternative = smart_quotes.startswith('alt')
+        except AttributeError:
+            alternative = False
+        # print repr(alternative)
+
+        document_language = self.document.settings.language_code
+
         # "Educate" quotes in normal text. Handle each block of text
         # (TextElement node) as a unit to keep context around inline nodes:
         for node in self.document.traverse(nodes.TextElement):
             txtnodes = [txtnode for txtnode in node.traverse(nodes.Text)
                         if not isinstance(txtnode.parent,
                                           nodes.option_string)]
-            # smartquotes.educate_tokens() iterates over
-            # ``(texttype, nodetext)`` tuples. `texttype` is "literal" 
-            # or "plain" where "literal" text is not changed:
-            tokens = [(self.texttype[isinstance(txtnode.parent,
-                                                  (nodes.literal,
-                                                   nodes.math,
-                                                   nodes.image,
-                                                   nodes.raw,
-                                                   nodes.problematic))],
-                       txtnode.astext()) for txtnode in txtnodes]
 
-            # Iterator educating quotes in plain text
-            # 2 : set all, using old school en- and em- dash shortcuts
-            teacher = smartquotes.educate_tokens(tokens, attr='2')
+            # language: use smart-quotes for language "lang"
+            lang = node.get_language_code(document_language)
+            # use alternative form if `smart-quotes` setting starts with "alt":
+            if alternative:
+                if '-x-altquot' in lang:
+                    lang = lang.replace('-x-altquot', '')
+                else:
+                    lang += '-x-altquot'
+            # drop subtags missing in quotes:
+            for tag in utils.normalize_language_tag(lang):
+                if tag in smartquotes.smartchars.quotes:
+                    lang = tag
+                    break
+            else: # language not supported: (keep ASCII quotes)
+                if lang not in self.unsupported_languages:
+                    self.document.reporter.warning('No smart quotes '
+                        'defined for language "%s".'%lang, base_node=node)
+                self.unsupported_languages.add(lang)
+                lang = ''
+
+            # Iterator educating quotes in plain text:
+            # '2': set all, using old school en- and em- dash shortcuts
+            teacher = smartquotes.educate_tokens(self.get_tokens(txtnodes),
+                                                 attr='2', language=lang)
 
             for txtnode, newtext in zip(txtnodes, teacher):
                 txtnode.parent.replace(txtnode, nodes.Text(newtext))
+
+            self.unsupported_languages = set() # reset

docutils/docutils/utils/smartquotes.py

 # :Copyright: © 2010 Günter Milde,
 #             original `SmartyPants`_: © 2003 John Gruber
 #             smartypants.py:          © 2004, 2007 Chad Miller
+# :Maintainer: docutils-develop@lists.sourceforge.net
 # :License: Released under the terms of the `2-Clause BSD license`_, in short:
 #
 #    Copying and distribution of this file, with or without modification,
 ========  =====  =========
 
 This is useful, for example, when you want to use straight quotes as
-foot and inch marks: 6'2" tall; a 17" iMac.
+foot and inch marks: 6\\'2\\" tall; a 17\\" iMac.
 
 Options
 =======
 "-1"
         Stupefy mode. Reverses the SmartyPants transformation process, turning
         the characters produced by SmartyPants into their ASCII equivalents.
-        E.g.  "“" is turned into a simple double-quote ("), "—" is
+        E.g.  "“" is turned into a simple double-quote (\"), "—" is
         turned into two dashes, etc.
 
 
 Version History
 ===============
 
+1.7:    2012-11-19
+        - Internationalization: language-dependent quotes.
+
 1.6.1:  2012-11-06
         - Refactor code, code cleanup,
         - `educate_tokens()` generator as interface for Docutils.
 
 import re
 
-class smart(object):
+class smartchars(object):
     """Smart quotes and dashes
+    """
 
-    TODO: internationalization, see e.g.
-    http://de.wikipedia.org/wiki/Anf%C3%BChrungszeichen#Andere_Sprachen
-    """
     endash   = u'–' # "&#8211;" EN DASH
     emdash   = u'—' # "&#8212;" EM DASH
-    lquote   = u'‘' # "&#8216;" LEFT SINGLE QUOTATION MARK
-    rquote   = u'’' # "&#8217;" RIGHT SINGLE QUOTATION MARK
-    #lquote  = u'‚' # "&#8218;" SINGLE LOW-9 QUOTATION MARK (German)
-    ldquote  = u'“' # "&#8220;" LEFT DOUBLE QUOTATION MARK
-    rdquote  = u'”' # "&#8221;" RIGHT DOUBLE QUOTATION MARK
-    #ldquote = u'„' # "&#82212" DOUBLE LOW-9 QUOTATION MARK (German)
     ellipsis = u'…' # "&#8230;" HORIZONTAL ELLIPSIS
 
-def smartyPants(text, attr=default_smartypants_attr):
+    # quote characters (language-specific, set in __init__())
+    #
+    # English smart quotes (open primary, close primary, open secondary, close
+    # secondary) are:
+    #   opquote  = u'“' # "&#8220;" LEFT DOUBLE QUOTATION MARK
+    #   cpquote  = u'”' # "&#8221;" RIGHT DOUBLE QUOTATION MARK
+    #   osquote  = u'‘' # "&#8216;" LEFT SINGLE QUOTATION MARK
+    #   csquote  = u'’' # "&#8217;" RIGHT SINGLE QUOTATION MARK
+    # For other languages see:
+    # http://en.wikipedia.org/wiki/Non-English_usage_of_quotation_marks
+    # http://de.wikipedia.org/wiki/Anf%C3%BChrungszeichen#Andere_Sprachen
+    quotes = {'af':        u'“”‘’',
+              'af-x-altquot': u'„”‚’',
+              'ca':           u'«»“”',
+              'ca-x-altquot': u'“”‘’',
+              'cs':           u'„“‚‘',
+              'cs-x-altquot': u'»«›‹',
+              'de':           u'„“‚‘',
+              'de-x-altquot': u'»«›‹',
+              'de-ch':        u'«»‹›',
+              'el':           u'«»“”',
+              'en':           u'“”‘’',
+              'en-UK':        u'‘’“”',
+              'eo':           u'“”‘’',
+              'es':           u'«»“”',
+              'es-x-altquot': u'“”‘’',
+              'fi':           u'””’’',
+              'fi-x-altquot': u'»»’’',
+              'fr':           (u'« ',  u' »', u'‹ ', u' ›'),
+              'fr-x-altquot': (u'“ ',  u' ”', u'‘ ', u' ’'),
+              'fr-ch':        u'«»‹›',
+              'he':           u'”“»«',
+              'he-x-altquot': u'„”‚’',
+              'it':           u'«»“”',
+              'it-ch':        u'«»‹›',
+              'it-x-altquot': u'“”‘’',
+              'ja':           u'「」『』',
+              'lt':           u'„“‚‘',
+              'nl':           u'“”‘’',
+              'nl-x-altquot': u'„”‚’',
+              'pl':           u'„”«»',
+              'pl-x-altquot': u'«»“”',
+              'pt':           u'«»“”',
+              'pt_br':        u'“”‘’',
+              'ro':           u'„”«»',
+              'ro-x-altquot': u'«»„”',
+              'ru':           u'«»„“',
+              'sk':           u'„“‚‘',
+              'sk-x-altquot': u'»«›‹',
+              'sv':           u'„“‚‘',
+              'sv-x-altquot': u'»«›‹',
+              'zh_cn':        u'“”‘’',
+              'it':           u'«»“”',
+              'zh_tw':        u'「」『』',
+             }
+
+    def __init__(self, language='en'):
+        self.language = language
+        try:
+            (self.opquote, self.cpquote,
+             self.osquote, self.csquote) = self.quotes[language]
+        except KeyError:
+            self.opquote, self.cpquote, self.osquote, self.csquote = u'""\'\''
+
+
+def smartyPants(text, attr=default_smartypants_attr, language='en'):
     """Main function for "traditional" use."""
 
-    return "".join([t for t in educate_tokens(tokenize(text), attr)])
+    return "".join([t for t in educate_tokens(tokenize(text),
+                                              attr, language)])
 
 
-def educate_tokens(text_tokens, attr=default_smartypants_attr):
-    """Return iterator that "educates" `text_tokens`.
+def educate_tokens(text_tokens, attr=default_smartypants_attr, language='en'):
+    """Return iterator that "educates" the items of `text_tokens`.
     """
 
     # Parse attributes:
         if "w" in attr: convert_quot = True
 
     prev_token_last_char = " "
-    # Get context around inline mark-up. (Remember the last character of the
-    # previous text token, to use as context to curl single-character quote
-    # tokens correctly.)
+    # Last character of the previous text token. Used as
+    # context to curl leading quote characters correctly.
 
-    for cur_token in text_tokens:
-        t = cur_token[1]
+    for (ttype, text) in text_tokens:
 
         # skip HTML and/or XML tags (do not update last character)
-        if cur_token[0] == 'tag':
-            yield t
+        if ttype == 'tag':
+            yield text
             continue
 
-        last_char = t[-1:] # Remember last char of this token before processing.
-
         # skip literal text (math, literal, raw, ...)
-        if cur_token[0] == 'literal':
-            yield t
+        if ttype == 'literal':
+            prev_token_last_char = text[-1]
+            yield text
             continue
 
-        t = processEscapes(t)
+        last_char = text[-1:] # Remember last char before processing.
+
+        text = processEscapes(text)
 
         if convert_quot:
-            t = re.sub('&quot;', '"', t)
+            text = re.sub('&quot;', '"', text)
 
         if do_dashes == 1:
-            t = educateDashes(t)
+            text = educateDashes(text)
         elif do_dashes == 2:
-            t = educateDashesOldSchool(t)
+            text = educateDashesOldSchool(text)
         elif do_dashes == 3:
-            t = educateDashesOldSchoolInverted(t)
+            text = educateDashesOldSchoolInverted(text)
 
         if do_ellipses:
-            t = educateEllipses(t)
+            text = educateEllipses(text)
 
         # Note: backticks need to be processed before quotes.
         if do_backticks:
-            t = educateBackticks(t)
+            text = educateBackticks(text, language)
 
         if do_backticks == 2:
-            t = educateSingleBackticks(t)
+            text = educateSingleBackticks(text, language)
 
         if do_quotes:
-            t = educateQuotes(prev_token_last_char+t)[1:]
+            text = educateQuotes(prev_token_last_char+text, language)[1:]
 
         if do_stupefy:
-            t = stupefyEntities(t)
+            text = stupefyEntities(text, language)
 
-        # print prev_token_last_char, t.encode('utf8')
+        # Remember last char as context for the next token
         prev_token_last_char = last_char
 
-        yield t
+        text = processEscapes(text, restore=True)
 
+        yield text
 
 
-def educateQuotes(text):
+
+def educateQuotes(text, language='en'):
     """
-    Parameter:  String (unicode or bytes).
+    Parameter:  - text string (unicode or bytes).
+                - language (`BCP 47` language tag.)
     Returns:    The `text`, with "educated" curly quote characters.
 
     Example input:  "Isn't this fun?"
     Example output: “Isn’t this fun?”
     """
 
+    smart = smartchars(language)
+
     # oldtext = text
     punct_class = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]"""
 
     # Special case if the very first character is a quote
-    # followed by punctuation at a non-word-break. Close the quotes by brute force:
-    text = re.sub(r"""^'(?=%s\\B)""" % (punct_class,), smart.rquote, text)
-    text = re.sub(r"""^"(?=%s\\B)""" % (punct_class,), smart.rdquote, text)
+    # followed by punctuation at a non-word-break.
+    # Close the quotes by brute force:
+    text = re.sub(r"""^'(?=%s\\B)""" % (punct_class,), smart.csquote, text)
+    text = re.sub(r"""^"(?=%s\\B)""" % (punct_class,), smart.cpquote, text)
 
     # Special case for double sets of quotes, e.g.:
     #   <p>He said, "'Quoted' words in a larger quote."</p>
-    text = re.sub(r""""'(?=\w)""", smart.ldquote+smart.lquote, text)
-    text = re.sub(r"""'"(?=\w)""", smart.lquote+smart.ldquote, text)
+    text = re.sub(r""""'(?=\w)""", smart.opquote+smart.osquote, text)
+    text = re.sub(r"""'"(?=\w)""", smart.osquote+smart.opquote, text)
 
     # Special case for decade abbreviations (the '80s):
-    text = re.sub(r"""\b'(?=\d{2}s)""", smart.rquote, text)
+    text = re.sub(r"""\b'(?=\d{2}s)""", smart.csquote, text)
 
     close_class = r"""[^\ \t\r\n\[\{\(\-]"""
     dec_dashes = r"""&#8211;|&#8212;"""
                     '                 # the quote
                     (?=\w)            # followed by a word character
                     """ % (dec_dashes,), re.VERBOSE)
-    text = opening_single_quotes_regex.sub(r'\1'+smart.lquote, text)
+    text = opening_single_quotes_regex.sub(r'\1'+smart.osquote, text)
 
     closing_single_quotes_regex = re.compile(r"""
                     (%s)
                     '
                     (?!\s | s\b | \d)
                     """ % (close_class,), re.VERBOSE)
-    text = closing_single_quotes_regex.sub(r'\1'+smart.rquote, text)
+    text = closing_single_quotes_regex.sub(r'\1'+smart.csquote, text)
 
     closing_single_quotes_regex = re.compile(r"""
                     (%s)
                     '
                     (\s | s\b)
                     """ % (close_class,), re.VERBOSE)
-    text = closing_single_quotes_regex.sub(r'\1%s\2' % smart.rquote, text)
+    text = closing_single_quotes_regex.sub(r'\1%s\2' % smart.csquote, text)
 
     # Any remaining single quotes should be opening ones:
-    text = re.sub(r"""'""", smart.lquote, text)
+    text = re.sub(r"""'""", smart.osquote, text)
 
     # Get most opening double quotes:
     opening_double_quotes_regex = re.compile(r"""
                     "                 # the quote
                     (?=\w)            # followed by a word character
                     """ % (dec_dashes,), re.VERBOSE)
-    text = opening_double_quotes_regex.sub(r'\1'+smart.ldquote, text)
+    text = opening_double_quotes_regex.sub(r'\1'+smart.opquote, text)
 
     # Double closing quotes:
     closing_double_quotes_regex = re.compile(r"""
                     "
                     (?=\s)
                     """ % (close_class,), re.VERBOSE)
-    text = closing_double_quotes_regex.sub(smart.rdquote, text)
+    text = closing_double_quotes_regex.sub(smart.cpquote, text)
 
     closing_double_quotes_regex = re.compile(r"""
                     (%s)   # character that indicates the quote should be closing
                     "
                     """ % (close_class,), re.VERBOSE)
-    text = closing_double_quotes_regex.sub(r'\1'+smart.rdquote, text)
+    text = closing_double_quotes_regex.sub(r'\1'+smart.cpquote, text)
 
     # Any remaining quotes should be opening ones.
-    text = re.sub(r'"', smart.ldquote, text)
+    text = re.sub(r'"', smart.opquote, text)
 
     return text
 
 
-def educateBackticks(text):
+def educateBackticks(text, language='en'):
     """
     Parameter:  String (unicode or bytes).
     Returns:    The `text`, with ``backticks'' -style double quotes
     Example input:  ``Isn't this fun?''
     Example output: “Isn't this fun?”
     """
+    smart = smartchars(language)
 
-    text = re.sub(r"""``""", smart.ldquote, text)
-    text = re.sub(r"""''""", smart.rdquote, text)
+    text = re.sub(r"""``""", smart.opquote, text)
+    text = re.sub(r"""''""", smart.cpquote, text)
     return text
 
 
-def educateSingleBackticks(text):
+def educateSingleBackticks(text, language='en'):
     """
     Parameter:  String (unicode or bytes).
     Returns:    The `text`, with `backticks' -style single quotes
     Example input:  `Isn't this fun?'
     Example output: ‘Isn’t this fun?’
     """
+    smart = smartchars(language)
 
-    text = re.sub(r"""`""", smart.lquote, text)
-    text = re.sub(r"""'""", smart.rquote, text)
+    text = re.sub(r"""`""", smart.osquote, text)
+    text = re.sub(r"""'""", smart.csquote, text)
     return text
 
 
                 an em-dash character.
     """
 
-    text = re.sub(r"""---""", smart.endash, text) # en  (yes, backwards)
-    text = re.sub(r"""--""", smart.emdash, text) # em (yes, backwards)
+    text = re.sub(r"""---""", smartchars.endash, text) # en  (yes, backwards)
+    text = re.sub(r"""--""", smartchars.emdash, text) # em (yes, backwards)
     return text
 
 
                 an em-dash character.
     """
 
-    text = re.sub(r"""---""", smart.emdash, text)    # em (yes, backwards)
-    text = re.sub(r"""--""", smart.endash, text)    # en (yes, backwards)
+    text = re.sub(r"""---""", smartchars.emdash, text)
+    text = re.sub(r"""--""", smartchars.endash, text)
     return text
 
 
                 the shortcut should be shorter to type. (Thanks to Aaron
                 Swartz for the idea.)
     """
-    text = re.sub(r"""---""", smart.endash, text)    # em
-    text = re.sub(r"""--""", smart.emdash, text)    # en
+    text = re.sub(r"""---""", smartchars.endash, text)    # em
+    text = re.sub(r"""--""", smartchars.emdash, text)    # en
     return text
 
 
     Example output: Huh&#8230;?
     """
 
-    text = re.sub(r"""\.\.\.""", smart.ellipsis, text)
-    text = re.sub(r"""\. \. \.""", smart.ellipsis, text)
+    text = re.sub(r"""\.\.\.""", smartchars.ellipsis, text)
+    text = re.sub(r"""\. \. \.""", smartchars.ellipsis, text)
     return text
 
 
-def stupefyEntities(text):
+def stupefyEntities(text, language='en'):
     """
     Parameter:  String (unicode or bytes).
     Returns:    The `text`, with each SmartyPants character translated to
     Example input:  “Hello — world.”
     Example output: "Hello -- world."
     """
+    smart = smartchars(language)
 
     text = re.sub(smart.endash, "-", text)  # en-dash
     text = re.sub(smart.emdash, "--", text) # em-dash
 
-    text = re.sub(smart.lquote, "'", text)  # open single quote
-    text = re.sub(smart.rquote, "'", text)  # close single quote
+    text = re.sub(smart.osquote, "'", text)  # open single quote
+    text = re.sub(smart.csquote, "'", text)  # close single quote
 
-    text = re.sub(smart.ldquote, '"', text)  # open double quote
-    text = re.sub(smart.rdquote, '"', text)  # close double quote
+    text = re.sub(smart.opquote, '"', text)  # open double quote
+    text = re.sub(smart.cpquote, '"', text)  # close double quote
 
     text = re.sub(smart.ellipsis, '...', text)# ellipsis
 
     return text
 
 
-def processEscapes(text):
+def processEscapes(text, restore=False):
     r"""
     Parameter:  String (unicode or bytes).
     Returns:    The `text` after processing the following backslash
                 \-      &#45;
                 \`      &#96;
     """
-    text = re.sub(r"""\\\\""", r"""&#92;""", text)
-    text = re.sub(r'''\\"''', r"""&#34;""", text)
-    text = re.sub(r"""\\'""", r"""&#39;""", text)
-    text = re.sub(r"""\\\.""", r"""&#46;""", text)
-    text = re.sub(r"""\\-""", r"""&#45;""", text)
-    text = re.sub(r"""\\`""", r"""&#96;""", text)
+    replacements = ((r'\\', r'&#92;'),
+                    (r'\"', r'&#34;'),
+                    (r"\'", r'&#39;'),
+                    (r'\.', r'&#46;'),
+                    (r'\-', r'&#45;'),
+                    (r'\`', r'&#96;'))
+    if restore:
+        for (ch, rep) in replacements:
+            text = text.replace(rep, ch[1])
+    else:
+        for (ch, rep) in replacements:
+            text = text.replace(ch, rep)
 
     return text
 

docutils/test/test_transforms/test_smartquotes.py

 #!/usr/bin/env python
 # -*- coding: utf8 -*-
-
 # $Id$
-
+#
 # :Copyright: © 2011 Günter Milde.
+# :Maintainer: docutils-develop@lists.sourceforge.net
 # :License: Released under the terms of the `2-Clause BSD license`_, in short:
 #
 #    Copying and distribution of this file, with or without modification,
 
 def suite():
     parser = Parser()
+    settings = {'smart_quotes': True}
     s = DocutilsTestSupport.TransformTestSuite(
-        parser, suite_settings={'smart_quotes': True})
+        parser, suite_settings=settings)
     s.generateTests(totest)
+    settings['language_code'] = 'de'
+    s.generateTests(totest_de)
+    settings['smart_quotes'] = 'alternative'
+    s.generateTests(totest_de_alt)
     return s
 
 
 totest = {}
+totest_de = {}
+totest_de_alt = {}
 
 totest['transitions'] = ((SmartQuotes,), [
 ["""\
 """],
 ["""\
 Do not "educate" quotes ``inside "literal" text`` and ::
- 
+
   "literal" blocks.
 
 Keep quotes straight in code and math: 
 .. code::
 
    print "hello"
-   
+  
 .. math::
 
    f'(x) = df(x)/dx
-  
+
 """,
 u"""\
 <document source="test data">
                 <title_reference>
                     inline “roles”\
 """],
+["""\
+.. class:: language-de
+
+German "smart quotes" and 'single smart quotes'.
+
+.. class:: language-foo
+
+"Quoting style" for unknown languages is 'ASCII'.
+
+.. class:: language-de-x-altquot
+
+Alternative German "smart quotes" and 'single smart quotes'.
+""",
+u"""\
+<document source="test data">
+    <paragraph classes="language-de">
+        German „smart quotes“ and ‚single smart quotes‘.
+    <paragraph classes="language-foo">
+        "Quoting style" for unknown languages is 'ASCII'.
+    <paragraph classes="language-de-x-altquot">
+        Alternative German »smart quotes« and ›single smart quotes‹.
+    <system_message level="2" line="7" source="test data" type="WARNING">
+        <paragraph>
+            No smart quotes defined for language "foo".
+"""],
 ])
 
+totest_de['transitions'] = ((SmartQuotes,), [
+["""\
+German "smart quotes" and 'single smart quotes'.
+
+.. class:: language-en-UK
+
+English "smart quotes" and 'single smart quotes'.
+""",
+u"""\
+<document source="test data">
+    <paragraph>
+        German „smart quotes“ and ‚single smart quotes‘.
+    <paragraph classes="language-en-uk">
+        English “smart quotes” and ‘single smart quotes’.
+"""],
+])
+
+totest_de_alt['transitions'] = ((SmartQuotes,), [
+["""\
+Alternative German "smart quotes" and 'single smart quotes'.
+
+.. class:: language-en-UK
+
+English "smart quotes" and 'single smart quotes' have no alternative.
+
+.. class:: language-ro
+
+Alternative Romanian "smart quotes" and 'single' smart quotes.
+""",
+u"""\
+<document source="test data">
+    <paragraph>
+        Alternative German »smart quotes« and ›single smart quotes‹.
+    <paragraph classes="language-en-uk">
+        English “smart quotes” and ‘single smart quotes’ have no alternative.
+    <paragraph classes="language-ro">
+        Alternative Romanian «smart quotes» and „single” smart quotes.
+"""],
+])
 
 if __name__ == '__main__':
     import unittest