Commits

Brian Mearns  committed ff19a2f Merge

Merged to get better handling of UTF-8 and a few character accents.

  • Participants
  • Parent commits 723e831, 133a5f5

Comments (0)

Files changed (4)

File src/tome/Tome.py

         '!': 'em',
     }
 
+    __ACCENT_MAP = {
+        '`': 'grave',
+        "'": 'acute',
+        '^': 'circumflex',
+        '"': 'umlaut',
+        '~': 'tilde',
+        'c': 'cedilla',
+    }
+
     __BLOCK_MAP = {
         "'": 'sq',
         '"': 'q',
                                     while colnum < linelen and line[colnum].isspace():
                                         colnum += 1
 
+                                elif c == '^':
+                                    #Accents
+                                    if len(text) > 0:
+                                        self.appendText(text, filename, linenum, colnum)
+                                        text = ""
+
+                                    colnum += 1
+                                    if colnum == linelen:
+                                        raise ParsingError("Invalid opening accent at end of line.")
+                                    c = line[colnum]
+
+                                    if c in TomeOtlParser.__ACCENT_MAP:
+                                        self.appendLeaf(TomeOtlParser.__ACCENT_MAP[c], filename, linenum, colnum)
+                                        colnum += 1
+
+                                        #Consume white space between command and content.
+                                        while colnum < linelen and line[colnum].isspace():
+                                            colnum += 1
+
                                 elif c == '{':
                                     #Escaped open brace.
                                     text += '{'

File src/tome/writeEpub.py

     This is the main functional class of this module, used for producing an EPUB document from a
     |Tome| object and the files in the specified :ref:`template directory <template_dir>`.
     """
+
+    __ACCENT_MAP = {
+        'grave': ('grave', 'aeiouAEIOU'),
+        'acute': ('acute', 'aeiouyAEIOUY'),
+        'circumflex': ('circ', 'aeiouAEIOU'),
+        'umlaut': ('uml', 'aeiouyAEIOUY'),
+        'tilde': ('tilde', 'anoANO'),
+        'cedilla': ('cedil', 'cC'),
+    }
+
     def __init__(
         self, tome, ofile, templateDirectory=None,
         tdFilter=None, tmplFilter=None, chapterTemplatePath=None,
                 ostream.write("<u>")
                 close = "</u>"
 
+            ### Accents
+            # FIXME: These should be emitted as numeric character references, not named entities.
+            elif tag in EpubWriter.__ACCENT_MAP:
+                if len(segment) != 1 or not isinstance(segment[0], Tome.TextSegment):
+                    raise Exception("Invalid use of %s accent: must have exactly one text segment child.")
+                target = segment[0].text()
+                if len(target) != 1:
+                    raise Exception("Invalid use of %s accent: segment content must be a single character: " + target)
+                label, allowed = EpubWriter.__ACCENT_MAP[tag]
+                if target not in allowed:
+                    raise Exception("Invalid use of %s accent: cannot be applied to '%s'." % target)
+                ostream.write("&" + target + label + ";")
+                return
+
+
             ### Text Objects.
             elif tag == "ellips":
                 ostream.write("&#x2026;")

File src/tome/writeLatex.py

             ostream.write("\\underline{")
             close = "}"
 
+        #Accents
+        elif tag == 'grave':
+            ostream.write("\\`{")
+            close = "}"
+        elif tag == 'acute':
+            ostream.write("\\'{")
+            close = "}"
+        elif tag == 'circumflex':
+            ostream.write("\\^{")
+            close = "}"
+        elif tag == 'umlaut':
+            ostream.write("\\\"{")
+            close = "}"
+        elif tag == 'tilde':
+            ostream.write("\\~{")
+            close = "}"
+        elif tag == 'cedilla':
+            ostream.write("\\c{")
+            close = "}"
+
         #Text objects
         elif tag == "ellips":
             ostream.write("{\\ldots}")
             ostream.write("\n")
             return
 
+
         #Block elements
         elif tag == "q":
             return writeBlockSegment(ostream, "``", "''", segment, dropCap, verbatim=verbatim, prefix="``")
         else:
             content = escape(content)
 
-        ostream.write(content)
+        if not isinstance(content, unicode):
+            content = unicode(content, "utf-8")
+        ostream.write(content.encode("utf-8"))
 
     else:
         raise TypeError("Unexpected type for segment.")

File src/tome/writeText.py

                 ostream.write("\n")
                 return
 
+            #accents (nothing to do)
+            elif tag in ("grave", "acute", "circumflex", "umlaut", "tilde", "cedilla"):
+                close = ""
+
             #Block elements
             elif tag == "q":
                 return self.writeBlockSegment(ostream, "\"", "\"", segment, prefix="\"")
 
         ostream.write("\n"*3)
 
+        titleWidth = int(0.7 * float(self.__linewidth))
+        if titleWidth < 20:
+            titleWidth = self.__linewidth
+
         lmTitles = tome.allTitles()
         if len(lmTitles) > 0:
-            titleWidth = int(0.7 * float(self.__linewidth))
-            if titleWidth < 20:
-                titleWidth = self.__linewidth
-
             for title in tome.allTitles():
                 self.writeCenteredLine(ostream, title, self.__linewidth, titleWidth)
                 self.writeHr(ostream, 3, self.__linewidth)