Commits

Robert Mařík committed 23b28b1

handle img tag directly in html parser

Comments (0)

Files changed (3)

 ###############################################################################
 #   Sage: Open Source Mathematical Software
-#       Copyright (C) 2009 Wilfried Huss <huss@finanz.math.tugraz.at>
+#       Copyright (C) 2009-2010 Wilfried Huss <huss@finanz.math.tugraz.at>
+#       Copyright (C) 2009-2010 Robert Marik <marik@mendelu.cz>
 #  Distributed under the terms of the GNU General Public License (GPL),
 #  version 2 or any later version.  The full text of the GPL is available at:
 #                  http://www.gnu.org/licenses/
 
 class Html2Latex(HTMLParser.HTMLParser):
 
-    def __init__(self, options = None):
+    def __init__(self, options = None, cell_id = None):
         HTMLParser.HTMLParser.__init__(self)
         self.options = options
+        self.cell_id = cell_id
         self.in_math_mode = False
         self.latex = ['']                # latex code
         self.data = ''
         elif tag == 'font':
             pass
         elif tag == 'img':
-            self.latex[-1] += "<img src='%s'>"%dict(attrs).get('src')
+            # dict(attrs).get('src') looks like cell://name.png
+            self.latex[-1] += "\\includegraphics{sage_worksheet/cells/%s/%s}" % (self.cell_id,dict(attrs).get('src')[7:])
         else:
             if self.options != None and not self.options.ignore_unknown_tags:
                 self.latex[-1] += self.tag2string(tag, attrs)
             else:
                 self.data += ref
 
-    def parse_html(self, text, OutputCell=False):
+    def parse_html(self, text, OutputCell = False, cell_id = None):
         text = detect_math(text)
+        self.cell_id = cell_id
         #print text
-        self.feed(text)
+        self.feed(text)        
 
         if OutputCell:
             return self.data
 ###############################################################################
 #   Sage: Open Source Mathematical Software
-#       Copyright (C) 2009 Wilfried Huss <huss@finanz.math.tugraz.at>
+#       Copyright (C) 2009-2010 Wilfried Huss <huss@finanz.math.tugraz.at>
+#       Copyright (C) 2009-2010 Robert Marik <marik@mendelu.cz>
 #  Distributed under the terms of the GNU General Public License (GPL),
 #  version 2 or any later version.  The full text of the GPL is available at:
 #                  http://www.gnu.org/licenses/
 
 class Html_table2Latex_tabular(HTMLParser.HTMLParser):
 
-    def __init__(self, columns = 0):
+    def __init__(self, columns = 0, cell_id = None):
         HTMLParser.HTMLParser.__init__(self)
         self.columns = columns
         self.latex = ''                # latex tabular code
         self.rowCount = 0              # row counter
         self.columnCount = 0           # column counter
         self.data = ''
+        self.cell_id = cell_id
 
         # calculate the maximal number
         # of columns in the table
 
     def start_img(self, attrs):
         try:
-            attrs = dict(attrs)
-            src = attrs["src"]
-            # we make sure that the img-tag stays in the latex_code,
-            # it is dealt with later on
-            self.data = "<img src='%s'>" % src
+            # Emit the LaTeX graphics include for an <img> tag directly.
+            # The src attribute looks like cell://name.png; [7:] strips the
+            # "cell://" prefix.  Index with ["src"] rather than .get('src'):
+            # .get returns None for a missing attribute, and slicing None
+            # raises TypeError, which the KeyError handler below would not
+            # catch.
+            src = dict(attrs)["src"]
+            self.data = "\\includegraphics{sage_worksheet/cells/%s/%s}" % (self.cell_id, src[7:])
         except KeyError:
             pass
 
     def handle_data(self, data):
         self.data += data
 
-def parse_table(text):
+def parse_table(text, cell_id = None):
     # first pass to count the number of columns
+    
     parser = Html_table2Latex_tabular()
     parser.feed(text)
 
     # second pass to generate the LaTeX code
-    parser = Html_table2Latex_tabular(parser.maxColumns)
+    parser = Html_table2Latex_tabular(parser.maxColumns, cell_id = cell_id)
     parser.feed(text)
 
     return parser.latex
         f.close()
 
     else:
-        print 'Usage: %s filename' % sys.argv[0]
+        print 'Usage: %s filename' % sys.argv[0]
 #! /usr/bin/env sage
 ###############################################################################
 #   Sage: Open Source Mathematical Software
-#       Copyright (C) 2009 Robert Marik
-#       Copyright (C) 2009 Wilfried Huss <huss@finanz.math.tugraz.at>
+#       Copyright (C) 2009-2010 Robert Marik <marik@mendelu.cz>
+#       Copyright (C) 2009-2010 Wilfried Huss <huss@finanz.math.tugraz.at>
 #  Distributed under the terms of the GNU General Public License (GPL),
 #  version 2 or any later version.  The full text of the GPL is available at:
 #                  http://www.gnu.org/licenses/
     """
     cell_type = "output"
     language = 'sage'
-    # re_math = re.compile(r"<html><(?P<tag>span|div) class=\"math\">(?:\\newcommand{\\Bold}\[1\]{\\mathbf{#1}})?(.*?)</(?P=tag)></html>")
-    re_img = re.compile(r"""<img src=['"]cell://(.*?)['"]>""")
-    # re_font = re.compile(r"<html><font color='black'>(.*?)</font></html>")
 
     def __init__(self, text, cell_id, options=None, language='sage'):
         Cell.__init__(self, text, cell_id, options)
             else:
                 if output.strip() != "":
                     if output.find('<div class="notruncate">') != -1: # output is a table
-                        table_output = parse_table(output)
-                        latex_str += self.re_img.sub("\\sagegraphics{%d}{\\1}" % self.cell_id, table_output)
+                        latex_str += parse_table(output, cell_id=self.cell_id)
                     else:
-                        html_output = self.html2latex.parse_html(output, OutputCell=True)
-                        #TODO: handle graphics in the html-parser itself
                         latex_str += "{\\color{blue}\n"
-                        latex_str += self.re_img.sub("\\sagegraphics{%d}{\\1}" % self.cell_id, html_output)
+                        latex_str += self.html2latex.parse_html(output, OutputCell=True, cell_id=self.cell_id)
                         latex_str += "\n}\n"
                 verbatim = True
             latex_str += "\n\n" # paragraph between <html>...</html> blocks