rbeezer avatar rbeezer committed e8f602b

Pure python worksheet creation, some housekeeping

Comments (0)

Files changed (1)

 #! /usr/bin/env sage
-# Use sage to pick up notebook library
+#
+# Use sage to pick up sagenb notebook library
+# To fully test experimental pure Python, replace "sage" with "python"
 
 class TeXtoSWS(object):
 
         the numerical value of this first string.
 
         The directory where all these files live is recorded
-        as ``self._basename`` and based on the number of HTML
-        files discovered a ``_likely_format`` is set.
+        as ``self._input_dir``.  Based on the number of HTML
+        files discovered, a ``_likely_format`` is set.
         """
         from os import listdir  # to inspect directory
         import re               # to massage filenames
             input_dir = './'
         directory = listdir(input_dir)
 
-        # tex4ht builds a HTML files and a CSS file
+        # tex4ht builds HTML files and a CSS file
         # Infer basename of project from single CSS file in directory
         # Use this to find all HTML files
         cssfiles = [afile for afile in directory if afile.endswith('.css')]
         basename = cssfilename[:-4]
 
         # Find all html files
-        # $ matches end-of-string, avoids backups with tildes (Robert Marik)
+        # $ matches end-of-string, which skips editor backup files ending in a tilde (Robert Marik)
         htmlfile_pattern = re.compile( r'^' + basename + r'(li|)([0-9]*)(.html)$' )
         files = []
         for afile in directory:
             if m:
                 ws_number = m.group(2)
                 # Main HTML file does not get a number from tex4ht
-                # Fits best as worksheet 0
+                # Fits best as worksheet 0 when there are multiple files
                 if not ws_number:
                     ws_number = '0'
                 files.append((ws_number, afile))
         if len(files) == 1:
             self._likely_format = 'sws'
         else:
+            # Need a new Sage container format here
             self._likely_format = 'tar'
         self._input_dir = input_dir
         self._basename = basename
         import re     # regular expressions for parsing
 
         #  Using verbatim environments for Sage code
-        #  allows some XML escape codes to slip through
-        #  <,> are two obvious ones and easy to handle
-        #  The XML escape character, &, is trickier
+        #  allows some XML escape codes to slip through.
+        #  <,> are two obvious ones and easy to handle.
+        #  The XML escape character, &, is trickier.
         #  We only protect against breaking character
-        #  codes like &#1234;  but not  codes like &lt;
+        #  codes like &#1234;  but not codes like &lt;.
         #
         #  Recognize when sage cells begin or end
         sage_start_pattern = re.compile( r'(.*)<sage>(.*)' )
         sage_block = False
         xmlcontent = []
         html_file = open(html_name,'r')
+        ## count = 0
         for aline in html_file.readlines():
-            if sage_block and re.match(sage_end_pattern, aline):
+            if sage_block and sage_end_pattern.match(aline):
                 sage_block = False
             elif sage_block:
-                pieces = re.split( ampersand_pattern, aline )
+                pieces = ampersand_pattern.split(aline)
                 if len(pieces)>1:
                     for i in range(len(pieces)):
                         if pieces[i] == '&':
                     aline = ''.join(pieces)
                 aline = aline.replace('<', r'&#60;')
                 aline = aline.replace('>', r'&#62;')
-            elif not(sage_block) and re.match(sage_start_pattern, aline):
+            elif not(sage_block) and sage_start_pattern.match(aline):
                 sage_block = True
             xmlcontent.append(aline)
+            ## count+=1
+            ## print count, aline
 
         # Can now parse valid XHTML
         tree = dom.parseString( ''.join(xmlcontent) )
 
         # Find and modify links in place
         if linkbase:
-            link_pattern = re.compile( r'^'+linkbase+'(li|)([0-9]*)(.html)(.*)$' )
+            link_pattern = re.compile( r'^' + linkbase + r'(li|)([0-9]*)(.html)(.*)$' )
             for e in tree.getElementsByTagName('a'):
                 attr = e.attributes
                 if e.hasAttribute('href'):
         nb.export_worksheet(W.filename(), basename+'.sws')
         return basename+'.sws'
 
+    def _pure_python(self, basename):
+        r"""
+        EXPERIMENTAL:
+        Build an sws file without any notebook code.
+        Assumes just a single file of HTML.
+        Edit the shebang line to call python instead of sage.
+        """
+        import time  # for last change in pickled worksheet info
+        import tempfile
+        import tarfile
+        import cPickle
+        import os
+        import StringIO
+
+        css_name = basename + '.css'
+        html_name = basename + '.html'
+
+        # Break out tex4ht output
+        title, graphics, cells = self._parse_tex4ht(html_name, None)
+
+        # Piece back together in worksheet format
+        content=[]
+        content.append( r'<link type="text/css" rel="stylesheet" href="' + css_name + r'" />' )
+        graphics.append(css_name)
+
+        # Recognize cells, adorn compute cells
+        for c in cells:
+            if c[0] == 'plain':
+                content.append(c[1])
+            if c[0] == 'compute':
+                content.append('{{{' + c[1] + '}}}')
+
+        # Make a generic worksheet configuration as a Python dictionary
+        basic = {
+            'name':title,
+            'system':'sage',
+            'owner':'admin',
+            'last_change':('admin', time.time()),
+            }
+
+        # Build sws as a tar file, with expected name
+        prefix = self._input_dir + 'sage_worksheet/'
+        T = tarfile.open(basename + '.sws', 'w:bz2')
+
+        # Pickled configuration file
+        fd, configfile =  tempfile.mkstemp()
+        config = cPickle.dumps(basic)
+        open(configfile, 'w').write(config)
+        T.add(configfile, prefix + 'worksheet_conf.pickle')
+        os.unlink(configfile)
+        os.fdopen(fd,'w').close()
+
+        # Worksheet files, new and old styles
+        body = ''.join(content).encode('ascii', 'xmlcharrefreplace')
+        fd, wsfile =  tempfile.mkstemp()
+        open(wsfile, 'w').write(body)
+        T.add(wsfile, prefix + 'worksheet.html')
+        os.unlink(wsfile)
+        os.fdopen(fd,'w').close()
+        #  Backward compatibility for older versions of the notebook:
+        #  same body, preceded by two header lines (title and system)
+        header = [title, '\n', 'system:', basic['system'], '\n']
+        header = ''.join(header).encode('ascii', 'xmlcharrefreplace')
+        fd_old, oldwsfile =  tempfile.mkstemp()
+        open(oldwsfile, 'w').write(header + body)
+        T.add(oldwsfile, prefix + 'worksheet.txt')
+        os.unlink(oldwsfile)
+        os.fdopen(fd_old,'w').close()
+        #  End backward compatibility
+
+        # Data files, graphics, css, whatever
+        dataprefix = prefix + 'data/'
+        for f in graphics:
+            T.add(f, dataprefix + f)
+
+        T.close()
+
 
     def _create_tar_archive(self, basename):
         # this is all ad-hoc for testing
             self._create_single_sws(self._basename)
         if format == 'tar':
             self._create_tar_archive(self._basename)
-        # testing parsing, not permanent
+        ## Calls to testing routines, not permanent
         if format == 'xml-test':
             print self._parse_tex4ht(self._basename+'.html', self._basename)
+        if format == 'pure-python':
+            print self._pure_python(self._basename)
 
 ############################
 # Main
 
 t2s = TeXtoSWS()
 t2s.convert()
+## Testing, experimental calls
+## t2s.convert(format = 'pure-python')
 ## t2s.convert(format='xml-test')
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.