rbeezer avatar rbeezer committed 59836b3

Command-line switches, pure python, better handling of filenames and paths

Comments (0)

Files changed (1)

-#! /usr/bin/env sage
-#
-# Use sage to pick up sagenb notebook library
-# To fully test experimental pure Python, replace "sage" with "python"
+#! /usr/bin/env python
 
 ################################################################################
 #            Copyright 2010 Robert A. Beezer <beezer@ups.edu>
 
 class TeXtoSWS(object):
 
-    def __init__(self, input_dir=None ):
+    def __init__(self, input_dir=None, basename=None, output_file=None):
         r"""
-        Discover as much as possible about files that were
-        output by tex4ht to a directory.
+        Configure the working environment for a conversion,
+        making educated guesses when lacking explicit information.
 
         INPUT:
 
         - ``input_dir`` - a directory that contains all of the
-        output from a run of tex4ht on a latex file.  This
-        directory should contain the associated graphics
-        files but we locate them later in the HTML sources.
+          output from a run of tex4ht on a latex file.  This
+          directory should contain the associated graphics
+          files but we locate them later in the HTML sources.
+          If input as ``None`` the current directory is used.
+
+        - ``basename`` - a string that describes the base
+          for the filenames created by tex4ht.  For example,
+          if the original tex file is ``foo.tex`` then the
+          basename is ``foo`` and all of the tex4ht output
+          files have names beginning with this string.
+          If input as ``None`` then the ``input_dir`` is
+          searched for the existence of exactly one CSS file
+          and the basename is derived from that.
+
+        - ``output_file`` - name for output Sage worksheet.
+          If input as ``None`` then it will be located in the
+          ``input_dir`` using the value of the ``basename``
+          and the extension ``.sws``.
 
         OUTPUT:
+        Besides the items mentioned above several other
+        items are also computed and recorded in this routine.
 
-        Several items are set here.  One is a ``basename`` which
-        tex4ht will have derived from the original LaTeX source
-        file.  So if we begin with ``foo.tex`` all of the
-        files involved will begin with ``foo``.
+        First is a list of pairs.  The second part of each pair
+        is the filename for an HTML file.  The first part of the
+        pair is the (relative) worksheet number for that file,
+        with counting starting at zero, and as a string (not
+        an integer).  The pairs are sorted according to the
+        numerical value of this first string.
 
-        Another item returned is a list of pairs.  The
-        second part of each pair is the filename for an
-        HTML file.  The first part of the pair is the
-        (relative) worksheet number for that file, with
-        counting starting at zero, and as a string (not
-        an integer). The pairs are sorted according to
-        the numerical value of this first string.
-
-        The directory where all these files live is recorded
-        as ``self._input_dir``.  Based on the number of HTML
-        files discovered, a ``_likely_format`` is set.
+        Based on the number of HTML files discovered,
+        a ``_likely_format`` is set.
         """
-        from os import listdir  # to inspect directory
-        import re               # to massage filenames
+        import os        # getcwd, listdir,
+        import os.path   # splitext
+        import re        # to massage filenames
 
         if not input_dir:
-            input_dir = './'
-        directory = listdir(input_dir)
+            input_dir = os.getcwd()
+        directory = os.listdir(input_dir)
 
-        # tex4ht builds HTML files and a CSS file
+        # tex4ht builds HTML file(s) and a single CSS file
         # Infer basename of project from single CSS file in directory
-        # Use this to find all HTML files
-        cssfiles = [afile for afile in directory if afile.endswith('.css')]
-        if len(cssfiles) != 1:
-            raise ValueError('no CSS file, or multiple CSS files in directory')
-        cssfilename = cssfiles[0]
-        basename = cssfilename[:-4]
+        cssfiles = [afile for afile in directory if os.path.splitext(afile)[1]=='.css']
+        #print 'DIR: ', directory
+        #print 'CSS: ', cssfiles
+        if not basename:
+            if len(cssfiles) != 1:
+                raise ValueError('need exactly one CSS file in %s directory to determine project' % input_dir)
+            else:
+                basename = cssfiles[0][:-4]
+
+        # Use input_dir and basename to form default worksheet filename
+        if not output_file:
+            output_file = os.path.join(input_dir, basename + '.sws')
 
         # Find all html files
         # $ matches end-of-string, avoids backup-files with tildes (Robert Marik)
             self._likely_format = 'tar'
         self._input_dir = input_dir
         self._basename = basename
+        self._output_file = output_file
         self._files = files
 
 
 
     def _convert_one_file(self, html_name, css_name, nb, user, linkbase=None):
         r"""
+        OBSOLETE
+        MAYBE FILENAMES/PATHS ARE BROKEN IN FULLY GENERAL USE
+        
         Create a single worksheet from a parsed tex4ht XHTML file.
 
         INPUT:
         return W
 
 
-    def _create_single_sws(self, basename):
+    def _old_create_single_sws(self, basename):
         r"""
-        Creates a single Sage worksheet in a portable sws format from a one-section LaTeX document.
+        OBSOLETE
+        MAYBE FILENAMES/PATHS ARE BROKEN IN FULLY GENERAL USE
 
-        INPUT:
-
-        - `basename` - a string. This is the basename of the original
-        LaTeX input file and the basename of the tex4ht output.
-        So, for example, suppose your original file is foo.tex, and
-        when processed by tex4ht it produces an HTML/jsMath file called
-        foo.html, and an associated CSS file foo.css.  You would provide
-        `foo` as the input sting, and would end up creating ``foo.sws``.
-        So this routine will create a single worksheet faithfully representing
-        the original intent in the LaTeX file and possibly including Sage
-        compute cells. This assumes the necessary files are in the current
-        working directory.
-
-        OUTPUT:  This routine creates a file  foo.sws  in the current working directory.
-        The return value is simply this filename as a string.
-        """
         # We make a temporary notebook to work in
         # This is located in $HOME/.sage/temp/hostname/pid/
         # Temporary directory gets deleted automatically (as process ends?)
+        """
         from sage.misc.misc import tmp_dir
         from sagenb.notebook.notebook import Notebook
         nbdir = tmp_dir() + 'converter.sagenb'
         nb.export_worksheet(W.filename(), basename+'.sws')
         return basename+'.sws'
 
-    def _pure_python(self, basename):
+    def _create_single_sws(self):
         r"""
-        EXPERIMENTAL:
-        Build an sws file without any notebook code.
-        Assumes just a single file of HTML.
-        Edit shebang to just call python, not sage
+        Creates a single Sage worksheet in a portable sws format from a one-section LaTeX document.
+
+        This routine creates a worksheet "from scratch" using just Python
+        and none of the notebook code.  This makes for quicker startup times
+        and the ability to run without Sage present.
+
+        OUTPUT:  This routine creates a Sage worksheet in the file whose
+        name is ``self._output_file``. The return value is simply this
+        filename as a string.
         """
         import time  # for last change in pickled worksheet info
         import tempfile
         import tarfile
         import cPickle
         import os
-        import StringIO
+        import os.path
 
-        css_name = basename + '.css'
-        html_name = basename + '.html'
+        input_dir = self._input_dir
+        basename = self._basename
+        output_file = self._output_file
 
         # Break out tex4ht output
+        # There should not be any cross-worksheet links,
+        #   so we don't pass a base for the linking URLs
+        html_name = os.path.join(input_dir, basename + '.html')
         title, graphics, cells = self._parse_tex4ht(html_name, None)
 
         # Piece back together in worksheet format
+        # "content" will be the text representation of a worksheet
         content=[]
+
+        # We first link in the CSS information from the data directory
+        # The CSS file sits alongside the graphics files, so it needs no path;
+        # we add it to the data directory along with all the graphics files.
+        css_name = basename + '.css'
         content.append( r'<link type="text/css" rel="stylesheet" href="' + css_name + r'" />' )
         graphics.append(css_name)
 
             'last_change':('admin', time.time()),
             }
 
-        # Build sws as a tar file, with expected name
-        prefix = self._input_dir + 'sage_worksheet/'
-        T = tarfile.open(basename + '.sws', 'w:bz2')
+        # Build sws as a tar file, with expected naming conventions
+        prefix = 'sage_worksheet'
+        T = tarfile.open(output_file, 'w:bz2')
 
         # Pickled configuration file
         fd, configfile =  tempfile.mkstemp()
         config = cPickle.dumps(basic)
         open(configfile, 'w').write(config)
-        T.add(configfile, prefix + 'worksheet_conf.pickle')
+        T.add(configfile, os.path.join(prefix, 'worksheet_conf.pickle'))
         os.unlink(configfile)
         os.fdopen(fd,'w').close()
 
         body = ''.join(content).encode('ascii', 'xmlcharrefreplace')
         fd, wsfile =  tempfile.mkstemp()
         open(wsfile, 'w').write(body)
-        T.add(wsfile, prefix + 'worksheet.html')
+        T.add(wsfile, os.path.join(prefix, 'worksheet.html'))
         os.unlink(wsfile)
         os.fdopen(fd,'w').close()
         #  For older versions of notebook, backward compatible
         header = ''.join(header).encode('ascii', 'xmlcharrefreplace')
         fd_old, oldwsfile =  tempfile.mkstemp()
         open(oldwsfile, 'w').write(header + body)
-        T.add(oldwsfile, prefix + 'worksheet.txt')
+        T.add(oldwsfile, os.path.join(prefix, 'worksheet.txt'))
         os.unlink(oldwsfile)
         os.fdopen(fd_old,'w').close()
         #  End backward compatibility
 
         # Data files, graphics, css, whatever
-        dataprefix = prefix + 'data/'
+        dataprefix = os.path.join(prefix, 'data')
         for f in graphics:
-            T.add(f, dataprefix + f)
+            base = os.path.split(f)[1]
+            T.add(os.path.join(input_dir, f), os.path.join(dataprefix, base))
 
         T.close()
-
+        return output_file
 
     def _create_tar_archive(self, basename):
         # this is all ad-hoc for testing
         T.add('linear')
         T.close()
 
-    def convert(self, dir = None, format = None):
+
+    def convert(self, format = None):
         r"""
         The one public method.
         """
         if not format:
             format = self._likely_format
-        if not dir:
-            dir = self._input_dir
         # pass a directory to _create_single_sws?
         if format == 'sws':
-            self._create_single_sws(self._basename)
+            self._create_single_sws()
         if format == 'tar':
             self._create_tar_archive(self._basename)
         ## Calls to testing routines, not permanent
 # Main
 ############################
 #
+# Analyze command line
 # Create converter class
 # Call convert()
 
-t2s = TeXtoSWS()
+from optparse import OptionParser
+optparse = OptionParser(usage = r"""usage: %prog [options]""")
+optparse.add_option("-v", "--verbose",
+                    action = "store_true", dest = "verbose", default = False,
+                    help = "print progress messages")
+
+optparse.add_option("-i", "--input_directory",
+                    action = "store", dest = "input_dir",
+                    help = "input directory with tex4ht output")
+
+optparse.add_option("-b", "--basename",
+                    action = "store", dest = "basename",
+                    help = "project basename, eg foo.tex has basename 'foo'")
+
+optparse.add_option("-o", "--output_file",
+                    action = "store", dest = "output_file",
+                    help = "filename for Sage worksheet")
+opts, args = optparse.parse_args()
+
+# Build a converter
+t2s = TeXtoSWS(input_dir = opts.input_dir,
+               basename = opts.basename,
+               output_file = opts.output_file)
+if opts.verbose:
+    print "Job: Converting %s project in %s to %s." % (t2s._basename, t2s._input_dir, t2s._output_file)
+
+# Do the conversion
 t2s.convert()
 ## Testing, experimental calls
-## t2s.convert(format = 'pure-python')
 ## t2s.convert(format='xml-test')
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.