Commits

Olemis Lang committed 36b7e5b

Comments (0)

Files changed (133)

+Copyright (c) 2005 Kristian Ovaska
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+3. The name of the author may not be used to endorse or promote products
+   derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+include LICENSE.txt
+include MANIFEST.in
+include README.txt
+include fsl/*.py
+include fsl/test/testdir/*
+include fsl/test/fsltool_files/*
+include fsl/test/interpreter_files/*
+include fsl/test/lexer_files/*
+include fsl/test/make_struct.sh
+include fsl/test/struct.txt
+include doc/fsl.css
+include doc/fsltool.html
+include doc/index.html
+include doc/language.html
+include doc/todo.txt
+prune *.bak
+prune */old/*
+Metadata-Version: 1.0
+Name: fsl
+Version: 0.5
+Summary: File Selection Language
+Home-page: http://www.cs.helsinki.fi/u/hkovaska/fsl/
+Author: Kristian Ovaska
+Author-email: kristian.ovaska [at] helsinki.fi
+License: BSD
+Description: File Selection Language (FSL) is a descriptive language for file selection.
+        It is used to selectively pick files from a directory structure.
+        FSL is useful for selective backups, for instance. FSL uses glob patterns
+        as the basic building block. For fine-tuning the selection,
+        inclusion/exclusion rule combinations and conditional expressions are
+        available. File size and modification date can be used in expressions.
+Platform: UNKNOWN
+Classifier: Development Status :: 4 - Beta
+Classifier: Environment :: Console
+Classifier: Intended Audience :: System Administrators
+Classifier: License :: OSI Approved :: BSD License
+Classifier: Operating System :: Microsoft :: Windows
+Classifier: Operating System :: POSIX
+Classifier: Programming Language :: Python
+Classifier: Topic :: System :: Archiving
+Author: Kristian Ovaska (kristian.ovaska [at] helsinki.fi) 
+WWW: http://www.cs.helsinki.fi/u/hkovaska/fsl/
+Version: 0.5
+
+File Selection Language (FSL) is a descriptive language for
+file selection. It is used to selectively pick files from
+a directory structure. FSL is useful for selective backups,
+for instance.
+FSL uses glob patterns as the basic building block. For
+fine-tuning the selection, inclusion/exclusion rule combinations and
+conditional expressions are available. File size and modification
+date can be used in expressions.
+
+For further documentation, see doc/index.html.
+body {
+    color: black;
+    background: #eff;
+    font-family: arial, helvetica, sans-serif;
+}
+
+a:link    { color: #00f; }
+a:visited { color: #417; }
+a:hover   { background: #cef; }
+a:active  { color: #f00; }
+
+p {
+    margin-left: 1em;
+    margin-right: 1.5em;
+}
+
+li { margin-right: 1.5em; }
+
+h1 { font-size: 150%; }
+h2 { font-size: 130%; }
+h3 { font-size: 115%; margin-left: 0.5em; }
+h4 { font-size: 107%; }
+
+pre.code {
+    color: #000;
+    background-color: #dee;
+    border-color: #888;
+    border-style: solid;
+    border-width: 1pt;
+    padding: 2pt;
+    margin-left: 3em;
+}
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
+        "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html lang="en">
+
+<head>
+    <title>File Selection Language</title>
+    <link rel="stylesheet" href="fsl.css" type="text/css">
+    <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
+    <meta name="author" content="Kristian Ovaska">
+</head>
+
+<body>
+
+<h1>fsltool</h1>
+
+<p>
+Fsltool is a command-line tool for File Selection Language.
+It takes the root directory and one or more rule files as
+parameters and outputs a list of files. Optionally, it can
+print statistics about the files included.
+
+<h2>Usage</h2>
+<pre>
+usage: fsltool.py [options] &lt;source-filenames&gt;
+
+options:
+  -h, --help            show this help message and exit
+  --ast                 Print Abstract Syntax Tree only, don't run interpreter
+  -d                    Print detailed information
+  -l                    Print reLative filename instead of absolute
+  --lexer               Print lexer output only, don't run interpreter
+  -o OUTFILE            Write file list into file instead of stdout
+  -q                    Surround file name with quote marks
+  -r ROOTDIR            Root directory for relative filenames [.]
+  --reldir=PRINTING_RELATIVE_DIR
+                        With -l, print filenames relative to this directory
+                        instead of "."
+  -s                    Print statistics about the files included
+  -t STATS_LIMIT        Threshold in bytes for -s to print directory stats at
+                        scan time [1]
+</pre>
+
+<h2>Example, basic</h2>
+
+<p>
+In the following example, fsltool.py was run in the FSL directory
+with <tt>../doc</tt> as root directory for relative glob patterns.
+The only rule in <tt>rule.fsl</tt> is <tt>*</tt>.
+The files are printed relative to the <tt>../</tt> directory
+(<tt>-l</tt> and <tt>--reldir</tt> options).
+
+<pre>
+[~/src/python/fsl/fsl] python fsltool.py -r ../doc -l --reldir ../ rule.fsl
+doc/devel/architecture.dia
+doc/devel/ast.dia
+doc/devel/desing.txt
+doc/devel/frontend.dia
+doc/devel/interpreter.dia
+doc/fsl.css
+doc/fsltool.html
+doc/fsltool.html.bak
+doc/grammar.txt
+doc/index.html
+doc/language.html
+doc/todo.txt
+</pre>
+
+
+<h2>Example, statistics</h2>
+
+<p>
+In the following example, fsltool.py was run with the FSL directory
+<tt>.</tt> as root directory and the statistics option
+<tt>-s</tt> on.
+With <tt>-s</tt>, fsltool doesn't print filenames
+(except with <tt>-o</tt>).
+
+<pre>
+[~/src/python/fsl/fsl] python fsltool.py rule.fsl -l -s
+   3 files,    343 B test/fsltool_files
+  52 files,   5.4 KB test/interpreter_files
+  20 files,    818 B test/lexer_files
+   1 files,  22.7 KB test/old
+   3 files,    125 B test/testdir/dir xyz
+   2 files,    120 B test/testdir/dir1/dir2/dir3
+   3 files,    130 B test/testdir/dir1/dir2
+   5 files,    275 B test/testdir/dir1
+  16 files,    850 B test/testdir
+ 119 files, 153.5 KB test
+ 158 files, 327.2 KB .
+
+TOTAL   :   158 files, 327.2 KB, avg size   2.1 KB
+
+*.py    :    32 files, 176.2 KB, avg size   5.5 KB, 54%
+*.pyc   :    26 files, 141.7 KB, avg size   5.4 KB, 43%
+*.      :    91 files,   8.0 KB, avg size     89 B,  2%
+*.txt   :     2 files,   1.0 KB, avg size    508 B,  0%
+*.sh    :     2 files,    244 B, avg size    122 B,  0%
+*.bak   :     1 files,     85 B, avg size     85 B,  0%
+*.exe   :     1 files,     30 B, avg size     30 B,  0%
+*.fst   :     1 files,      9 B, avg size      9 B,  0%
+*.fsl   :     1 files,      1 B, avg size      1 B,  0%
+*.hidden:     1 files,      0 B, avg size      0 B,  0%
+
+* Directories *
+ 39 files, 173.7 KB, 53% .
+ 27 files, 123.4 KB, 38% test
+  1 files,  22.7 KB,  7% test/old
+ 52 files,   5.4 KB,  2% test/interpreter_files
+ 20 files,    818 B,  0% test/lexer_files
+  8 files,    450 B,  0% test/testdir
+  3 files,    343 B,  0% test/fsltool_files
+  2 files,    145 B,  0% test/testdir/dir1
+  3 files,    125 B,  0% test/testdir/dir xyz
+  2 files,    120 B,  0% test/testdir/dir1/dir2/dir3
+
+* Files *
+37.5 KB  test/test_interpreter.py
+32.0 KB  test/test_interpreter.pyc
+22.7 KB  test/old/test_interpreter_old.py
+15.4 KB  fslparser.py
+12.7 KB  Interpreter.py
+11.0 KB  AST.pyc
+10.8 KB  fslparser.pyc
+10.8 KB  Interpreter.pyc
+9.0 KB   fsltool.py
+8.5 KB   AST.py
+</pre>
+
+<p>
+<hr>
+Up: <a href="index.html">FSL index</a><br>
+Updated 2005-08-21
+</body>
+</html>
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
+        "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html lang="en">
+
+<head>
+    <title>File Selection Language</title>
+    <link rel="stylesheet" href="fsl.css" type="text/css">
+    <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
+    <meta name="author" content="Kristian Ovaska">
+</head>
+
+<body>
+
+<h1>File Selection Language</h1>
+
+<p>
+WWW: <a href="http://www.cs.helsinki.fi/u/hkovaska/fsl/">
+     http://www.cs.helsinki.fi/u/hkovaska/fsl/</a> <br>
+Author: <a href="http://www.cs.helsinki.fi/u/hkovaska/">Kristian Ovaska</a>
+        (kristian.ovaska [at] helsinki.fi) <br>
+License: BSD<br>
+Version: 0.5 (2005-10-21)<br>
+Requirements: Python 2.3 or greater<br>
+Platforms: Unix, Windows
+
+<p>
+File Selection Language (FSL) is a descriptive language for
+file selection. It is used to selectively pick files from
+a directory structure. FSL is useful for selective backups,
+for instance.
+FSL uses glob patterns as the basic building block. For
+fine-tuning the selection, inclusion/exclusion rule combinations and
+conditional expressions are available. File size and modification
+date can be used in expressions.
+
+<p>
+See documentation:
+<ul>
+<li><a href="language.html">File Selection Language Reference</a>
+<li><a href="fsltool.html">fsltool</a> usage (command-line tool)
+</ul>
+
+<!-- UNCOMMENT for WWW index
+<h2 id="download">Download</h2>
+<p>
+FSL 0.5: 
+<a href="fsl-0.5.zip">fsl-0.5.zip</a> (72 KB)
+-->
+
+<h2>Example</h2>
+
+<pre class="code">
+IN home
+    *.gif, *.jpg, *.png
+    IN alfred
+        EACH f IN *.ps, *.eps IF age(f) &lt; 365
+        *.pdf NONREC   # non-recursive
+
+usr
+NOT usr/local/*.sh
+
+NOT EACH f IN a* IF size(f) &gt; 1000000 OR extract(date(f), "year") = 2001
+</pre>
+
+<p>
+This example program:
+<ul>
+<li>Includes all <tt>*.gif</tt>, <tt>*.jpg</tt>, <tt>*.png</tt> files
+    under <tt>/home</tt> (and its subdirectories)
+<li>Includes <tt>*.ps</tt>, <tt>*.eps</tt> files under
+    <tt>/home/alfred</tt> (and its subdirectories) that have been
+    modified in the past year
+<li>Includes <tt>*.pdf</tt> files located in <tt>/home/alfred</tt>, but
+    not in any subdirectories of <tt>/home/alfred</tt>
+<li>Includes all files under <tt>/usr</tt> ...
+<li>... except <tt>*.sh</tt> files under <tt>/usr/local</tt>
+    (in any subdirectory)
+<li>Excludes all files beginning with <tt>a</tt> with size
+    greater than 1,000,000 bytes or modification date in the year 2001.
+    This rule applies to all files that were matched above.
+</ul>
+
+<h2>Features</h2>
+
+<p>
+FSL can be used with a command line tool (<a href="fsltool.html">fsltool</a>)
+or, for Python programmers, with a programmable interface. For the Python
+interface, see the documentation of <tt>Interpreter.py</tt>.
+
+<p>
+Several FSL rule files can be combined in a cascading manner similar to CSS.
+The effect is the same as if the rule files were pasted into
+a single file.
+
+<p>
+Support for both Windows-like and Unix-like paths.
+
+<p>
+Strict parse-time type checking to catch as many errors as possible
+before run-time. For example, you can't say
+<code>EACH f IF size(5) &gt; 1000</code> because the function
+<code>size</code> expects a filename argument.
+
+<h2>Installation</h2>
+
+<ol>
+<li>Download FSL
+<li>Unpack the archive
+<li>Install FSL:
+    <ul>
+        <li>If you have Python 2.4 or greater, and you want to install
+            automatically, run <code>python setup.py install</code>.
+            This installs FSL into your <tt>site-packages</tt> directory.
+            You must have write permissions under <tt>site-packages</tt>.
+        <li>If you have Python 2.3, you must install manually.
+            If you plan to use FSL from the command line interface only,
+            copy the FSL file tree into any directory of your choosing.
+            If you plan to use FSL as a Python module as well,
+            copy the fsl/ directory somewhere on your Python search
+            path. To make sure you installed FSL correctly on the search
+            path, execute <code>import fsl.fsltool</code> in a Python
+            session.
+    </ul>
+<li>Test that the installation works by executing <code>testall.py</code>
+    under <tt>fsl/test/</tt>. The tests take some time to finish.
+    Expected result: <em>one</em> test fails
+    (<code>test_lexer.TestLexer.test_invalid</code>), all others pass.
+    If you get this result, you can assume the program is installed correctly.
+    If more tests fail, try to see if there is something obviously
+    wrong with the installation or environment; if not, contact the author.
+<li>Next, you can start playing with fsltool (<tt>fsl/fsltool.py</tt>)
+</ol>
+
+<p>
+<hr>
+Updated 2005-10-21
+</body>
+</html>

doc/language.html

+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
+        "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html lang="en">
+
+<head>
+    <title>File Selection Language Reference</title>
+    <link rel="stylesheet" href="fsl.css" type="text/css">
+    <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
+    <meta name="author" content="Kristian Ovaska">
+</head>
+
+<body>
+
+<h1>File Selection Language Reference</h1>
+
+<p>
+Version 0.5 (2005-10-21)<br>
+Kristian Ovaska (kristian.ovaska [at] helsinki.fi)
+
+<h2>Contents</h2>
+
+<ul>
+
+<li><a href="#overview">1. Overview</a>
+<li><a href="#general-syntax">2. General syntax</a>
+<li><a href="#glob-patterns">3. Glob patterns</a>
+<li><a href="#rules">4. Rules</a>
+    <ul>
+    <li><a href="#glob-list">4.1 Glob list rule</a>
+    <li><a href="#for-each">4.2 For-each rule</a>
+    <li><a href="#in-block">4.3 IN-block</a>
+    <li><a href="#if-block">4.4 IF-block</a>
+    </ul>
+<li><a href="#expressions">5. Expressions</a>
+    <ul>
+    <li><a href="#expressions-general">5.1 General</a>
+    <li><a href="#functions">5.2 Built-in functions</a>
+    </ul>
+<li><a href="#eval-order">6. Rule evaluation order</a>
+<!--
+<li><a href="#examples">7. Examples</a>
+-->
+</ul>
+
+<h2 id="overview">1. Overview</h2>
+
+<p>
+File Selection Language (FSL) is a descriptive language for
+file selection. An FSL program, also called a rule set, is
+made out of rules. Each rule tells whether a file should
+or should not be included in the file set.
+
+<p>
+FSL rules utilize glob patterns. The pattern <tt>*</tt> matches
+all files, <tt>dir1/*</tt> matches all files under dir1,
+<tt>dir1/somefile</tt> matches only the file dir1/somefile, and
+so on.
+However, FSL rules are not limited to bare globs. See
+<a href="#rules">below</a> for a full specification of the rules.
+
+<p>
+There are two basic kinds of rules: inclusive and exclusive.
+Inclusive rules tell that certain files should be included
+in the file set. When evaluating an inclusive rule, the
+file system is scanned for all files matching the rule.
+
+<p>
+An exclusive rule (one that starts with "NOT") is an exception to an
+inclusive rule. It says that even if a file matched an
+inclusive rule earlier, it must be excluded from the file set.
+Notice that exclusive rules don't cause any file system
+scanning by themselves: all scanning comes from inclusive rules.
+
+<p>
+If you exclude a directory with an exclusive rule, you exclude
+all files and subdirectories of it as well. This is a good
+way to speed up file scanning.
+
+<h2 id="general-syntax">2. General syntax</h2>
+
+<p>
+The program consists of a list of rules, which are separated by newlines.
+Simple rules (usually) fit on one line, while complex ones span several lines.
+
+<p>
+The character # marks the beginning of a comment. Everything after it
+on the same line is ignored.
+
+<p>
+Inside so-called block rules (see <a href="#rules">below</a>), the child
+rules must be indented with spaces or tabs. You can choose the indentation
+level, but you must always use the same amount within the rule file.
+Mixing spaces and tabs is unwise. Nested blocks are indented in the
+same manner.
+
+<p>
+Simple (non-block) rules generally fit on one line. Due to the block
+indentation system, simple rules normally cannot span several lines;
+doing so is a syntax error.
+However, expressions (see <a href="#expressions">below</a>)
+that have an open parenthesis can span several lines freely.
+Python programmers will recognize the FSL indentation system.
+
+<p>
+Everything is case-insensitive: keywords and glob patterns.
+
+<h2 id="glob-patterns">3. Glob patterns</h2>
+
+<p>
+When you write glob patterns, you can use two forms: bare strings
+and quoted strings. Bare strings are written as-is, while quoted
+strings have quotation marks around them.
+
+<p>
+Bare string: <tt>dir1/*</tt><br>
+Quoted string: <tt>"dir1/*"</tt>
+
+<p>
+There are limitations to bare strings. Bare strings:
+<ul>
+<li>may not contain whitespace
+<li>may not contain the characters <tt>, ( ) " &lt; &gt; = ( ) # | !</tt>
+<li>may not be a reserved word: AND, EACH, IF, IN, NONREC, NOT, OR
+<li>can't be used in expressions (see <a href="#expressions">below</a>)
+</ul>
+
+<p>
+For example, the pattern "aaa bbb" must be a quoted string.
+
+<p>
+Glob patterns may contain both forward slashes (<tt>/</tt>) and
+backward slashes (<tt>\</tt>). Forward slashes work on Windows, too,
+and backward slashes work on Unix. Glob patterns may contain full
+Windows drive specifiers (e.g. <tt>c:\somedir\*</tt>); obviously, these don't
+work on Unix.
+
+<p>
+By default, glob patterns are recursive, i.e. <tt>*</tt> matches all files,
+including the subdirectories. You get nonrecursive behaviour by
+appending "NONREC" to the glob pattern. For example, <tt>* NONREC</tt>
+matches only the files in current root directory, but not in
+subdirectories.
+
+<p>
+There are two flavours of glob patterns: absolute and relative.
+Absolute patterns start with a forward or backward slash or a Windows
+drive specifier. A pattern that is not absolute is, logically, a
+relative pattern.
+
+<p>
+Relative glob patterns are evaluated in the context of a root
+directory. By default, the root directory is the current working
+directory, but may be set to any directory.
+For example, the rule <tt>*</tt> will produce all files in the
+file system if the root directory is the file system root, but only
+the files under <tt>/usr/local</tt> if the root directory
+is <tt>/usr/local</tt>.
+The root directory is given to the FSL interpreter as parameter. Also,
+so-called IN-blocks (see <a href="#in-block">below</a>) change the
+effective root directory temporarily. Absolute patterns are not
+allowed inside IN-blocks.
+
+<p>
+Usually, it is better to use relative globs, since they are more
+flexible than absolute globs.
+Absolute globs are always evaluated in the context of the same
+root directory, the file system root.
+Let's say you have created a relative rule-set for your Unix machine
+that you normally evaluate with <tt>/</tt> as the root directory.
+Some day, you mirror the file system into another Unix machine
+(or a Windows machine using Samba) into a directory <tt>/usr/somedir</tt>.
+Now, you can simply use your existing relative rule-set. This wouldn't
+be possible if you had hard-coded all the paths into the rules.
+
+
+<h2 id="rules">4. Rules</h2>
+
+<p>
+There are two basic rule types: <a href="#glob-list">glob list</a> rule
+and <a href="#for-each">for-each</a> rule.
+Both may be prefixed with "NOT", which makes them exclusive rules.
+There are also two compound rule types: <a href="#in-block">IN-block</a>
+and <a href="#if-block">IF-block</a>.
+
+<pre class="code">
+&lt;rule&gt; := (NOT)? &lt;glob-list&gt;
+        | (NOT)? &lt;for-each&gt;
+        | IN &lt;directory&gt; &lt;start-block&gt; &lt;rule&gt;+ &lt;end-block&gt;
+        | IF &lt;expression&gt; &lt;start-block&gt; &lt;rule&gt;+ &lt;end-block&gt;
+</pre>
+
+<h3 id="glob-list">4.1 Glob list rule</h3>
+
+<p>
+This is the most basic rule. Glob list rule is, as the
+name implies, a list of glob patterns separated by commas.
+A file matches a glob list rule if it matches any of the globs.
+
+In glob patterns, bare strings and quoted strings may be mixed
+freely, as can recursive and nonrecursive (NONREC) glob patterns.
+
+<p>
+Format:
+<pre class="code">
+&lt;glob-pattern&gt; (, &lt;glob-pattern&gt;)* (IF &lt;expression&gt;)?
+</pre>
+
+<p>
+The IF-expression is optional. If present, the
+glob list rule is applied only if the <a href="#expressions">expression</a>
+is true. The expression is evaluated only once, not for every
+file. When using expressions, you usually want to evaluate
+the expression for every file in turn. In this case,
+you have to use the <a href="#for-each">for-each</a> rule.
+
+<p>
+Examples:
+<pre class="code">
+usr/local/*
+somefile, "some file with spaces"
+*.gif, *.jpg, *.png
+NOT *.ps, *.eps
+    (excludes both *.ps and *.eps)
+*.html IF exists("index.html")
+    (include *.html files only if index.html is present)
+</pre>
+
+<h3 id="for-each">4.2 For-each rule</h3>
+
+<p>
+Format:
+<pre class="code">
+EACH &lt;variable name&gt; (IN &lt;glob list&gt;)? IF &lt;expression&gt;
+</pre>
+
+<p>
+For-each rule is an enhanced glob list. Each file matched by
+the glob list is included/excluded only if the <a href="#expressions">expression</a>
+matches.
+The expression is evaluated for every file in turn.
+
+<p>
+The IN-section is optional. If omitted, the glob <tt>*</tt> is used.
+
+<p>
+Examples:
+<pre class="code">
+EACH f IN * IF size(f) &gt; 1024     (include files larger than 1 KB)
+EACH f IF size(f) &gt; 1024          (the same)
+NOT EACH f IN *.ps IF date(f) &lt; "2000" 
+  (excludes *.ps files from the previous millennium)
+</pre>
+
+<h3 id="in-block">4.3 IN-block</h3>
+
+<p>
+IN-block contains a list of rules that are executed in a different
+root directory. The effective root directory is calculated by
+concatenating the previous root directory and the directory given
+in the IN-block header.
+
+<p>
+The rules under the IN-block can be any rules: glob lists,
+for-each rules, or other IN-blocks.
+
+<p>
+Format:
+<pre class="code">
+IN &lt;directory&gt;
+    &lt;rule1&gt;
+    &lt;rule2&gt;
+    ...
+</pre>
+
+<p>
+All glob patterns must be relative. The directory specifier may
+be absolute if the IN-block is a top-level IN-block. In a nested
+IN-block, the directory specifier must also be relative.
+
+<p>
+Example:
+<pre class="code">
+IN dir1
+    *
+</pre>
+
+<p>This includes all files under dir1 and is exactly the same as the rule
+<pre class="code">
+dir1/*
+</pre>
+
+<p>
+Example of nested IN-blocks:
+<pre class="code">
+    IN dir1
+        IN dir2
+            IN dir3
+                *
+</pre>
+
+<p>
+This matches the files <tt>dir1/dir2/dir3/*</tt>.
+
+<h3 id="if-block">4.4 IF-block</h3>
+
+<p>
+IF-block is a bit like a glob list rule with an IF-expression,
+but an IF-block may contain several rules. The rules are applied
+only if the expression evaluates to true. The expression is
+evaluated only once.
+
+<p>
+Format:
+<pre class="code">
+IF &lt;expression&gt;
+    &lt;rule1&gt;
+    &lt;rule2&gt;
+    ...
+</pre>
+
+
+<h2 id="expressions">5. Expressions</h2>
+
+<h3 id="expressions-general">5.1 General</h3>
+
+<p>
+Expressions are used in for-each rules, glob list rules and IF-block rules
+to determine whether a rule should be applied. Each expression evaluates
+to true or false.
+
+<p>
+Expressions are made of:
+<ul>
+<li>integer, floating point, string and timestamp literals, e.g. <tt>50</tt>, 
+    <tt>5.23</tt>, <tt>"abc"</tt>, <tt>"2005-08-05 21:30"</tt>
+<li>variable references if inside a for-each rule, e.g. <tt>f</tt>
+<li>function calls, e.g. <tt>size(f)</tt>, <tt>now()</tt>
+<li>logical operators NOT, AND, OR, e.g. "<tt>NOT expr</tt>",
+    "<tt>expr1 AND expr2</tt>", "<tt>expr1 OR expr2</tt>"
+<li>comparison operators <tt>&lt; &lt;= &gt; &gt;= = !=</tt>
+    (these work for numbers, strings and timestamps)
+</ul>
+
+<p>
+Timestamp literals are written inside quotation marks just like strings.
+However, they are converted to a "real" timestamp representation internally.
+An invalid timestamp literal results in a parse error.
+Accepted timestamp formats are:
+<ul>
+<li><tt>yyyy</tt> (month=1, day=1, hour=0, minute=0, second=0)
+<li><tt>yyyy-mm</tt>
+<li><tt>yyyy-mm-dd</tt>
+<li><tt>yyyy-mm-dd hh:mm</tt>
+<li><tt>yyyy-mm-dd hh:mm:ss</tt>
+</ul>
+
+<p>
+Notice that logical NOT (inside an expression) is
+conceptually different from the exclusion NOT before a rule.
+Logical NOT merely reverses the truth value of an expression.
+
+<p>
+Expression format:
+<pre class="code">
+&lt;expression&gt; := &lt;simple-expression&gt; ((AND | OR) &lt;expression&gt;)?
+&lt;simple-expression&gt; := (NOT)? &lt;atom&gt; (&lt;compare-op&gt; &lt;atom&gt;)?
+                     | (NOT)? "(" &lt;expression&gt; ")"
+&lt;atom&gt; := &lt;string&gt;
+        | &lt;number&gt;
+        | &lt;variable-name&gt;
+        | &lt;function-name&gt; "(" &lt;atom&gt; ")"
+&lt;compare-op&gt; := "&lt;" | "&lt;=" | "&gt;" | "&gt;=" | "=" | "!="
+</pre>
+
+<p>
+Expressions with an open parenthesis can span several lines, unlike
+normal simple FSL rules. Inside the expression, indentation doesn't
+matter.
+
+<p>
+Example:
+<pre class="code">
+EACH f IN *.txt IF (size(f) &lt; 1000
+                  OR age(f) &lt; 30)
+</pre>
+
+<p>
+The following example would be a syntax error because there
+are no (open) parentheses:
+<pre class="code">
+EACH f IN *.txt IF size(f) &lt; 1000
+                 OR age(f) &lt; 30
+</pre>
+
+<h3 id="functions">5.2 Built-in functions</h3>
+
+<p>
+Expressions can use built-in functions, which can be divided into two
+categories: predicate and value functions. Predicate functions return
+a truth value and value functions return a value (like a number).
+Value function calls can't be used as complete expressions by themselves;
+they must be combined with comparison operators.
+
+<p>
+<table>
+<tr>
+    <th align="left">Function
+    <th align="left">Type
+    <th align="left">Description
+<tr>
+    <td><em>age(filename)</em>
+    <td><tt>filename -&gt; float</tt>
+    <td>Return age of file in days as floating point number (based on modification date)
+<tr>
+    <td><em>base(filename)</em>
+    <td><tt>filename -&gt; filename</tt>
+    <td>Return file name without (outermost) extension,
+    e.g. for filename <tt>"dir/aaa.ext"</tt>, return <tt>"dir/aaa"</tt>
+<tr>
+    <td><em>date(filename)</em>
+    <td><tt>filename -&gt; datetime</tt>
+    <td>Return modification date of file
+<tr>
+    <td><em>exists(filename)</em>
+    <td><tt>filename -&gt; boolean</tt>
+    <td>Return true if given file exists
+<tr>
+    <td><em>extract(time, part)</em>
+    <td><tt>datetime, string -&gt; int</tt>
+    <td>Extract part from timestamp. Part is one of "year", "month", "day",
+    "hour", "minute", "second", "week", "weekday".
+<tr>
+    <td><em>now()</em>
+    <td><tt>-&gt; datetime</tt>
+    <td>Return current time
+<tr>
+    <td><em>size(filename)</em>
+    <td><tt>filename -&gt; int</tt>
+    <td>Return size of file in bytes
+</table>
+
+
+<h2 id="eval-order">6. Rule evaluation order</h2>
+
+<p>
+Rules are evaluated from the first to the last and the last
+matching rule is applied.
+
+<p>
+For example, there are the following rules:
+
+<pre class="code">
+*
+NOT *.jpg
+</pre>
+
+<p>
+The first rule matches all files, but the second rule excludes
+all *.jpg files. As the result, all files except *.jpg files are
+included in the file set.
+
+<p>
+An ill-formed rule set:
+
+<pre class="code">
+NOT *.jpg
+*
+</pre>
+
+<p>
+This matches all files, including *.jpg files, because
+the last rule (*) tells to include all files. Also, no exclusive
+rule at the beginning of a rule set ever has any effect.
+Indeed, the FSL interpreter warns you in this case:
+    Warning: exclusive rule at beginning - has no effect
+
+<p>
+Usually, you should have inclusion rules at the beginning of the
+rule set and exclusion rules at the end.
+
+<!--
+<h2 id="examples">7. Examples</h2>
+
+<p>
+Includes config.ini file under root directory, if present.
+<pre class="code">
+config.ini
+</pre>
+
+<hr>
+<p>
+Includes config.ini files under all directories, except in my/prog
+and its subdirectories.
+<pre class="code">
+*/config.ini
+NOT my/prog
+</pre>
+-->
+
+<p>
+<hr>
+Up: <a href="index.html">FSL index</a><br>
+Updated 2005-10-21
+</body>
+</html>
+- code documentation
+
+- more test cases for date parsing
+
+- more test cases for absolute globs
+
+- test cases for parser, especially invalid samples
+
+- [abc] globs aren't tested
+
+- should --reldir default to the directory given with -l
+  instead of current directory?
+
+- @include pragma rule:
+    @include "somefile.fsl"
+
+- Unix shell script compiler?
+import os
+
+import globals
+import lexer
+import utils
+
# Display names for the expression type constants of the globals module;
# type2str() looks its argument up in this table.
_EXPR_TYPE_DICT = {
    globals.E_BOOLEAN: 'boolean',
    globals.E_INT: 'int',
    globals.E_FLOAT: 'float',
    globals.E_STRING: 'string',
    globals.E_FILENAME: 'filename',
    globals.E_DATETIME: 'datetime',

    # The label of this entry lists three alternatives instead of one name.
    globals.E_STRING_FILENAME_DATETIME: '<string or filename or datetime>',
}

# Id number given to the next ASTNode that is created; ASTNode.__init__
# reads it and then increments it.
_running_id_number = 1
+
def type2str(expr_type):
    """Return the display name of an expression type.

    expr_type -- a key of _EXPR_TYPE_DICT, or None

    Returns '<UNKNOWN>' when expr_type is None, otherwise the name stored
    for expr_type in _EXPR_TYPE_DICT.

    Raises ValueError if expr_type is not None and has no entry in
    _EXPR_TYPE_DICT.
    """
    if expr_type is None:
        return '<UNKNOWN>'
    if expr_type not in _EXPR_TYPE_DICT:
        # Call form of raise: valid on Python 2 and Python 3 alike, whereas
        # "raise ValueError, message" is a syntax error on Python 3.
        # The one-element tuple keeps the formatting correct even when
        # expr_type itself is a tuple.
        raise ValueError('Unknown expression type: %s' % (expr_type,))
    return _EXPR_TYPE_DICT[expr_type]
+
+
class ASTNode:
    """Base class of the abstract syntax tree nodes.

    Attributes set by the constructor:
      parent   -- the parent node that was passed in; a false value
                  (e.g. None) ends the chain walked by get_parents()
      rootdir  -- the given root directory passed through utils.abspath
      token    -- initialised to None
      idnumber -- value of the module-wide _running_id_number counter at
                  creation time; the counter is incremented per node
    """

    def __init__(self, parent, rootdir):
        """Initialise the node and give it the next id number.

        Raises ValueError if rootdir is empty or None.  rootdir must not
        contain '//' or a backslash; that is only checked with assert
        statements, so the check is skipped under "python -O".
        """
        if not rootdir:
            # Call form of raise: accepted by Python 2 and Python 3 alike,
            # unlike the Python-2-only "raise ValueError, 'message'".
            raise ValueError('Rootdir not set')

        assert '//' not in rootdir
        assert '\\' not in rootdir
        self.parent = parent
        self.rootdir = utils.abspath(rootdir)
        self.token = None

        global _running_id_number
        self.idnumber = _running_id_number
        _running_id_number += 1

    def visit(self, visitor, arg=None):
        """Call visitor.visit_<ClassName>(self, arg) and return its result.

        <ClassName> is the name of the node's own class.  A visitor that
        lacks the matching method makes getattr raise AttributeError.
        """
        class_name = self.__class__.__name__
        method = getattr(visitor, 'visit_' + class_name)
        return method(self, arg)

    def set_rootdir(self, rootdir):
        """Replace the node's root directory with utils.abspath(rootdir)."""
        self.rootdir = utils.abspath(rootdir)

    def get_parents(self):
        """Return list of parent nodes in order from direct parent
        to root node."""
        parents = []
        cur_parent = self.parent
        # Walk upwards until a node without a (true) parent is reached.
        while cur_parent:
            parents.append(cur_parent)
            cur_parent = cur_parent.parent
        return parents

    def under_in_block(self):
        """Return whether node is an IN-block or a (grand)child of an IN-block.

        The test used is: some ancestor has a rootdir different from this
        node's rootdir.
        NOTE(review): this presumes only IN-blocks change rootdir -- confirm.
        """
        for parent in self.get_parents():
            if parent.rootdir != self.rootdir:
                return True
        return False
+
+### Rules ##############################################################
+
class Rule(ASTNode):
    """Common base class of the rule nodes.

    Adds the innerdir_common_prefix() hook, which this class leaves
    unimplemented.
    """

    def __init__(self, parent, rootdir):
        """Initialise the node through ASTNode with the same arguments."""
        ASTNode.__init__(self, parent, rootdir)

    def innerdir_common_prefix(self):
        """Return the common prefix of all innerdirs of the children
        of the node.

        Not implemented here: always raises NotImplementedError.
        """
        raise NotImplementedError()
+
class BlockRule(Rule):
    """Rule that holds a list of child rules in self.rules."""

    def __init__(self, parent, rootdir):
        """Initialise the rule with an empty list of child rules."""
        Rule.__init__(self, parent, rootdir)
        self.rules = []

    def __str__(self):
        """Return a one-line summary: child count, id, parent id, rootdir."""
        # '-' stands in for the parent id when the node has no parent.
        parent_label = '-'
        if self.parent:
            parent_label = '%d' % self.parent.idnumber
        fields = (len(self.rules), self.idnumber, parent_label, self.rootdir)
        return 'Block rule (length %d, id %s, parent %s, rootdir "%s")' % fields

    def innerdir_common_prefix(self):
        """Return os.path.commonprefix of the child rules' own
        innerdir_common_prefix() results ('' when there are no children).

        NOTE(review): os.path.commonprefix compares character by character,
        so the result may end in the middle of a path component -- confirm
        that callers can cope with that.
        """
        prefixes = []
        for child in self.rules:
            prefixes.append(child.innerdir_common_prefix())
        return os.path.commonprefix(prefixes)
+
+
+class IfBlockRule(BlockRule):
+    def __init__(self, parent, rootdir):
+        BlockRule.__init__(self, parent, rootdir)
+        self.if_expr = None
+
+    def __str__(self):
+        if self.parent:
+            parentstr = '%d' % self.parent.idnumber
+        else:
+            parentstr= '-'
+        return 'IF-block rule (length %d, id %s, parent %s, rootdir "%s")' % (len(self.rules), self.idnumber, parentstr, self.rootdir)
+
+
+class SelectorRule(Rule):
+    def __init__(self, parent, rootdir, inclusive):
+        Rule.__init__(self, parent, rootdir)
+        self.inclusive = inclusive
+
+
+class GloblistRule(SelectorRule):
+    def __init__(self, parent, rootdir, inclusive):
+        SelectorRule.__init__(self, parent, rootdir, inclusive)
+        self.glob_patterns = []
+        self.if_expr = None
+
+    def __str__(self):
+        if self.parent:
+            parentstr = '%d' % self.parent.idnumber
+        else:
+            parentstr= '-'
+        if self.inclusive:
+            exclstr = ''
+        else:
+            exclstr = 'EXCLUDE '
+        return 'Glob list %s(length %d, parent %s, rootdir "%s")' % (exclstr, len(self.glob_patterns), parentstr, self.rootdir)
+
+    def innerdir_common_prefix(self):
+        children_prefixes = [c.innerdir for c in self.glob_patterns]
+        return os.path.commonprefix(children_prefixes)
+
+
+class ForEachRule(SelectorRule):
+    def __init__(self, parent, rootdir, inclusive):
+        SelectorRule.__init__(self, parent, rootdir, inclusive)
+        self.variable_expr = None
+        self.glob_patterns = []
+        self.if_expr = None
+
+    def __str__(self):
+        if self.inclusive:
+            exclstr = ''
+        else:
+            exclstr = 'EXCLUDE '
+        return 'For each %s(variable "%s", %s globs, rootdir "%s")' % (exclstr, self.variable_expr.variable_name, len(self.glob_patterns), self.rootdir)
+
+    def innerdir_common_prefix(self):
+        children_prefixes = [c.innerdir for c in self.glob_patterns]
+        return os.path.commonprefix(children_prefixes)
+
+
+class GlobPattern(ASTNode):
+    def __init__(self, parent, rootdir, pattern, recursive):
+        ASTNode.__init__(self, parent, rootdir)
+        self.pattern = pattern.replace('\\', '/')
+        self.recursive = recursive
+        self.is_absolute = utils.filename_is_absolute(self.pattern)
+
+        if not self.is_absolute:
+            self.fullglob, self.innerdir = utils.calc_fullglob_innerdir(self.rootdir, self.pattern)
+        else:
+            self.fullglob = utils.abspath(self.pattern)
+            self.innerdir = utils.calc_innerdir(self.fullglob)
+
+    def __str__(self):
+        if not self.recursive:
+            return 'Glob: "%s" NONREC (innerdir "%s")' % (self.fullglob, self.innerdir)
+        else:
+            return 'Glob: "%s" (innerdir "%s")' % (self.fullglob, self.innerdir)
+
+### Expressions ########################################################
+
+class Expression(ASTNode):
+    def __init__(self, parent, rootdir, expr_type):
+        ASTNode.__init__(self, parent, rootdir)
+        self.expr_type = expr_type
+
+
+class AndExpression(Expression):
+    def __init__(self, parent, rootdir):
+        Expression.__init__(self, parent, rootdir, globals.E_BOOLEAN)
+        self.expr1 = None
+        self.expr2 = None
+
+    def __str__(self):
+        return 'AND expression'
+
+
+class OrExpression(Expression):
+    def __init__(self, parent, rootdir):
+        Expression.__init__(self, parent, rootdir, globals.E_BOOLEAN)
+        self.expr1 = None
+        self.expr2 = None
+
+    def __str__(self):
+        return 'OR expression'
+
+
+class NotExpression(Expression):
+    def __init__(self, parent, rootdir,):
+        Expression.__init__(self, parent, rootdir, globals.E_BOOLEAN)
+        self.expr = None
+
+    def __str__(self):
+        return 'NOT expression'
+
+
+class FunctionExpression(Expression):
+    def __init__(self, parent, rootdir, funcname):
+        Expression.__init__(self, parent, rootdir, None)
+        self.funcname = funcname
+        self.params = [] # [Expression]
+
+    def __str__(self):
+#        paramstr = ', '.join([str(p) for p in self.params])
+        return 'Function call (funcname "%s", %d params)' % (self.funcname, len(self.params))
+
+
+class CompareExpression(Expression):
+    def __init__(self, parent, rootdir, expr1, operator_type):
+        Expression.__init__(self, parent, rootdir, globals.E_BOOLEAN)
+        self.expr1 = expr1
+        self.expr2 = None
+        self.operator_type = operator_type
+
+    def __str__(self):
+        s = lexer.type2str(self.operator_type)
+        return 'Compare expression (type "%s")' % s
+
+
+class LiteralExpression(Expression):
+    def __init__(self, parent, rootdir, strvalue, atom_type):
+        Expression.__init__(self, parent, rootdir, atom_type)
+        self.strvalue = strvalue
+        self.datetime_value = None
+        self.filename_value = None
+
+    def __str__(self):
+        if self.expr_type in (globals.E_STRING, globals.E_STRING_FILENAME_DATETIME):
+            s = '"%s"' % self.strvalue
+        elif self.expr_type == globals.E_FILENAME:
+            s = self.filename_value
+        elif self.expr_type == globals.E_DATETIME:
+            s = self.datetime_value.strftime('%Y-%m-%d %H:%M:%S')
+        else:
+            s = self.strvalue
+        return 'Atom (%s, type %s)' % (s, type2str(self.expr_type))
+
+
+class VariableExpression(Expression):
+    def __init__(self, parent, rootdir, variable_name):
+        Expression.__init__(self, parent, rootdir, None)
+        self.variable_name = variable_name
+        self.variable_obj = None # Reference to symboltable.Variable
+
+    def __str__(self):
+        return 'Variable (%s, type %s)' % (self.variable_name, type2str(self.expr_type))

fsl/ASTChecker.py

+import AST
+import globals
+import utils
+import Visitor
+
+def filename_is_normalized(filename):
+    return utils.abspath(filename) == filename and '\\' not in filename
+
+class ASTChecker(Visitor.Visitor):
+    def __init__(self):
+        self.root_node_seen = False
+
+    def visit_BlockRule(self, astnode, arg):
+        assert astnode.rules
+        assert filename_is_normalized(astnode.rootdir)
+
+        if self.root_node_seen:
+            assert astnode.parent
+        if astnode.parent is None:
+            self.root_node_seen = True
+
+        for rule in astnode.rules:
+            assert rule.parent == astnode
+            assert isinstance(rule, AST.Rule)
+            rule.visit(self)
+
+        astnode.innerdir_common_prefix()
+
+    def visit_IfBlockRule(self, astnode, arg):
+        assert astnode.rootdir == astnode.parent.rootdir
+        assert filename_is_normalized(astnode.rootdir)
+        assert astnode.if_expr
+        assert astnode.if_expr.expr_type == globals.E_BOOLEAN
+        assert astnode.if_expr.parent == astnode
+        self.visit_BlockRule(astnode, arg)
+
+    def visit_GloblistRule(self, astnode, arg):
+        assert astnode.rootdir == astnode.parent.rootdir
+        assert filename_is_normalized(astnode.rootdir)
+        assert astnode.glob_patterns
+        assert astnode.inclusive is not None
+        for pattern in astnode.glob_patterns:
+            assert pattern.parent == astnode
+            assert isinstance(pattern, AST.GlobPattern)
+            pattern.visit(self)
+        if astnode.if_expr:
+            astnode.if_expr.visit(self)
+
+    def visit_ForEachRule(self, astnode, arg):
+        assert astnode.rootdir == astnode.parent.rootdir
+        assert filename_is_normalized(astnode.rootdir)
+        assert astnode.variable_expr
+        assert astnode.variable_expr.variable_name
+        assert astnode.variable_expr.expr_type
+        assert astnode.glob_patterns
+        assert astnode.if_expr
+        assert astnode.inclusive is not None
+
+        astnode.variable_expr.visit(self)
+        for pattern in astnode.glob_patterns:
+            assert pattern.parent == astnode
+            assert isinstance(pattern, AST.GlobPattern)
+            pattern.visit(self)
+
+        assert isinstance(astnode.if_expr, AST.Expression)
+        astnode.if_expr.visit(self)
+
+    def visit_GlobPattern(self, astnode, arg):
+        assert astnode.parent
+        assert astnode.pattern
+        assert '\\' not in astnode.pattern
+        assert astnode.rootdir == astnode.parent.rootdir
+        assert filename_is_normalized(astnode.rootdir)
+        assert astnode.recursive is not None
+        assert astnode.fullglob
+        assert filename_is_normalized(astnode.fullglob)
+        assert filename_is_normalized(astnode.innerdir)
+
+    def visit_AndExpression(self, astnode, arg):
+        assert astnode.rootdir == astnode.parent.rootdir
+        assert filename_is_normalized(astnode.rootdir)
+        assert astnode.expr_type is not None
+        assert astnode.expr1.parent == astnode
+        assert astnode.expr2.parent == astnode
+        assert astnode.expr1.expr_type == globals.E_BOOLEAN
+        assert astnode.expr2.expr_type == globals.E_BOOLEAN
+        astnode.expr1.visit(self)
+        astnode.expr2.visit(self)
+
+    def visit_OrExpression(self, astnode, arg):
+        assert astnode.rootdir == astnode.parent.rootdir
+        assert filename_is_normalized(astnode.rootdir)
+        assert astnode.expr_type is not None
+        assert astnode.expr1.parent == astnode
+        assert astnode.expr2.parent == astnode
+        assert astnode.expr1.expr_type == globals.E_BOOLEAN
+        assert astnode.expr2.expr_type == globals.E_BOOLEAN
+        astnode.expr1.visit(self)
+        astnode.expr2.visit(self)
+
+    def visit_NotExpression(self, astnode, arg):
+        assert astnode.rootdir == astnode.parent.rootdir
+        assert filename_is_normalized(astnode.rootdir)
+        assert astnode.expr_type is not None
+        assert astnode.expr.expr_type == globals.E_BOOLEAN
+        astnode.expr.visit(self)
+
+    def visit_FunctionExpression(self, astnode, arg):
+        assert astnode.rootdir == astnode.parent.rootdir
+        assert filename_is_normalized(astnode.rootdir)
+        assert astnode.expr_type is not None
+        for param in astnode.params:
+            param.visit(self)
+            assert param.parent == astnode
+            assert isinstance(param, AST.Expression)
+
+    def visit_CompareExpression(self, astnode, arg):
+        assert astnode.rootdir == astnode.parent.rootdir
+        assert filename_is_normalized(astnode.rootdir)
+        assert astnode.expr_type == globals.E_BOOLEAN
+        astnode.expr1.visit(self)
+        astnode.expr2.visit(self)
+        assert astnode.expr1.parent == astnode
+        assert astnode.expr2.parent == astnode
+
+    def visit_LiteralExpression(self, astnode, arg):
+        assert astnode.rootdir == astnode.parent.rootdir
+        assert filename_is_normalized(astnode.rootdir)
+        assert astnode.expr_type is not None
+
+    def visit_VariableExpression(self, astnode, arg):
+        assert astnode.rootdir == astnode.parent.rootdir
+        assert filename_is_normalized(astnode.rootdir)
+        assert astnode.expr_type is not None
+        assert astnode.variable_obj is not None

fsl/ASTLinearizer.py

+import ExpressionEval
+import Visitor
+
+class ASTLinearizer(Visitor.Visitor):
+    """Walk the AST in depth-first order and return linearized list
+    of SeletorRule instances."""
+
+    def visit_BlockRule(self, ast, arg):
+        li = []
+        for rule in ast.rules:
+            li.extend(rule.visit(self))
+        return li
+
+    def visit_IfBlockRule(self, ast, arg):
+        if not ast.if_expr.visit(ExpressionEval.ExpressionEval()):
+            return []
+        ast.if_expr = None
+        return self.visit_BlockRule(ast, arg)
+
+    def visit_GloblistRule(self, ast, arg):
+        if ast.if_expr:
+            if not ast.if_expr.visit(ExpressionEval.ExpressionEval()):
+                return []
+            ast.if_expr = None
+        return [ast]
+
+    def visit_ForEachRule(self, ast, arg):
+        return [ast]
+
+    # Other visit_* methods not implemented, they don't get visited.

fsl/ASTPrinter.py

+import Visitor
+
+class ASTPrinter(Visitor.Visitor):
+    def __init__(self, indent_amount=4):
+        self.level = 0
+        self.indent_amount = indent_amount
+
+    def printline(self, line):
+        print '%s%s' % (' '*self.level, line)
+
+    def _inc_level(self):
+        self.level += self.indent_amount
+
+    def _dec_level(self):
+        self.level -= self.indent_amount
+
+    def visit_BlockRule(self, astnode, arg):
+        self.printline(astnode)
+        self._inc_level()
+        for rule in astnode.rules:
+            rule.visit(self)
+        self._dec_level()
+
+    def visit_IfBlockRule(self, astnode, arg):
+        self.printline(astnode)
+        self._inc_level()
+        for rule in astnode.rules:
+            rule.visit(self)
+        astnode.if_expr.visit(self)
+        self._dec_level()
+
+    def visit_GloblistRule(self, astnode, arg):
+        self.printline(astnode)
+        self._inc_level()
+        for pattern in astnode.glob_patterns:
+            pattern.visit(self)
+            assert pattern.parent == astnode
+        if astnode.if_expr:
+            self.printline('If-expression:')
+            astnode.if_expr.visit(self)
+        self._dec_level()
+
+    def visit_ForEachRule(self, astnode, arg):
+        self.printline(astnode)
+        self._inc_level()
+        for pattern in astnode.glob_patterns:
+            pattern.visit(self)
+            assert pattern.parent == astnode
+#        self.printline('If-expression:')
+        astnode.if_expr.visit(self)
+        self._dec_level()
+
+    def visit_GlobPattern(self, astnode, arg):
+        self.printline(str(astnode)) # + ' (innerdir "%s")' % astnode.innerdir)
+
+    def visit_AndExpression(self, astnode, arg):
+        self.printline(astnode)
+        self._inc_level()
+        astnode.expr1.visit(self)
+        astnode.expr2.visit(self)
+        assert astnode.expr1.parent == astnode
+        assert astnode.expr2.parent == astnode
+        self._dec_level()
+
+    def visit_OrExpression(self, astnode, arg):
+        self.printline(astnode)
+        self._inc_level()
+        astnode.expr1.visit(self)
+        astnode.expr2.visit(self)
+        assert astnode.expr1.parent == astnode
+        assert astnode.expr2.parent == astnode
+        self._dec_level()
+
+    def visit_NotExpression(self, astnode, arg):
+        self.printline(astnode)
+        self._inc_level()
+        astnode.expr.visit(self)
+        self._dec_level()
+
+    def visit_FunctionExpression(self, astnode, arg):
+        self.printline(astnode)
+        self._inc_level()
+        for param in astnode.params:
+            param.visit(self)
+            assert param.parent == astnode
+        self._dec_level()
+
+    def visit_CompareExpression(self, astnode, arg):
+        self.printline(astnode)
+        self._inc_level()
+        astnode.expr1.visit(self)
+        astnode.expr2.visit(self)
+        assert astnode.expr1.parent == astnode
+        assert astnode.expr2.parent == astnode
+        self._dec_level()
+
+    def visit_LiteralExpression(self, astnode, arg):
+        self.printline(astnode)
+
+    def visit_VariableExpression(self, astnode, arg):
+        self.printline(astnode)

fsl/ExpressionEval.py

+import AST
+import functions
+import globals
+import lexer
+import Visitor
+import utils
+
+class ExpressionEval(Visitor.Visitor):
+
+    def visit_AndExpression(self, astnode, arg):
+        true1 = astnode.expr1.visit(self)
+        if not true1:
+            return False
+        return astnode.expr2.visit(self)
+
+    def visit_OrExpression(self, astnode, arg):
+        true1 = astnode.expr1.visit(self)
+        if true1:
+            return True
+        return astnode.expr2.visit(self)
+
+    def visit_NotExpression(self, astnode, arg):
+        return not astnode.expr.visit(self)
+
+    def visit_FunctionExpression(self, astnode, arg):
+        funcname = astnode.funcname
+        param_values = [p.visit(self) for p in astnode.params]
+        return functions.execute_function(funcname, param_values)
+
+    def visit_CompareExpression(self, astnode, arg):
+        value1 = astnode.expr1.visit(self)
+        value2 = astnode.expr2.visit(self)
+#        print 'Compare', value1, value2
+        if astnode.operator_type == lexer.T_LESS_THAN:
+            return value1 < value2
+        elif astnode.operator_type == lexer.T_LESS_THAN_EQUAL:
+            return value1 <= value2
+        elif astnode.operator_type == lexer.T_MORE_THAN:
+            return value1 > value2
+        elif astnode.operator_type == lexer.T_MORE_THAN_EQUAL:
+            return value1 >= value2
+        elif astnode.operator_type == lexer.T_EQUAL:
+            return value1 == value2
+        elif astnode.operator_type == lexer.T_NOT_EQUAL:
+            return value1 != value2
+        else:
+            raise ValueError, 'Unknown compare operator (%s)' % astnode.operator_type
+
+    def visit_LiteralExpression(self, astnode, arg):
+        if astnode.expr_type == globals.E_INT:
+            return int(astnode.strvalue, 10)
+        elif astnode.expr_type == globals.E_FLOAT:
+            return float(astnode.strvalue)
+        elif astnode.expr_type == globals.E_STRING:
+            return astnode.strvalue
+        elif astnode.expr_type == globals.E_FILENAME:
+            return astnode.filename_value
+        elif astnode.expr_type == globals.E_DATETIME:
+            return astnode.datetime_value
+        elif astnode.expr_type == globals.E_STRING_FILENAME_DATETIME:
+            return astnode.strvalue
+        else:
+            raise TypeError, 'Cannot evaluate atom "%s"' % astnode.strvalue
+
+    def visit_VariableExpression(self, astnode, arg):
+        return astnode.variable_obj.value
+import datetime
+import os
+
+class File:
+    """Filename representation, with possible size and
+    modification time information.
+    
+    :ivar abs_filename: Absolute filename as string. The filename
+      is normalized such that it always contains forward slashes,
+      even on Windows.
+    """
+
+    def __init__(self, filename):
+        self.abs_filename = filename
+
+        if os.path.abspath(filename).replace('\\', '/') != filename:
+            raise ValueError, 'Filename must be absolute and normalized ("%s")' % filename
+        if not os.path.exists(filename):
+            raise ValueError, 'File "%s" not found' % filename
+
+        self._size = None
+        self._modtime = None
+        self._isfile = os.path.isfile(self.abs_filename)
+        """
+        if os.path.isfile(self.abs_filename):
+            self.size = os.path.getsize(self.abs_filename)
+            timestamp = os.path.getmtime(self.abs_filename)
+            try:
+                self.modtime = datetime.datetime.fromtimestamp(timestamp)
+            except ValueError:
+                # datetime raises error for out-of-bounds dates
+                self.modtime = None
+        else:
+            self.size = None
+            self.modtime = None
+        """
+
+    def __cmp__(self, other):
+        if not isinstance(other, File):
+            return -1
+        return cmp(self.abs_filename, other.abs_filename)
+
+    def __str__(self):
+        return self.abs_filename
+
+    def __hash__(self):
+        return hash(self.abs_filename)
+
+    def get_size(self):
+        """Return size of file in bytes. Return None if the file
+        is a directory."""
+        if self._size is None and self._isfile:
+            self._size = os.path.getsize(self.abs_filename)
+        return self._size
+
+    def get_modtime(self):
+        """Return modification time as `datetime.datetime` instance.
+        Return None if the file is a directory or the modification
+        time can't otherwise be determined."""
+
+        if self._modtime is None and self._isfile:
+            timestamp = os.path.getmtime(self.abs_filename)
+            try:
+                self._modtime = datetime.datetime.fromtimestamp(timestamp)
+            except ValueError:
+                # datetime raises error for out-of-bounds dates
+                self._modtime = None
+        return self._modtime
+
+    def calc_relative_filename(self, rootdir):
+        """Return a relative filename r so that rootdir+r is
+        the absolute filename. May return None if the
+        relative filename can't be calculated."""
+
+        rootdir_lower = os.path.abspath(rootdir).replace('\\', '/').lower()
+        absname_lower = self.abs_filename.lower()
+        if not absname_lower.startswith(rootdir_lower):
+            return None
+
+        relname = self.abs_filename[len(rootdir_lower):]
+        if relname.startswith('/'):
+            relname = relname[1:]
+        if not relname:
+            relname = '.'
+        return relname

fsl/HookHandler.py

+"""
+Contains the abstract base class for hook handlers
+that real handlers extend.
+
+Hook handlers receive real-time information on
+interpretation and file-system scanning. Inclusion
+and exclusion callback functions also receive the
+source rule that was applied.
+
+The hook mechanism is one-directional; hook handlers
+can't modify the interpreter while it is running,
+they only receive information on its progress.
+"""
+
+class HookHandler:
+    """Base class for hook handlers. The default implementations
+    for the methods do nothing."""
+
+    def include_file(self, fileobj, rule):
+        """Called when a file has been included in the
+        file set. Only called once for each file.
+
+        :param fileobj: The included file as `File.File`
+          instance.
+        :param rule: The rule that resulted in the inclusion.
+          `AST.Rule` instance.
+        """
+        pass
+
+    def exclude_file(self, fileobj, rule):
+        """Called when a file has been excluded from the
+        file set (NOTE: CURRENTLY NEVER CALLED). Only called once
+        for each file.
+
+        :param fileobj: The excluded file as `File.File`
+          instance.
+        :param rule: The rule that resulted in the exclusion.
+          `AST.Rule` instance.
+        """
+        pass
+
+    def start_scan(self, dirnameobj, glob_pattern):
+        """Called when a directory scan starts.
+
+        :param dirnameobj: Scanned directory as `File.File`
+          instance.
+        :param glob_pattern: The glob pattern that is used
+          in scanning, as string.
+        """
+        pass
+
+    def end_scan(self, dirnameobj, files_count, bytes_count):
+        """Called when a directory scan finishes.
+
+        :param dirnameobj: Scanned directory as `File.File`
+          instance.
+        :param files_count: Number of files that were included
+          in the directory, including its sub-directories.
+        :param bytes_count: Size in bytes of the included files
+          in the directory, including its sub-directories.
+        """
+        pass

fsl/Interpreter.py

+"""
+The FSL interpreter, which scans the file system and
+produces a set of files.
+
+The interpreter is normally invoked with the functions
+`interpret` and `interpret_string`; making an explicit
+instance of the class `Interpreter` is usually not
+necessary.
+
+The interpreter has an extension mechanism called hook
+handlers. A hook handler (see `HookHandler`) defines
+callback functions that the interpreter calls at
+appropriate times. Hook handlers receive real-time
+information on interpretation and file-system scanning.
+Hook handlers are passed as parameters to the functions
+mentioned above.
+
+About exceptions: the lexer, parser and interpreter
+raise various exceptions on errors. The superclass for
+FSL exceptions is `fsl.globals.FSLError`.
+`fsl.globals.FSLParseError` is raised on parse errors,
+e.g. invalid tokens or incorrect source structure.
+`fsl.globals.FSLSyntaxError` is raised on syntax errors
+related to semantics, e.g. type errors.
+Finally, `fsl.globals.FSLRuntimeError` is raised
+on errors that occur during interpretation.
+"""
+
+import fnmatch
+import glob
+import os
+import sys
+
+import AST
+import ASTChecker
+import ASTLinearizer
+import ASTPrinter
+import ExpressionEval
+import File
+import fslparser
+import globals
+import HookHandler
+import utils
+
+_DEBUG = False
+
+# Hooks:
+# include file (filename, rule)
+# exclude file (filename, rule)
+# start of directory scan (dirname, glob_pattern)
+# end of directory scan (dirname, files_count, bytes_count)
+
+class Interpreter:
+    def __init__(self, ast):
+        self.ast = ast
+        ast.visit(ASTChecker.ASTChecker())
+
+        self._rulelist = ast.visit(ASTLinearizer.ASTLinearizer())
+        self._filedict = {}         # {filename: File}
+        self._cur_rule_index = None  # index to self._rulelist
+
+        self._hook_handlers = []
+
+    def run(self):
+        self._filedict = {}
+        self._cur_rule_index = 0
+
+        if self._rulelist and not self._rulelist[0].inclusive:
+            print >>sys.stderr, 'Warning: exclusive rule at beginning - has no effect'
+
+        self._check_dir_permissions(self._rulelist)
+        self._convert_directory_globs(self._rulelist)
+
+        for i, rule in enumerate(self._rulelist):
+            self._cur_rule_index = i
+            if not rule.inclusive:
+                continue
+
+            if isinstance(rule, AST.GloblistRule):
+                self._interpret_globlist(rule)
+            else:
+                self._interpret_foreach(rule)
+
+        return self._filedict.values()
+
+    def _convert_directory_globs(self, rulelist):
+        for rule in rulelist:
+#            if (not rule.inclusive) and isinstance(rule, AST.GloblistRule):
+                # Don't convert exclusive glob list rules
+#                continue
+            for glob in rule.glob_patterns:
+                if os.path.isdir(glob.fullglob):
+                    old_fullglob = glob.fullglob
+                    if glob.fullglob.endswith('/'):
+                        glob.fullglob += '*'
+                    elif not glob.fullglob.endswith('*'):
+                        glob.fullglob += '/*'
+#                    print >>sys.stderr, 'Converted glob pattern "%s" to "%s"' % (old_fullglob, glob.fullglob)
+
+    def _check_dir_permissions(self, rulelist):
+        prev_cwd = os.getcwd()
+
+        for i, rule in enumerate(rulelist):
+            if not os.path.exists(rule.rootdir) or not rule.inclusive:
+                continue
+            for glob_pattern in rule.glob_patterns:
+                if os.path.isdir(glob_pattern.innerdir):
+                    if self._has_exclusion_rule(glob_pattern.innerdir, i):
+                        # The innerdir has exclusion rule, so no need to
+                        # check permissions.
+                        continue
+
+                    if not os.access(glob_pattern.innerdir, os.R_OK):
+                        # This doesn't work on Windows properly.
+                        os.chdir(prev_cwd)
+                        raise globals.FSLRuntimeError, 'No read permissions for directory "%s"' % glob_pattern.innerdir
+
+                    try:
+                        os.chdir(glob_pattern.innerdir)
+                    except OSError:
+                        os.chdir(prev_cwd)
+                        raise globals.FSLRuntimeError, 'No chdir permissions for directory "%s"' % glob_pattern.innerdir
+
+        os.chdir(prev_cwd)
+
+    def _interpret_globlist(self, globlist_rule):
+        if globlist_rule.if_expr and not self._expr_true(globlist_rule.if_expr):
+            # IF-clause is false.
+            return
+
+        for glob_pattern in globlist_rule.glob_patterns:
+            self._scan_glob(glob_pattern, glob_pattern.innerdir, None, None)
+
+    def _interpret_foreach(self, foreach_rule):
+        for glob_pattern in foreach_rule.glob_patterns:
+            self._scan_glob(glob_pattern, glob_pattern.innerdir, foreach_rule.if_expr, foreach_rule.variable_expr)
+
+
+    def _scan_glob(self, glob_pattern_node, scan_directory, if_expr, variable_expr):
+        if not os.path.exists(scan_directory):
+            if _DEBUG:
+                print 'Skipping directory %s (not found)' % scan_directory
+            return  # Early exit, directory not found
+
+        if self._has_exclusion_rule(scan_directory, self._cur_rule_index):
+            return  # Early exit, directory excluded
+
+        if os.path.isfile(scan_directory):
+            if self._file_is_includable(scan_directory, if_expr, variable_expr):
+                self._add_file(scan_directory, glob_pattern_node.parent)
+            return  # Early exit, directory was an actual file
+
+        self._notify_start_scan(scan_directory, glob_pattern_node.fullglob)
+        files_count = 0
+        bytes_count = 0
+
+        files = os.listdir(scan_directory)
+        files.sort()
+
+        for filename in files:
+            fullpath = utils.pathjoin(scan_directory, filename)
+#            print scan_directory, filename, fullpath
+
+            if os.path.isdir(fullpath) and glob_pattern_node.recursive: # Directory
+                newdir = utils.pathjoin(scan_directory, filename)
+                assert os.path.exists(newdir)
+                if not self._has_exclusion_rule(newdir, self._cur_rule_index):
+                    rec_files, rec_bytes = self._scan_glob(glob_pattern_node, newdir, if_expr, variable_expr)
+                    files_count += rec_files
+                    bytes_count += rec_bytes
+
+            elif os.path.isfile(fullpath): # File
+                if self._glob_matches(fullpath, glob_pattern_node):
+                    if self._file_is_includable(fullpath, if_expr, variable_expr):
+                        fileobj = self._add_file(fullpath, glob_pattern_node.parent)
+                        if fileobj is not None:
+                            files_count += 1
+                            bytes_count += fileobj.get_size()
+
+        self._notify_end_scan(scan_directory, files_count, bytes_count)
+        return files_count, bytes_count
+
+    def _add_file(self, filename, rule):
+        assert isinstance(rule, AST.Rule)
+        if filename not in self._filedict:
+            fil = File.File(filename)
+            self._filedict[filename] = (fil)
+            self._notify_include_file(fil, rule)
+            return fil
+        else:
+            return None
+
+    def _file_is_includable(self, filename, if_expr, variable_expr):
+        """Return true if the IF-expression is true (if any) and
+        there is no exclusion rule."""
+
+        if if_expr is not None:
+            variable_expr.variable_obj.set_value(filename)
+            matched = self._expr_true(if_expr)
+            variable_expr.variable_obj.set_value(None)
+            if not matched:
+                # If-expression didn't match.
+                return False
+        return not self._has_exclusion_rule(filename, self._cur_rule_index)
+
+    def _has_exclusion_rule(self, filename, cur_rule_index):
+        """Return whether the filename has a matching exclusion
+        rule later in the rulelist.
+        
+        :param filename: The filename to be tested
+        :param cur_rule_index: Index of the rule that produced
+          `filename`. Checking starts from `cur_rule_index`+1.
+        """
+
+        for rule in self._rulelist[cur_rule_index+1:]:
+            if rule.inclusive:
+                continue
+
+            if isinstance(rule, AST.GloblistRule):
+                match = self._check_globlist_exclusion(rule, filename)
+            elif isinstance(rule, AST.ForEachRule):
+                match = self._check_eachrule_exclusion(rule, filename)
+            else:
+                raise InternalError
+
+            if match:
+                return True  # Early exit
+
+        return False
+
+    def _check_globlist_exclusion(self, rule, filename):
+        for glob_node in rule.glob_patterns:
+            if self._glob_matches(filename, glob_node):
+                return True
+        return False
+
+    def _check_eachrule_exclusion(self, rule, filename):
+        if os.path.isdir(filename):
+            return False
+
+        for glob_node in rule.glob_patterns:
+            if self._glob_matches(filename, glob_node):
+                rule.variable_expr.variable_obj.set_value(filename)
+                matched = self._expr_true(rule.if_expr)
+                rule.variable_expr.variable_obj.set_value(None)
+                if matched:
+                    return True
+
+        return False
+
+    def _expr_true(self, if_expr):
+        return if_expr.visit(ExpressionEval.ExpressionEval())
+
+    def _glob_matches(self, filename, globrule_node):
+        if globrule_node.fullglob.endswith('/*'):
+            # Match also "xyz/*" to "xyz"
+            pattern2 = globrule_node.fullglob[:-2].lower()
+            if pattern2 == filename.lower():
+                return True
+
+        match = fnmatch.fnmatch(filename, globrule_node.fullglob)
+        if match and not globrule_node.recursive:
+            dirname = os.path.dirname(filename).replace('\\', '/')
+            return dirname == globrule_node.innerdir or filename == globrule_node.innerdir
+        else:
+            return match
+
+    ### Hooks ##########################################################
+
+    def add_hook_handler(self, handler):
+        if not isinstance(handler, HookHandler.HookHandler):
+            raise TypeError, 'handler must be instance of HookHandler'
+        self._hook_handlers.append(handler)
+
+    def _notify_include_file(self, fileobj, rule):
+        for handler in self._hook_handlers:
+            handler.include_file(fileobj, rule)
+
+    def _notify_exclude_file(self, filename, rule):
+        fileobj = File.File(filename)
+        for handler in self._hook_handlers:
+            handler.exclude_file(fileobj, rule)
+
+    def _notify_start_scan(self, dirname, glob_pattern):
+        fileobj = File.File(dirname)
+        for handler in self._hook_handlers:
+            handler.start_scan(fileobj, glob_pattern)
+
+    def _notify_end_scan(self, dirname, files_count, bytes_count):
+        fileobj = File.File(dirname)
+        for handler in self._hook_handlers:
+            handler.end_scan(fileobj, files_count, bytes_count)
+
+
+def interpret(source_filenames, rootdir, hook_handlers=[]):
+    """Interpret source files, return resulting file-set as a
+    list of `File.File` instances.
+    If multiple files are given as parameter, they are combined in a
+    cascading manner.
+
+    :param source_filenames: List of source filenames, or a
+      single filename.
+    :param rootdir: The root directory for top-level relative
+      filenames.
+    :param hook_handlers: List of `HookHandler.HookHandler`
+      instances. The handler callback functions are called
+      by the interpreter.
+    """
+
+    if isinstance(hook_handlers, HookHandler.HookHandler):
+        hook_handlers = [hook_handlers]
+    ast = fslparser.parse(source_filenames, rootdir)
+    interpreter = Interpreter(ast)
+    for handler in hook_handlers:
+        interpreter.add_hook_handler(handler)
+    return interpreter.run()
+
+def interpret_string(source_string, rootdir, hook_handlers=[]):
+    """Interpret a source string, return resulting file-set as a
+    list of `File.File` instances.
+
+    :param source_string: FSL program as a string. The string
+      may contain newlines.
+    :param rootdir: The root directory for top-level relative
+      filenames.
+    :param hook_handlers: List of `HookHandler.HookHandler`
+      instances. The handler callback functions are called
+      by the interpreter.
+    """
+
+    if isinstance(hook_handlers, HookHandler.HookHandler):
+        hook_handlers = [hook_handlers]
+    ast = fslparser.parse_string(source_string, rootdir)
+    interpreter = Interpreter(ast)
+    for handler in hook_handlers:
+        interpreter.add_hook_handler(handler)
+    return interpreter.run()

fsl/TypeChecker.py

+import re
+
+import AST