Commits

Anonymous committed 3b4f3f5

purepython impl

  • Parent commits de7d55b


Files changed (17)

File pyhep-purepython/DESIGN.txt

+api of histograms
+=================
+
+extension types
+---------------
+
+PyHistogram1D
+.............
+
+   * python axis object
+   * a one-element sequence of the python axis object describing the axis
+   * axis objects used for converting coordinates
+   * python type used to store bin contents
+   * bin contents
+   * nbr of accumulations
+   * dict for additional attributes
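For illustration only (not part of this commit), here is a minimal sketch of how the attributes listed above might be laid out in a pure-Python class; the names 'PyHistogram1D', 'bin_type', and 'accumulate' are assumptions, not code from the repository:

    class PyHistogram1D(object):
        """Sketch of the attribute layout described in the design notes."""

        def __init__(self, axis, bin_type=float):
            self.axis = axis              # the Python axis object
            self.axes = (axis, )          # one-element sequence describing the axis
            self.bin_type = bin_type      # Python type used to store bin contents
            self.bins = {}                # bin contents, keyed by bin number
            self.number_of_samples = 0    # number of accumulations
            self.attributes = {}          # dict for additional attributes

        def accumulate(self, value, weight=1):
            # The axis object converts a coordinate into a bin number.
            bin_nbr = self.axis(value)
            contents = self.bins.get(bin_nbr, self.bin_type())
            self.bins[bin_nbr] = contents + weight
            self.number_of_samples += 1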

File pyhep-purepython/hep/__init__.py

Empty file added.

File pyhep-purepython/hep/fs.py

+#-----------------------------------------------------------------------
+# imports
+#-----------------------------------------------------------------------
+
+from __future__ import generators
+
+try:
+    import cPickle as pickle
+except ImportError:
+    import pickle
+    
+import fnmatch
+import os.path
+import re
+import stat
+
+#-----------------------------------------------------------------------
+# classes
+#-----------------------------------------------------------------------
+
+class AccessError(RuntimeError):
+
+    pass
+
+
+#-----------------------------------------------------------------------
+
+class Info(object):
+
+    def __init__(self, name, type):
+        self.name = name
+        self.type = type
+        
+
+    def __repr__(self):
+        return "Info(name=%r, type=%r)" % (self.name, self.type)
+
+
+    def __str__(self):
+        attributes = self.__dict__.keys()
+        attributes.sort()
+        return "Info(" \
+               + ", ".join([ "%s=%r" % (a, getattr(self, a))
+                             for a in attributes ]) \
+               + ")"
+
+
+
+#-----------------------------------------------------------------------
+
+class Directory(object):
+    """A generalized directory.
+
+    A directory object provides access to (file system and other)
+    directory contents through a standard mapping protocol (except that
+    the 'copy' method is not available).
+
+    The following attributes specify default options.  They may be
+    overridden in method calls by keyword arguments.
+
+    'deldirs' -- If true, allow directories to be deleted recursively,
+    removing their contents first.  Otherwise, a directory may be
+    deleted only when it is empty.
+
+    'makedirs' -- If true, create missing intervening directories when
+    setting a key.  For example, if subdirectory 'a' is empty and the
+    key 'a/b/c' is set, the subdirectory 'a/b' is automatically
+    created.
+
+    'replace' -- If true, when setting a key that already exists,
+    replace the previous value.  Otherwise, a key's value may not be
+    overwritten.
+
+    'replacedirs' -- Like 'replace' but if true, also allows replacement
+    of entire directories."""
+
+    # A subclass must provide these attributes:
+    #
+    #   'writable' -- True if the directory is writable.
+    #
+    # and these methods:
+    #
+    #   '_keys(**kw_args)' -- Return a sequence of keys.
+    # 
+    #   '_isdir(key, **kw_args)' -- Return true if 'key' corresponds to
+    #   a subdirectory; false if not or if it doesn't exist.
+    #
+    #   '_get(key, **kw_args)' -- Return the value for 'key'.
+    # 
+    #   '_set(key, value, **kw_args)' -- Set the value for 'key' to
+    #   'value'.
+    #
+    #   '_del(key, **kw_args)' -- Remove 'key' and its value.
+    #
+    #   '_getinfo(key, **kw_args)' -- Return an 'Info' object describing
+    #   the value of 'key'.
+    #
+
+    deldirs = True
+    
+    makedirs = True
+
+    replace = True
+
+    replacedirs = False
+
+
+    def __contains__(self, path):
+        subdir, key = self._splitdir(path, makedirs=False)
+        return key in subdir._keys()
+    
+
+    def __delitem__(self, path):
+        subdir, key = self._splitdir(path, makedirs=False)
+        subdir.__del(key)
+
+
+    def __getitem__(self, path):
+        if path == "":
+            return self
+        subdir, key = self._splitdir(path, makedirs=False)
+        if key in subdir._keys():
+            return subdir._get(key)
+        else:
+            raise KeyError, key
+
+
+    def __len__(self):
+        return len(self._keys())
+
+
+    def __iter__(self):
+        return iter(self._keys())
+
+
+    def __setitem__(self, path, value):
+        subdir, key = self._splitdir(path)
+        subdir.__set(key, value)
+
+
+    def clear(self, **kw_args):
+        """Delete keys in this directory.
+
+        See 'keys' for a description of optional keyword attributes."""
+
+        for key in self._keys(**kw_args):
+            self.__del(key, **kw_args)
+
+
+    def delete(self, path, **kw_args):
+        """Delete 'path'."""
+
+        subdir, key = self._splitdir(path, makedirs=False)
+        subdir.__del(key, **kw_args)
+        
+
+    def get(self, path, default=None, **kw_args):
+        """Return the contents of 'path'."""
+
+        if path == "":
+            return self
+        # Disable the 'makedirs' option for 'get' operations.
+        kw_args["makedirs"] = False
+        subdir, key = self._splitdir(path, **kw_args)
+        if key in subdir._keys(**kw_args):
+            return subdir._get(key, **kw_args)
+        else:
+            return default
+
+
+    def getinfo(self, path, **kw_args):
+        # Disable the 'makedirs' option for 'getinfo' operations.
+        kw_args["makedirs"] = False
+        subdir, key = self._splitdir(path, **kw_args)
+        if key in subdir._keys(**kw_args):
+            return subdir._getinfo(key, **kw_args)
+        else:
+            raise KeyError, key
+
+
+    def has_key(self, path, **kw_args):
+        subdir, key = self._splitdir(path, **kw_args)
+        return key in subdir._keys(**kw_args)
+    
+
+    is_empty = property(lambda self: len(self._keys()) == 0)
+
+
+    def isdir(self, path, **kw_args):
+        # Disable the 'makedirs' option for 'isdir' operations.
+        kw_args["makedirs"] = False
+        subdir, key = self._splitdir(path, **kw_args)
+        return subdir._isdir(key, **kw_args)
+
+
+    def items(self, **kw_args):
+        """Return '(key, value)' pairs in this directory.
+
+        See 'keys' for description of optional keyword arguments."""
+
+        return [ (n, self.get(n, **kw_args))
+                 for n in self.keys(**kw_args) ]
+
+
+    def iteritems(self, **kw_args):
+        """Return an iterator of '(key, value)' pairs in this directory.
+
+        See 'keys' for description of optional keyword arguments."""
+
+        for key in self.iterkeys(**kw_args):
+            yield key, self.get(key, **kw_args)
+
+
+    def iterkeys(self, **kw_args):
+        """Return an iterator of keys in this directory.
+
+        See 'keys' for description of optional keyword arguments."""
+
+        for key in self.keys(**kw_args):
+            yield key
+
+
+    def itervalues(self, **kw_args):
+        """Return an iterator of values in this directory.
+
+        See 'keys' for description of optional keyword arguments."""
+
+        for key in self.iterkeys(**kw_args):
+            yield self.get(key, **kw_args)
+
+
+    def keys(self, **kw_args):
+        """Returns the keys in this directory.
+
+        If 'recursive' is true, return paths in this directory and its
+        subdirectories.
+
+        Optional keyword arguments:
+
+        'recursive' -- If true, return keys in this directory and its
+        subdirectories.  Keys in a subdirectory precede the
+        subdirectory key itself.  Default is false.
+
+        'glob' -- Return only keys matching the specified glob pattern.
+
+        'not_dir' -- If true, don't include subdirectory keys.
+
+        'pattern' -- Return only keys matching the specified regular
+        expression.
+
+        'known_types' -- If true (the default), don't return keys whose
+        types are "unknown".
+
+        'only_type' -- Return only keys whose values' types are as
+        specified. 
+
+        returns -- A sequence of keys or paths."""
+
+        paths = self._keys(**kw_args)
+
+        if kw_args.get("recursive", False):
+            # Construct a list of all keys in this directory and its
+            # subdirectories. 
+            all_paths = []
+            # Loop over keys.
+            for key in self._keys():
+                # Is it a subdirectory?
+                if self.isdir(key):
+                    # Yes.  Get the keys it contains.
+                    subdir = self._get(key, **kw_args)
+                    # Add them, with a path prefix.
+                    all_paths.extend([ os.path.join(key, k)
+                                       for k in subdir.keys(**kw_args) ])
+                # Add this key itself.
+                all_paths.append(key)
+            # Use the expanded list.
+            paths = all_paths
+
+        if "glob" in kw_args:
+            # Apply a glob-style filter.
+            glob = kw_args["glob"]
+            paths = [ p for p in paths
+                      if fnmatch.fnmatchcase(os.path.basename(p), glob) ]
+
+        if kw_args.get("not_dir", False):
+            # Remove keys of directories.
+            paths = [ p for p in paths if not self.isdir(p) ]
+
+        if "pattern" in kw_args:
+            # Apply a regex filter.
+            regex = re.compile(kw_args["pattern"])
+            paths = [ p for p in paths
+                      if regex.match(os.path.basename(p)) is not None ]
+
+        if "only_type" in kw_args:
+            # Apply a type filter.
+            type_key = kw_args["only_type"]
+            paths = [ p for p in paths
+                      if self.getinfo(p, **kw_args).type == type_key ]
+            
+        if kw_args.get("known_types", True):
+            # Select only types that are not "unknown".
+            paths = [ p for p in paths
+                      if self.getinfo(p, **kw_args).type != "unknown" ]
+
+        return paths
+
+
+    def list(self, **kw_args):
+        """Print keys and types in this directory.
+
+        See 'keys' for a description of optional keyword arguments."""
+
+        # Find the paths to list.
+        paths = self.keys(**kw_args)
+        paths.sort()
+        # Construct a formatting template wide enough to display the
+        # paths. 
+        if len(paths) > 0:
+            template = "%%-%ds: %%s" % (max(map(len, paths)) + 4)
+        # List the paths.
+        for path in paths:
+            print template % (path, self.getinfo(path).type)
+
+
+    def popitem(self, **kw_args):
+        """Return and remove a single arbitrary element.
+
+        See 'keys' for a description of optional keyword arguments.
+        The 'not_dir' option is implied.
+
+        returns -- A '(path, value)' pair from the directory, which is
+        also removed."""
+
+        # Make sure we don't return directories that have just been
+        # deleted. 
+        kw_args["not_dir"] = True
+        # Find the keys that are available.
+        paths = self.keys(**kw_args)
+        # Are there any?
+        if len(paths) == 0:
+            # Nope.  Complain.
+            raise KeyError, "'%s' is empty" % self
+        else:
+            # Yes.  Remove and return the first.
+            path = paths[0]
+            value = self._get(path, **kw_args)
+            self.__del(path, **kw_args)
+            return path, value
+
+
+    def mkdir(self, path, **kw_args):
+        """Create an empty directory at 'path'.
+
+        'path' -- The path at which to create a directory.  It must not
+        already exist.
+
+        returns -- The new directory."""
+
+        subdir, key = self._splitdir(path, **kw_args)
+        # Check that the key doesn't already exist.
+        if key in subdir._keys(**kw_args):
+            raise RuntimeError, "'%s' already exists" % key
+        # Make the directory.
+        subdir.set(key, {}, type="directory")
+        # Return it
+        return subdir.get(key, **kw_args)
+
+
+    def set(self, path, value, **kw_args):
+        """Set 'path' to 'value'."""
+
+        subdir, key = self._splitdir(path, **kw_args)
+        subdir.__set(key, value, **kw_args)
+
+
+    def setdefault(self, path, default, **kw_args):
+        """Return 'path', setting it to 'default' if it doesn't exist."""
+
+        subdir, key = self._splitdir(path, **kw_args)
+        # Is it there?
+        if key not in subdir._keys(**kw_args):
+            # No.  Set the default value.
+            subdir.__set(key, default, **kw_args)
+        # Return the value.  Get it by key even if we just set the
+        # default, in case the value's type changed.
+        return subdir._get(key, **kw_args)
+
+
+    def update(self, values, **kw_args):
+        """Add items from 'values' to the directory.
+
+        'values' -- A map."""
+
+        for key in values.keys():
+            self.set(key, values[key], **kw_args)
+
+
+    def values(self, **kw_args):
+        """Return the values in this directory.
+
+        See 'keys' for description of optional keyword arguments."""
+
+        return [ self.get(n, **kw_args) for n in self.keys(**kw_args) ]
+
+
+    def __del(self, key, **kw_args):
+        # Check that the directory is writable.
+        if not self.writable:
+            raise AccessError, "%s is not writable" % self
+        # Is it a directory?
+        if self._isdir(key, **kw_args):
+            args = dict(kw_args)
+            args["writable"] = True
+            directory = self.get(key, **args)
+            # If the recursive directory removal is enabled, do it.
+            deldirs = kw_args.get("deldirs", self.deldirs)
+            if deldirs:
+                for subkey in directory._keys(**kw_args):
+                    directory.__del(subkey, **kw_args)
+            # Otherwise, the directory better be empty.
+            elif not directory.is_empty:
+                raise RuntimeError, "%s is not empty" % self
+            del directory
+        self._del(key, **kw_args)
+
+        
+    def __set(self, key, value, **kw_args):
+        # Check that the directory is writable.
+        if not self.writable:
+            raise AccessError, "%s is not writable" % self
+        # Does the key already exist?
+        if key in self._keys(**kw_args):
+            # Yes.  We should replace it if the 'replace' option is
+            # true. 
+            replace = kw_args.get("replace", self.replace)
+            # For directories, we also require that the 'replacedirs'
+            # option is true.
+            if self._isdir(key):
+                replace = replace and kw_args.get(
+                    "replacedirs", self.replacedirs)
+            if replace:
+                self.__del(key, **kw_args)
+            else:
+                raise RuntimeError, "'%s' already exists" % key
+        self._set(key, value, **kw_args)
+
+
+    def _splitdir(self, path, **kw_args):
+        """For a path, return the base key and the containing subdirectory.
+
+        'path' -- A path name.
+
+        returns -- '(subdir, key)' where 'subdir' is a directory object
+        and 'key' is a key in that subdirectory corresponding to
+        'path'. """
+
+        dir_name, key = os.path.split(path)
+        # Does it start with a path separator?  Then it's an absolute path.
+        if dir_name == os.sep:
+            raise KeyError, "absolute paths may not be used"
+        # Is it in this directory?
+        elif dir_name == "":
+            # Yes. Just return the name.
+            return self, key
+        else:
+            # It's in a subdirectory.  Recursively split the
+            # subdirectory name.
+            subdir, dir_key = self._splitdir(dir_name, **kw_args)
+            # Does the subdirectory exist as a directory?
+            if subdir.has_key(dir_key):
+                if subdir._isdir(dir_key, **kw_args):
+                    # Yes.  Go with that.
+                    subdir = subdir._get(dir_key, **kw_args)
+                else:
+                    # There is a non-directory under the subdirectory's
+                    # name.  That's a problem.
+                    raise KeyError, "'%s' is not a directory" % dir_name
+            # The subdirectory doesn't exist.  Should we make it?
+            elif kw_args.get("makedirs", self.makedirs):
+                # Yup.  Do that.
+                subdir = subdir.setdefault(dir_key, {}, **kw_args)
+            else:
+                # A missing subdirectory.
+                raise KeyError, "'%s' does not exist" % dir_name
+            return subdir, key
+
+                
+
+#-----------------------------------------------------------------------
+
+class FileSystemDirectory(Directory):
+    """A directory object for a directory in the file system.
+
+    Keys in the directory correspond to names of files in the directory.
+    The corresponding values are file contents or file handles,
+    depending on the file type.
+
+    Instances supply the following additional attributes:
+
+      'parent' -- The parent directory, or 'None' if this is the root
+      directory.
+
+      'root' -- The file system's root directory.
+
+    The file type is determined from the file extension (unless
+    overridden by the 'type' keyword argument):
+
+      no extension ("directory") -- A subdirectory.
+
+      '.hbook' ("HBOOK file") -- An HBOOK file.
+
+      '.pickle' ("pickle") -- A file containing a pickled Python object.
+
+      '.root' ("Root file") -- A Root file.
+
+      n/a ("symlink") -- A symbolic link.
+
+      '.table' ("table") -- A table file.
+
+      '.txt' ("text") -- A text file.
+
+    See 'Directory' for a description of generic directory object
+    methods, attributes, and options.
+
+    The following options may be set as class attributes or specified as
+    keyword arguments:
+
+      'by_line' -- If true, when reading text files return an iterator
+      over lines in the file.  Otherwise, return the entire file
+      contents as a string.
+
+      'pickle_binary' -- If true, use the binary format when storing
+      Python pickles.
+
+    Note that this class contains problematic race conditions when the
+    contents of the directory (or its subdirectories) are modified
+    simultaneously by another thread or process."""
+      
+
+    by_line = False
+
+    pickle_binary = True
+
+
+    def __init__(self, path, writable=True):
+        # Always use the canonicalized path.
+        path = os.path.realpath(path)
+
+        # Check that the directory is accessible.
+        if not os.access(path, os.R_OK | os.X_OK):
+            raise IOError, "'%s' is not accessible" % path
+        if writable:
+            # Check that the directory is writable.
+            if not os.access(path, os.W_OK):
+                raise IOError, "'%s' is not writable" % path
+
+        self.__path = path
+        self.__writable = writable
+
+
+    def __str__(self):
+        return self.__path
+
+
+    def __repr__(self):
+        return "FileSystemDirectory(%r, writable=%r)" \
+               % (self.__path, self.writable)
+
+
+    def join(self, path, **kw_args):
+        """Return the absolute file system path for 'path'."""
+
+        kw_args["makedirs"] = False
+        subdir, key = self._splitdir(path, **kw_args)
+        if isinstance(subdir, FileSystemDirectory):
+            return os.path.join(subdir.__path, key)
+        else:
+            raise KeyError, \
+                  "'%s' is not a file system directory" % subdir
+
+
+    def __div__(self, path):
+        """A synonym for 'join'."""
+
+        return self.join(path)
+
+
+    def __truediv__(self, path):
+        """A synonym for 'join'."""
+
+        return self.join(path)
+
+
+    name = property(lambda self: os.path.basename(self.__path))
+
+
+    def __get_parent(self):
+        if self.__path == os.sep:
+            return None
+        else:
+            parent_path = os.path.dirname(self.__path)
+            # If this directory is writable, we'll return a writable
+            # directory object, unless the parent directory itself is
+            # not writable.
+            writable = self.writable and os.access(parent_path, os.W_OK)
+            # Return the parent directory.
+            return FileSystemDirectory(parent_path, writable)
+
+
+    parent = property(__get_parent)
+
+
+    path = property(lambda self: self.__path)
+
+
+    def __get_root(self):
+        return root
+
+
+    root = property(__get_root)
+
+
+    writable = property(lambda self: self.__writable)
+    
+
+    def _del(self, key, **kw_args):
+        # print "_del(%r, **%r)" % (key, kw_args)
+        path = os.path.join(self.__path, key)
+        if os.path.isdir(path):
+            # FIXME: Sometimes NFS leaves '.nfs*' transient files around
+            # for a moment, which causes the 'rmdir' to fail.  Uncomment
+            # the following line to see them.
+            # print os.listdir(path)
+            os.rmdir(path)
+        else:
+            os.unlink(path)
+
+
+    def _get(self, key, **kw_args):
+        # print "_get(%r, **%r)" % (key, kw_args)
+        type = self.__gettype(key, **kw_args)
+        return type[3](self, key, **kw_args)
+
+
+    def _getinfo(self, key, **kw_args):
+        # print "_getinfo(%r, **%r)" % (key, kw_args)
+        type = self.__gettype(key, **kw_args)
+        path = os.path.join(self.__path, key)
+        stat_info = os.stat(path)
+        # Create the info object.
+        info = Info(key, type[0])
+        info.path = path
+        info.file_size = stat_info.st_size
+        info.user_id = stat_info.st_uid
+        info.group_id = stat_info.st_gid
+        info.access_mode = stat.S_IMODE(stat_info.st_mode)
+        info.modification_time = stat_info.st_mtime
+        return info
+
+
+    def _isdir(self, key, **kw_args):
+        # print "_isdir(%r, **%r)" % (key, kw_args)
+        path = os.path.join(self.__path, key)
+        if os.path.isdir(path):
+            return True
+        else:
+            base, extension = os.path.splitext(key)
+            type = self.__extension_map.get(
+                extension, self.__type_map["unknown"])
+            return type[2]
+
+
+    def _keys(self, **kw_args):
+        # print "_keys(**%r)" % kw_args
+        return os.listdir(self.__path)
+
+
+    def _set(self, key, value, **kw_args):
+        # print "_set(%r, %r, **%r)" % (key, value, kw_args)
+        type = self.__gettype(key, **kw_args)
+        type[4](self, key, value, **kw_args)
+
+
+    def __gettype(self, key, **kw_args):
+        path = os.path.join(self.__path, key)
+
+        # If it actually is a directory on disk, the type is always
+        # "directory". 
+        if os.path.isdir(path):
+            return self.__type_map["directory"]
+        
+        # Get the type specified by keyword argument, if any.
+        if "type" in kw_args:
+            type_name = kw_args.get("type", None)
+            try:
+                type = self.__type_map[type_name]
+            except KeyError:
+                raise TypeError, \
+                      "can't handle files of type '%s'" % type_name
+        else:
+            base, extension = os.path.splitext(key)
+            try:
+                type = self.__extension_map[extension]
+            except KeyError:
+                return self.__type_map["unknown"]
+
+        # If we identified it as a directory and it exists, it better be
+        # a directory.
+        if type[0] == "directory" \
+           and os.path.exists(path) \
+           and not os.path.isdir(path):
+            return self.__type_map["unknown"]
+
+        return type
+                
+
+    def __get_directory(self, key, **kw_args):
+        path = os.path.join(self.__path, key)
+        if not os.path.isdir(path):
+            raise TypeError, "'%s' is not a directory" % key
+        writable = kw_args.get("writable", self.writable)
+        return FileSystemDirectory(path, writable)
+
+
+    def __set_directory(self, key, value, **kw_args):
+        # Make sure 'value' looks like a map.
+        if not (hasattr(value, "keys") and hasattr(value, "get")):
+            raise TypeError, "value is not a map"
+        # Make the directory.
+        path = os.path.join(self.__path, key)
+        os.mkdir(path)
+        # Fill in the initial entries, if any were specified.
+        if len(value) > 0:
+            self.get(key, **kw_args).update(value, **kw_args)
+
+
+    def __get_hbook_file(self, key, **kw_args):
+        path = self.join(key)
+        if not os.path.isfile(path):
+            raise TypeError, "'%s' is not a file" % key
+        writable = kw_args.get("writable", False)
+        record_length = kw_args.get("record_length", 1024)
+        purge_cycles = kw_args.get("purge_cycles", True)
+
+        from hep.cernlib import hbook
+        return hbook.open(path, writable, record_length, purge_cycles)
+
+
+    def __set_hbook_file(self, key, value, **kw_args):
+        path = self.join(key)
+        if os.path.isfile(path):
+            raise ValueError, "'%s' already exists" % key
+        record_length = kw_args.get("record_length", 1024)
+        purge_cycles = kw_args.get("purge_cycles")
+
+        from hep.cernlib import hbook
+        hbook_file = hbook.create(path, record_length, purge_cycles)
+        hbook_file.update(value)
+
+
+    def __get_pickle(self, key, **kw_args):
+        path = os.path.join(self.__path, key)
+        if not os.path.isfile(path):
+            raise TypeError, "'%s' is not a file" % key
+        return pickle.load(file(path))
+
+
+    def __set_pickle(self, key, value, **kw_args):
+        path = os.path.join(self.__path, key)
+        binary = kw_args.get("pickle_binary", self.pickle_binary)
+        pickle.dump(value, file(path, "w"), binary)
+
+
+    def __get_root_file(self, key, **kw_args):
+        path = self.join(key)
+        if not os.path.isfile(path):
+            raise TypeError, "'%s' is not a file" % key
+        writable = kw_args.get("writable", False)
+        purge_cycles = kw_args.get("purge_cycles", True)
+        with_metadata = kw_args.get("with_metadata", True)
+
+        import hep.root
+        return hep.root.open(path, writable, purge_cycles, with_metadata)
+
+
+    def __set_root_file(self, key, value, **kw_args):
+        path = self.join(key)
+        if os.path.isfile(path):
+            raise ValueError, "'%s' already exists" % key
+        purge_cycles = kw_args.get("purge_cycles")
+
+        import hep.root
+        root_file = hep.root.create(path, purge_cycles)
+        root_file.update(value)
+
+
+    def __get_symlink(self, key, **kw_args):
+        path = os.path.join(self.__path, key)
+        if os.path.islink(path):
+            return os.readlink(path)
+        else:
+            raise TypeError, "'%s' is not a symlink" % key
+
+
+    def __set_symlink(self, key, value, **kw_args):
+        path = os.path.join(self.__path, key)
+        os.symlink(value, path)
+
+
+    def __get_table(self, key, **kw_args):
+        import hep.table
+        path = self.join(key, **kw_args)
+        update = kw_args.get("writable", False)
+        row_type = kw_args.get("row_type", hep.table.RowDict)
+        with_metadata = kw_args.get("with_metadata", True)
+        return hep.table.open(path, update, row_type, with_metadata)
+
+
+    def __set_table(self, key, value, **kw_args):
+        raise NotImplementedError, "cannot assign table obejects"
+
+
+    def __get_text(self, key, **kw_args):
+        path = os.path.join(self.__path, key)
+        if os.path.isfile(path):
+            text_file = file(path)
+            by_line = kw_args.get("by_line", self.by_line)
+            if by_line:
+                return text_file.xreadlines()
+            else:
+                return text_file.read()
+        else:
+            raise TypeError, "'%s' is not an ordinary file" % key
+
+
+    def __set_text(self, key, value, **kw_args):
+        path = os.path.join(self.__path, key)
+        file(path, "w").write(value)
+
+
+    def __get_unknown(self, key, **kw_args):
+        raise NotImplementedError, "cannot get files of unknown type"
+
+
+    def __set_unknown(self, key, value, **kw_args):
+        raise NotImplementedError, "cannot set files of unknown type"
+
+
+    __types = [
+        ("directory", "", True, __get_directory, __set_directory),
+        ("HBOOK file", ".hbook", True, __get_hbook_file, __set_hbook_file),
+        ("pickle", ".pickle", False, __get_pickle, __set_pickle),
+        ("Root file", ".root", True, __get_root_file, __set_root_file),
+        ("symlink", None, False, __get_symlink, __set_symlink),
+        ("table", ".table", False, __get_table, __set_table),
+        ("text", ".txt", False, __get_text, __set_text),
+        ("unknown", None, False, __get_unknown, __set_unknown),
+        ]
+
+
+    # The items in '__types' mapped to their type names.
+    __type_map = dict([ (t[0], t) for t in __types ])
+
+    # The items in '__types' mapped to their file extensions.
+    __extension_map = dict([ (t[1], t) for t in __types ])
+
+        
+
+#-----------------------------------------------------------------------
+# functions
+#-----------------------------------------------------------------------
+
+def get(path, **kw_args):
+    if "default" in kw_args:
+        have_default = True
+        default_value = kw_args.pop("default")
+    else:
+        have_default = False
+        
+    path = os.path.realpath(path)
+    dir_name, base_name = os.path.split(path)
+    parent_dir = getdir(dir_name, **kw_args)
+    if base_name in parent_dir:
+        return parent_dir.get(base_name, **kw_args)
+    elif have_default:
+        return default_value
+    else:
+        raise KeyError, base_name
+
+
+def set(path, value, **kw_args):
+    path = os.path.realpath(path)
+    dir_name, base_name = os.path.split(path)
+    parent_dir = getdir(dir_name, **kw_args)
+    parent_dir.set(base_name, value, **kw_args)
+
+
+def getdir(path, **kw_args):
+    """Return a directory object for the file system directory 'path'.
+
+    'path' -- The path to a directory.
+
+    returns -- A 'FileSystemDirectory' object."""
+
+    # Canonicalize the path.
+    path = os.path.realpath(path)
+
+    # Does it exist as a file system directory?
+    if os.path.isdir(path):
+        # Yes.  Stop right here and return it.  Should we return it
+        # writable? 
+        writable = kw_args.get("writable")
+    if writable is None:
+            # Determine writable status from the directory's write
+            # permissions. 
+            writable = os.access(path, os.W_OK)
+        return FileSystemDirectory(path, writable)
+
+    # Does not exist as a directory.  Split its parent path.
+    dir_name, base_name = os.path.split(path)
+    # Get its parent directory.
+    parent_dir = getdir(dir_name, **kw_args)
+    # Create it in its parent directory.
+    makedirs = kw_args.get("makedirs", False)
+    if makedirs:
+        return parent_dir.setdefault(base_name, {}, **kw_args)
+    else:
+        return parent_dir.get(base_name, **kw_args)
+
+
+def getcwd(writable="auto"):
+    """Return a directory object for the current working directory."""
+
+    return getdir(os.getcwd(), writable=writable)
+
+
+def tardir(directory, output_dir=None, compression="bz2", options="",
+           replace=True):
+    """Build a tar archive from a directory tree.
+
+    The archive file is named as 'directory' with the extension '.tar',
+    plus an additional extension indicating the compression method.  The
+    archive file contains 'directory' and its contents as the only
+    top-level directory item.
+
+    'directory' -- The directory to archive.
+
+    'output_dir' -- The directory in which to write the tar archive.  By
+    default, the archive is written in the parent of 'directory'.
+
+    'compression' -- Compression mechanism to use: '"bz2"' (bzip2
+    compression, the default), '"gz"' (gzip compression), or 'None'.
+
+    'options' -- Additional command-line options to pass to the 'tar'
+    command.
+
+    'replace' -- If true, allow an existing tar archive to be replaced.
+
+    returns -- The full path to the generated tar archive."""
+
+    # Construct a path for the tar file.  If an output directory is not
+    # specified, put the tar file next to the directory being
+    # processed. 
+    if output_dir is None:
+        output_dir = directory.parent
+    tar_file_path = output_dir.join(directory.name + ".tar")
+    if compression is not None:
+        tar_file_path += "." + compression
+    # Check if it exists.
+    if os.path.exists(tar_file_path):
+        if not replace:
+            raise ValueError, "'%s' already exists" % tar_file_path
+        # Otherwise, happily overwrite it.
+
+    dir_name, base_name = os.path.split(directory.path)
+    # Construct the base command line.
+    command_line = [
+        "/bin/tar",
+        "--create",
+        "--file",
+        tar_file_path,
+        "--directory",
+        dir_name,
+        ] 
+    # Add compression options.
+    if compression == "bz2":
+        command_line.append("--bzip")
+    elif compression == "gz":
+        command_line.append("--gzip")
+    elif compression is None:
+        pass
+    else:
+        raise ValueError, "unknown compression %r" % compression
+    # Add additional caller-specified options.
+    command_line += [ o for o in options.split(" ") if o.strip() != "" ]
+    # Add the names of the files to include.
+    command_line += [
+        os.path.join(base_name, k)
+        for k in directory.keys(recursive=True, not_dir=True) ]
+
+    # Run the command.
+    exit_code = os.spawnve(os.P_WAIT, command_line[0], command_line, {})
+    # Check the exit code.
+    if exit_code != 0:
+        raise RuntimeError, \
+              "%s failed (exit code %d)" % (command_line[0], exit_code)
+    # Return the name of the tar file.
+    return tar_file_path
+    
+
+def untardir(tar_file_path, directory=None, options="",
+             replacedirs=False):
+    """Extract a directory from a tar archive.
+
+    See the 'tardir' command.  The tar archive must contain a directory
+    in its root whose name is the same as the base name of the tar
+    archive file itself; it is this directory that is extracted from the
+    archive.  For instance, the tar archive 'foo.tar' should contain the
+    top-level directory 'foo'.
+
+    'tar_file_path' -- The path to the tar file.
+
+    'directory' -- The directory in which to expand the tar archive.  If
+    omitted, the directory containing the tar file is used.
+
+    'options' -- Additional command-line options to pass to the 'tar'
+    command.
+
+    'replacedirs' -- If true, allow deletion of an existing directory to
+    make room for the contents of the tar archive.
+
+    returns -- A directory object for the expanded directory."""
+
+    tar_file_path = os.path.realpath(tar_file_path)
+    # Check that the tar file exists.
+    if not os.path.isfile(tar_file_path):
+        raise ValueError, "'%s' does not exist" % tar_file_path
+    
+    # Extract the name of the directory contained in the tar file, and
+    # infer the compression type.
+    base = os.path.basename(tar_file_path)
+    if base.endswith(".tar.bz2"):
+        compression = "bz2"
+        base = base[: -8]
+    elif base.endswith(".tar.gz"):
+        compression = "gz"
+        base = base[: -7]
+    elif base.endswith(".tgz"):
+        compression = "gz"
+        base = base[: -4]
+    elif base.endswith(".tar"):
+        compression = None
+        base = base[: -4]
+    else:
+        raise ValueError, "unrecongnized file name '%s'" % base
+
+    # Put the expanded directory tree next to the tar file, if no path
+    # was specified.
+    if directory is None:
+        directory = getdir(os.path.dirname(tar_file_path), writable=True)
+    # Check whether the target exists.
+    if directory.has_key(base):
+        if replacedirs:
+            # Clean up the existing one.
+            directory.delete(base, deldirs=True)
+        else:
+            # Object.
+            raise RuntimeError, \
+                  "'%s' already exists" % directory.join(base)
+
+    # Construct the base command.
+    command_line = [
+        "/bin/tar",
+        "--get",
+        "--file",
+        tar_file_path,
+        "--directory",
+        directory.path,
+        ] 
+    # Add compression options.
+    if compression == "bz2":
+        command_line.append("--bzip")
+    elif compression == "gz":
+        command_line.append("--gzip")
+    elif compression is None:
+        pass
+    else:
+        raise ValueError, "unknown compression %r" % compression
+    # Add additional caller-specified options.
+    command_line += [ o for o in options.split(" ") if o.strip() != "" ]
+    # Add the name of the directory to extract.
+    command_line.append(base)
+
+    # Run the command.
+    exit_code = os.spawnve(os.P_WAIT, command_line[0], command_line, {})
+    # Check the exit code.
+    if exit_code != 0:
+        raise RuntimeError, \
+              "%s failed (exit code %d)" % (command_line[0], exit_code)
+    # Return the extracted directory.
+    return directory[base]
+
+
+#-----------------------------------------------------------------------
+# constants
+#-----------------------------------------------------------------------
+
+root = FileSystemDirectory("/", False)
+
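For illustration only (not part of this commit), here is a usage sketch of the mapping-style interface documented in the 'Directory' docstring above; it assumes the package is importable as 'hep.fs', and the file names are made up:

    import hep.fs

    cwd = hep.fs.getcwd()                       # FileSystemDirectory for the working directory
    cwd["notes.txt"] = "some text\n"            # the '.txt' extension selects the "text" type
    cwd.set("results/run1.pickle", {"n": 42})   # 'makedirs' creates 'results/' on the way
    print cwd.get("results/run1.pickle")        # -> {'n': 42}
    print cwd.keys(glob="*.txt")                # keys filtered by a glob pattern
    cwd.delete("results", deldirs=True)         # recursive delete, per the 'deldirs' option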

File pyhep-purepython/hep/hist/__init__.py

+#-----------------------------------------------------------------------
+#
+# module hep.hist
+#
+# Copyright 2003 by Alex Samuel.  All rights reserved.
+#
+#-----------------------------------------------------------------------
+
+"""Histograms.
+
+The following error models are supported:
+
+  'explicit' -- Asymmetrical errors may be specified for each bin using
+  the 'setError' method.  No errors are computed automatically.
+
+  'gaussian' -- The symmetrical error on the value of each bin is
+  computed assuming a Gaussian distribution.
+
+  'none' -- Bins have zero errors.
+
+  'poisson' -- The asymmetrical error on the value of each bin is computed
+  assuming a Poisson distribution.
+
+  'quadrature' -- The symmetrical error on each bin is the square root
+  of the sum of the squares of the weights accumulated in that bin.
+
+Error values are always of type 'float'.
+"""
+
+#-----------------------------------------------------------------------
+# imports
+#-----------------------------------------------------------------------
+
+from __future__ import generators
+
+import sys
+import copy
+try:
+    import copyreg
+except ImportError:
+    import copy_reg as copyreg
+
+from   .axis import Axis, BinnedAxis, AxisIterator, AxesIterator
+## from   function import Function1D, isFunction, SampledFunction1D, \
+##                        sampleFunction1D
+## from   function import Graph, ContourGraph, isGraph
+## from   hep import ext
+## from   hep.bool import *
+from   .histogram import Histogram, Histogram1D, is_histogram
+from   .scatter import is_scatter, Scatter
+from   .util import *
+
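For illustration only (not part of this commit), here is a small sketch of what the 'gaussian' and 'quadrature' error models listed above amount to numerically for a single bin; the function names are made up:

    from math import sqrt

    def gaussian_error(count):
        # 'gaussian': symmetrical error for a bin holding 'count' unweighted entries.
        return sqrt(count)

    def quadrature_error(weights):
        # 'quadrature': square root of the sum of squares of the accumulated weights.
        return sqrt(sum(w * w for w in weights))

    print gaussian_error(100)           # -> 10.0
    print quadrature_error([0.5] * 16)  # -> 2.0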

File pyhep-purepython/hep/hist/axis.py

+#-----------------------------------------------------------------------
+#
+# module hep.hist.axis
+#
+# Copyright 2003 by Alex Samuel.  All rights reserved.
+#
+#-----------------------------------------------------------------------
+
+"""Histogram axes."""
+
+#-----------------------------------------------------------------------
+# imports
+#-----------------------------------------------------------------------
+
+from __future__ import generators
+
+import copy
+import math
+
+import numpy as np
+
+#-----------------------------------------------------------------------
+# classes
+#-----------------------------------------------------------------------
+
+class Axis(object):
+    """Base axis class."""
+
+
+    def __init__(self, axis_type='f', **kw_args):
+        # Accept anything numpy understands as a dtype: a Python type,
+        # a numpy dtype, or a typecode string such as 'f'.
+        try:
+            self.type = np.dtype(axis_type).type
+        except TypeError:
+            raise ValueError, "axis type must be a Python type or dtype"
+        self.__dict__.update(kw_args)
+
+
+    def __repr__(self):
+        if hasattr(self, "range") and self.range is not None:
+            range = ", range=(%r, %r)" % self.range
+        else:
+            range = ""
+        return "Axis(%s%s)" % (self.type, range)
+
+
+
+#-----------------------------------------------------------------------
+
+class BinnedAxis(Axis):
+    """Base for binned axis types."""
+
+    pass
+
+
+
+#-----------------------------------------------------------------------
+
+class EvenlyBinnedAxis(BinnedAxis):
+    """A binned axis with evenly-spaced bins.
+
+    An 'EvenlyBinnedAxis' consists of a mapping from values to bin numbers.
+    The axis spans a range '(lo, hi)' of values of a numerical type.
+    Values must satisfy 'lo <= value < hi'; otherwise, 'value' is
+    considered to be an underflow or overflow.
+
+    The bin numbers are consecutive integers starting at zero.  In
+    addition, the strings "underflow" and "overflow" are valid bin
+    numbers; these correspond to values below and above the acceptable
+    range of values."""
+
+
+    def __init__(self, nbins, range, axis_type='f', **kw_args):
+        """Construct a new axis.
+
+        'nbins' -- The number of bins.
+
+        'range' -- A pair '(lo, hi)', specifying the allowed range of
+        values.
+
+        'axis_type' -- The Python type used to represent values."""
+
+        # FIXME: silent coercion ?
+        axis_type = np.dtype(axis_type)
+
+        lo, hi = np.asarray(range, dtype=axis_type)
+        # Perform sanity checks on the range.
+        if lo == hi:
+            raise ValueError, "axis range is empty"
+        if lo > hi:
+            lo, hi = hi, lo
+        # If the axis type is integral, the range must be an even
+        # multiple of the number of bins.
+        if (axis_type.char in np.typecodes['AllInteger'] and
+            ((hi - lo) % nbins) != 0):
+            raise ValueError("number of bins must be a divisor of axis range")
+
+        BinnedAxis.__init__(self, axis_type, **kw_args)
+        self.__nbins = int(nbins)
+        self.__range = (lo, hi)
+
+
+    def __repr__(self):
+        result = "EvenlyBinnedAxis(%d, %r" \
+                 % (self.__nbins, self.__range)
+        if hasattr(self, "name"):
+            result += ", name=%r" % self.name
+        if hasattr(self, "units"):
+            result += ", units=%r" % self.units
+        result += ")"
+        return result
+
+
+    nbins = property(lambda self: self.__nbins)
+    """The number of bins on the axis."""
+
+    range = property(lambda self: self.__range)
+    """The range of values spanned by the axis."""
+
+    # 'type', the Python type used to represent values, is set as a
+    # plain instance attribute by 'Axis.__init__'.
+        
+
+    def __call__(self, value):
+        """Return the bin number corresponding to 'value'."""
+
+        try:
+            # Convert value to the right type.
+            value = self.type(value)
+        except ValueError:
+            # Conversion failed.
+            raise ValueError(
+                "axis value %s is not of type %s" % (repr(value), self.type)
+                )
+
+        lo, hi = self.__range
+        nbins = self.__nbins
+        # Compute the bin number.
+        bin_nbr = int(math.floor(
+            ((value - lo) * nbins) / (hi - lo)))
+        if bin_nbr < 0:
+            # Underflow bin.
+            return "underflow"
+        elif bin_nbr >= nbins:
+            # Overflow bin.
+            return "overflow"
+        else:
+            # A normal bin.
+            return bin_nbr
+
+
+    def bin_range(self, bin_nbr):
+        """Return the range of values corresponding to a bin.
+
+        'bin_nbr' -- The number of the bin to consider.
+
+        returns -- A pair '(lo, hi)', specifying the range of that bin.
+
+        If 'bin_nbr' is "underflow", returns '(None, axis_lo)', where
+        'axis_lo' is the minimum value of the entire axis.  Similarly,
+        if 'bin_nbr' is "overflow", returns '(axis_hi, None)'."""
+
+        lo, hi = self.__range
+        nbins = self.__nbins
+        
+        if bin_nbr == "underflow":
+            return (None, lo)
+
+        elif 0 <= bin_nbr < nbins:
+            # A regular bin.
+            dbins = (hi - lo) / nbins
+            bin_lo = lo + bin_nbr * dbins
+            bin_hi = bin_lo + dbins
+            return (self.type(bin_lo), self.type(bin_hi))
+
+        elif bin_nbr == "overflow":
+            return (hi, None)
+        
+        else:
+            raise ValueError, "unknown bin number %s" % repr(bin_nbr)
+
+
+    def bin_center(self, bin_nbr):
+        """Return the central value of a bin."""
+
+        if bin_nbr in ("underflow", "overflow"):
+            raise ValueError(bin_nbr)
+        bin_lo, bin_hi = self.bin_range(bin_nbr)
+        return (bin_lo + bin_hi) / 2.
+
+
+
+#-----------------------------------------------------------------------
+
+class UnevenlyBinnedAxis(BinnedAxis):
+
+    def __init__(self, edges, axis_type='f', **kw_args):
+        """Construct a new axis.
+
+        'axis_type' -- The Python type used to represent values."""
+
+        # Make sure there are at least two edges specified.
+        if len(edges) < 2:
+            raise ValueError, "'edges' must have two elements"
+        # FIXME: ensure consistent dtypes...
+        axis_type = np.dtype(axis_type)
+        edges = np.asarray(edges, dtype=axis_type)
+        # Make sure they're ascending, without sorting the caller's
+        # array in place.
+        edges = np.sort(edges)
+
+        BinnedAxis.__init__(self, axis_type, **kw_args)
+        self.edges = edges
+
+
+    def __repr__(self):
+        return "UnevenlyBinnedAxis(%s)" % repr(self.edges)
+
+
+    nbins = property(lambda self: len(self.edges) - 1)
+    """The number of bins on the axis."""
+
+    range = property(lambda self:
+                     (self.edges[0], self.edges[-1]))
+    """The range of values spanned by the axis."""
+
+    # 'type', the Python type used to represent values, is set as a
+    # plain instance attribute by 'Axis.__init__'.
+        
+
+    def __call__(self, value):
+        """Return the bin number corresponding to 'value'."""
+
+        try:
+            # Convert value to the right type.
+            value = self.type(value)
+        except ValueError:
+            # Conversion failed.
+            raise ValueError("axis value %s is not of type %s" %
+                             (repr(value), self.type))
+
+        if value < self.edges[0]:
+            # Underflow bin.
+            return "underflow"
+        elif value >= self.edges[-1]:
+            # Overflow bin.
+            return "overflow"
+        else:
+            # A normal bin.  Perform a binary search for the bin number;
+            # bin 'i' covers 'edges[i] <= value < edges[i + 1]'.
+            return int(np.searchsorted(self.edges, value, side="right")) - 1
+
+
+    def bin_range(self, bin_nbr):
+        """Return the range of values corresponding to a bin.
+
+        'bin_nbr' -- The number of the bin to consider.
+
+        returns -- A pair '(lo, hi)', specifying the range of that bin.
+
+        If 'bin_nbr' is "underflow", returns '(None, axis_lo)', where
+        'axis_lo' is the minimum value of the entire axis.  Similarly,
+        if 'bin_nbr' is "overflow", returns '(axis_hi, None)'."""
+
+        if bin_nbr == "underflow":
+            return (None, self.edges[0])
+
+        elif 0 <= bin_nbr < self.nbins:
+            # A regular bin.
+            return (self.edges[bin_nbr],
+                    self.edges[bin_nbr + 1])
+
+        elif bin_nbr == "overflow":
+            return (self.edges[-1], None)
+        
+        else:
+            raise ValueError, "unknown bin number %s" % repr(bin_nbr)
+
+
+    def bin_center(self, bin_nbr):
+        """Return the central value of a bin."""
+
+        if bin_nbr in ("underflow", "overflow"):
+            raise ValueError(bin_nbr)
+        bin_lo, bin_hi = self.bin_range(bin_nbr)
+        return (bin_lo + bin_hi) / 2.
+
+
+
+#-----------------------------------------------------------------------
+# functions
+#-----------------------------------------------------------------------
+
+def AxisIterator(axis, overflows=False, range=None):
+    """Return an iterator over bin numbers for 'axis'.
+
+    The iterator yields bin numbers in ascending order.
+
+    'axis' -- A 'BinnedAxis' object.
+
+    'overflows' -- If true, include underflow and overflow bins.
+
+    'range' -- If not 'None', a '(lo, hi)' pair; only bins lying
+    entirely within this range are yielded."""
+
+    if overflows:
+        yield "underflow"
+    for index in xrange(axis.nbins):
+        if range is not None:
+            # Check that the bin is in range.
+            lo, hi = axis.bin_range(index)
+            if lo < range[0] or hi > range[1]:
+                continue
+        yield index
+    if overflows:
+        yield "overflow"
+
+
+def AxesIterator(axes, overflows=False, range=None):
+    """Return an interator over bin numbers for 'axes'.
+
+    'axes' -- A sequence of 'BinnedAxis' objects.
+
+    'overflows' -- If true, include underflow and overflow bins."""
+
+    if range is None:
+        range = len(axes) * (None, )
+
+    # Handle the zero-dimensional case.
+    if len(axes) == 0:
+        yield ()
+        return
+
+    # Handle the general case recursively.  Get the first axis, and loop
+    # over the rest, prepending each to all indices of the remaining
+    # axes.
+    for first_index in AxisIterator(axes[0], overflows, range[0]):
+        for next in AxesIterator(axes[1 :], overflows, range[1 :]):
+            yield (first_index, ) + next
+
+
+def parse_axis_arg(arg):
+    # Is it already an 'Axis'?
+    if arg is None:
+        return Axis(float)
+    elif isinstance(arg, Axis):
+        # Yes.  Use a copy of it.
+        return copy.copy(arg)
+    else:
+        # No.  Construct one.
+        # How many arguments?
+        if len(arg) >= 1:
+            axis_type = arg[0]
+        else:
+            axis_type = float
+        axis = Axis(axis_type)
+        if len(arg) >= 2:
+            axis.name = arg[1]
+        if len(arg) >= 3:
+            axis.units = arg[2]
+        if len(arg) >= 4:
+            try:
+                lo, hi = arg[3]
+            except TypeError:
+                raise ValueError, "range must be a '(low, high)' pair"
+            if lo is not None:
+                lo = axis_type(lo)
+            if hi is not None:
+                hi = axis_type(hi)
+            axis.range = (lo, hi)
+
+        return axis
+    
+
+def parse_binned_axis_arg(arg):
+    """Parse a binned axis specification from 'arg'.
+
+    If 'arg' is a 'BinnedAxis' object, it is simply returned.
+    Otherwise, it is assumed to be a sequence with format
+
+        '(nbins, range, name, units)'
+
+    where 'range' is a '(lo, hi)' pair, and 'name' and 'units' are
+    optional.   
+
+    returns -- A 'BinnedAxis' object."""
+
+    from copy import copy
+
+    # Is it already a 'BinnedAxis'?
+    if isinstance(arg, BinnedAxis):
+        # Yes.  Use a copy of it.
+        return copy(arg)
+
+    # Is it some other kind of axis?
+    elif isinstance(arg, Axis):
+        # Oops.  We need a binned axis.
+        raise TypeError, "axis must be binned"
+
+    # Not an axis object; interpret it as a sequence.
+    else:
+        # How many arguments?
+        try:
+            sz = len(arg)
+        except TypeError:
+            sz = None
+        if sz is None or sz not in (2, 3, 4):
+            raise ValueError("axis must have the form "
+                             "'(nbins, range[, name[, units]])'")
+
+        # Unpack the basic stuff.
+        nbins = arg[0]
+        try:
+            lo, hi = arg[1]
+        except TypeError:
+            raise TypeError, "range must be a '(lo, hi)' pair"
+        # Infer the axis type.
+        lo, hi = coerce(lo, hi)
+        axis_type = type(lo)
+
+        # Make the axis.
+        axis = EvenlyBinnedAxis(nbins, (lo, hi), axis_type)
+
+        # If there's another element, it's the axis name.
+        if sz >= 3:
+            axis.name = str(arg[2])
+        # If there's another element, it's the axis units.
+        if sz >= 4:
+            axis.units = str(arg[3])
+
+        return axis
+    
+
+def wrap_1d(arg):
+    try:
+        length = len(arg)
+    except TypeError:  # 'len() of unsized object'
+        return (arg, )
+    else:
+        return arg
+
+
+def combine_axes(axis0, axis1):
+    axis = None
+
+    if isinstance(axis0, EvenlyBinnedAxis) \
+       and isinstance(axis1, EvenlyBinnedAxis) \
+       and axis0.range == axis1.range:
+        # Two evenly-binned axes with the same range.  Do they have
+        # compatible binning?
+        if axis0.nbins % axis1.nbins == 0:
+            axis = copy.copy(axis0)
+        elif axis1.nbins % axis0.nbins == 0:
+            axis = copy.copy(axis1)
+
+    elif isinstance(axis0, BinnedAxis) \
+         and not isinstance(axis1, BinnedAxis) \
+         and (not hasattr(axis1, "range")
+              or axis1.range is None
+              or isIntervalSubset(axis1.range, axis0.range)):
+        # Only 'axis0' is binned, and either 'axis1' has no range or its
+        # range is contained in that of 'axis0'.
+        axis = copy.copy(axis0)
+
+    elif isinstance(axis1, BinnedAxis) \
+         and not isinstance(axis0, BinnedAxis) \
+         and (not hasattr(axis0, "range")
+              or axis0.range is None 
+              or isIntervalSubset(axis0.range, axis1.range)):
+        # Only 'axis1' is binned, and either 'axis0' has no range or its
+        # range is contained in that of 'axis1'.
+        axis = copy.copy(axis1)
+
+    # Have we failed to combine the axes so far?
+    if axis is None:
+        # Construct a lowest-common-denominator unbinned axis.
+        axis_type = coerceTypes(axis0.type, axis1.type)
+        if hasattr(axis0, "range") and hasattr(axis1, "range"):
+            range = intervalUnion(axis0.range, axis1.range)
+        elif hasattr(axis0, "range"):
+            range = axis0.range
+        elif hasattr(axis1, "range"):
+            range = axis1.range
+        else:
+            range = None
+        axis = Axis(axis_type)
+        # Combine other attributes, giving 'axis0' precedence.
+        axis.__dict__.update(axis1.__dict__)
+        axis.__dict__.update(axis0.__dict__)
+        # Set the range.
+        axis.range = range
+
+    return axis
+
+
+def combine_axis_list(axes):
+    if len(axes) == 0:
+        axis = Axis(int, range=(0, 1))
+    elif len(axes) == 1:
+        axis = copy.copy(axes[0])
+    else:
+        axis = copy.copy(axes[0])
+        for next in axes[1 :]:
+            axis = combine_axes(axis, next)
+
+    if not hasattr(axis, "range"):
+        axis.range = (0, 1)
+    range = axis.range
+
+    # Is the range degenerate?
+    if range is None:
+        axis.range = (0, 1)
+    elif range[0] == range[1]:
+        assert not isinstance(axis, BinnedAxis)
+        # Yes.  Is it zero?
+        if range[0] == 0:
+            # Just make something up.
+            axis.range = (0, 1)
+        else:
+            # Adjust the range to an interval around the value.
+            axis.range = (range[0] - abs(range[0]) / 2,
+                          range[0] + abs(range[0]) / 2)
+
+    return axis
+
+
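For illustration only (not part of this commit), here is a short usage sketch of the axis classes defined above, assuming the package imports cleanly as 'hep.hist.axis':

    from hep.hist.axis import AxesIterator, EvenlyBinnedAxis, UnevenlyBinnedAxis

    energy = EvenlyBinnedAxis(10, (0.0, 5.0), float, name="energy", units="GeV")
    print energy(1.3)            # -> 2 (bin numbers start at zero)
    print energy(7.0)            # -> 'overflow'
    print energy.bin_range(2)    # -> (1.0, 1.5)
    print energy.bin_center(2)   # -> 1.25

    mass = UnevenlyBinnedAxis([0.0, 1.0, 3.0, 6.0], float)
    print mass(2.5)              # -> 1 (the bin covering [1.0, 3.0))

    # Iterate over all (energy, mass) bin-number pairs, including overflows.
    for bin_numbers in AxesIterator((energy, mass), overflows=True):
        pass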

File pyhep-purepython/hep/hist/fit.py

+#-----------------------------------------------------------------------
+#
+# fit.py
+#
+# Copyright (C) 2004 by Alex Samuel.  All rights reserved.
+#
+#-----------------------------------------------------------------------
+
+#-----------------------------------------------------------------------
+# imports
+#-----------------------------------------------------------------------
+
+from   hep.bool import *
+import hep.expr
+import hep.expr.op
+import hep.hist
+from   math import hypot, sqrt
+import sys
+
+#-----------------------------------------------------------------------
+# classes
+#-----------------------------------------------------------------------
+
+# FIXME: 'NaiveChiSquare' and 'naiveChiSquareFit' are deprecated, and
+# are left here only until another multidimensional fitter is
+# available.
+
+class NaiveChiSquare:
+
+    def __init__(self, function, sample_vars, samples):
+        self.function = function
+        self.sample_vars = tuple(sample_vars)
+        self.samples = samples
+
+
+    def __call__(self, **parameters):
+        symbols = dict(parameters)
+
+        result = 0
+        for x, y, e in self.samples:
+            # FIXME:
+            if e == 0:
+                continue
+            for var, value in zip(self.sample_vars, x):
+                symbols[var] = value
+            f = self.function(**symbols)
+            result += ((f - y) / e) ** 2
+
+        return result
+
+
+
+def naiveChiSquareFit(histogram, function, variable_names, parameters):
+    import hep.cernlib.minuit
+
+    function = hep.expr.asExpression(function)
+    samples = [ (hep.hist.getBinCenter(histogram.axes, bin),
+                 histogram.getBinContent(bin),
+                 histogram.getBinError(bin))
+                for bin in hep.hist.AxesIterator(histogram.axes) ]
+    chi_sq = NaiveChiSquare(function, variable_names, samples)
+    result = hep.cernlib.minuit.minimize(chi_sq, *parameters)
+    result.expression = hep.expr.substitute(function, **result.values)
+    return result
+
+
+#-----------------------------------------------------------------------
+
+class ChiSquare1D:
+    """Function object to compute one-dimensional chi-square."""
+
+    def __init__(self, function, sample_var, samples):
+        self.function = function
+        self.sample_var = sample_var
+        self.samples = samples
+        self.warned = False
+
+
+    def __call__(self, **parameters):
+        # FIXME: We could save some evaluations on adjacent bins here.
+
+        # Substitute parameters.
+        function = hep.expr.substitute(self.function, **parameters)
+        # Compile the function, using 'float' as the type for the
+        # independent variable.
+        function = hep.expr.compile(function, float)
+
+        # Evaluate the compiled PDF at one value of the independent variable.
+        eval = lambda v: function.evaluate({ self.sample_var: v })
+
+        chi_sq = 0
+        for (x0, x1), y in self.samples:
+            # We can't handle non-positive bin values, since we can't
+            # compute gaussian errors for them.
+            if y <= 0:
+                if not self.warned:
+                    print >> sys.stderr, \
+                      "Warning: non-positive bin values ignored in chi-square"
+                    self.warned = True
+                continue
+            # Do Simpson's rule (three-point) integration over the bin.
+            f = (x1 - x0) * \
+                (eval(x0) + 4 * eval((x0 + x1) / 2.0) + eval(x1)) / 6.0
+            # Update the chi-square.
+            chi_sq += ((f - y) ** 2) / y
+
+        return chi_sq
+
+
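The per-bin integral in 'ChiSquare1D.__call__' uses three-point Simpson's rule, which is exact for polynomials up to degree three.  The fragment below is a minimal stand-alone check of that rule, not part of the fit code; the quadratic integrand is chosen only because its exact integral is easy to verify.

    def simpson(f, x0, x1):
        """Three-point Simpson's rule estimate of the integral of f over [x0, x1]."""
        return (x1 - x0) * (f(x0) + 4 * f((x0 + x1) / 2.0) + f(x1)) / 6.0

    # Exact for x**2: the integral over [0, 2] is 8/3.
    print simpson(lambda x: x * x, 0.0, 2.0)   # 2.6666...

In the chi-square above, each coalesced bin then contributes '((integral - y) ** 2) / y', with the bin content 'y' standing in for the squared counting error.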
+
+def chiSquareFit1D(histogram, function, variable_name, parameters,
+                   min_count=7, range=None):
+    """Perform a one-dimensional chi-square fit of a PDF to a histogram.
+    
+    Performs a chi-square fit of PDF 'function' to a one-dimensional
+    histogram.  The chi-square is computed using Simpson's rule (the
+    three-point Newton-Cotes quadrature) to integrate the PDF for
+    comparison with histogram bins.  Bin errors in 'histogram' are
+    ignored; 1/sqrt(N) gaussian counting errors are always used.
+    Therefore, histogram contents should be positive integers (though
+    zero and small bins are handled; see the 'min_count' parameter
+    below).
+
+    'histogram' -- The histogram to fit to.
+
+    'function' -- The PDF.  May be an 'Expression' object or a
+    callable.
+
+    'variable_name' -- The name of the variable in 'function'
+    corresponding to the dependent quantity in 'histogram' (i.e. the
+    quantity along the histogram axis).
+
+    'parameters' -- A sequence of parameters to fit.  Each may be simply
+    a variable name in 'function', or a sequence '(name, initial_value,
+    step_size, bound_low, bound_hi)'.
+
+    'min_count' -- The minimum value for bins in the fit.  Bins which
+    contain a smaller value than this are coalesced with neighboring
+    bins until all bins contain at least this value.
+
+    returns -- A 'Result' object."""
+
+    # We'll use this for minimization.
+    import hep.cernlib.minuit
+
+    axis = histogram.axis
+    function = hep.expr.asExpression(function)
+
+    # Construct the list of bin samples, coalescing bins as necessary. 
+    samples = []
+    # Figure out which bins are included in the fit.
+    if range is None:
+        bins = hep.hist.AxisIterator(axis)
+    else:
+        lo, hi = range
+        bins = xrange(axis(lo), axis(hi))
+    # Loop over bins.
+    x0 = None
+    for bin in bins:
+        range = axis.getBinRange(bin)
+        # Are we carrying a bin whose contents don't satisfy the minimum
+        # so far? 
+        if x0 is None:
+            # No.  Start a new bin.
+            x0, x1 = range
+            value = histogram.getBinContent(bin)
+        else:
+            # Yes.  Make sure we are adjacent.
+            assert x1 == range[0]
+            # Coalesce this bin with the carried one.
+            x1 = range[1]
+            value += histogram.getBinContent(bin)
+        # Does the bin we're carrying have enough in it?
+        if value >= min_count:
+            # Yes it does.  Use it as a sample.
+            samples.append(((x0, x1), value, ))
+            x0 = None
+
+    # If we're still carrying a bin, it must not have enough in it.  But
+    # we have to do something with it.
+    if x0 is not None:
+        # Are there any samples at all so far?
+        if len(samples) > 0:
+            # Yes there are.  Coalesce the bin we're carrying with the
+            # last one. 
+            (last_x0, last_x1), last_value = samples.pop(-1)
+            assert last_x1 == x0
+            samples.append(((last_x0, x1), last_value + value, ))
+        else:
+            # No samples.  The whole histogram doesn't have enough in it
+            # to satisfy the minimum criterion.  Oh well, just use one
+            # sample. 
+            samples.append(((x0, x1), value))
+
+    # Construct the chi-square function.
+    chi_sq = ChiSquare1D(function, variable_name, samples)
+    # Minimize it.
+    result = hep.cernlib.minuit.minimize(chi_sq, parameters)
+
+    # Store the number of degrees of freedom.
+    result.degrees_of_freedom = len(samples) - len(parameters)
+    # Compute the chi-square probability.
+    result.chi_square_probability = \
+        hep.num.chiSquareCDF(result.minimum, result.degrees_of_freedom)
+    # Substitute the fit parameter values into 'function' and store
+    # that. 
+    result.expression = hep.expr.substitute(function, **result.values)
+
+    # Construct a function suitable for plotting.  Scale the PDF by the
+    # bin size, for sensible comparison to the histogram.
+    bin_size = (axis.range[1] - axis.range[0]) / float(axis.number_of_bins)
+    scaled_expr = hep.expr.Multiply(
+        result.expression, hep.expr.Constant(bin_size))
+    notes = "Results of fit to '%s'\n" % str(function)
+    notes += "   $\chi^2$ / DOF = %.2f / %d = %.2f\n" \
+             % (result.minimum, result.degrees_of_freedom,
+                result.minimum / result.degrees_of_freedom)
+    notes += "   $P_{\chi^2}$ = %.3f\n" % result.chi_square_probability
+    for name, value in result.values.items():
+        notes += "   %s = %.4f � %.4f\n" \
+                 % (name, value, result.errors[name])
+    result.function = hep.hist.Function1D(
+        scaled_expr, arg_name=variable_name,
+        axis=hep.hist.Axis(axis.type, range=axis.range), notes=notes)
+
+    return result
+
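The bin-coalescing step in 'chiSquareFit1D' can be exercised on plain data, independent of histogram and axis objects.  The helper below, 'coalesce_bins', is a hypothetical reformulation for illustration only: it merges adjacent '((x0, x1), count)' bins until each merged sample holds at least 'min_count', and folds any under-full remainder into the last sample, as the code above does.

    def coalesce_bins(bins, min_count):
        """Merge adjacent ((x0, x1), count) bins until each holds at least min_count."""
        samples = []
        carried = None
        for (x0, x1), count in bins:
            if carried is None:
                carried = [x0, x1, count]
            else:
                # Extend the carried bin to cover this one as well.
                carried[1] = x1
                carried[2] += count
            if carried[2] >= min_count:
                samples.append(((carried[0], carried[1]), carried[2]))
                carried = None
        # Fold a leftover under-full bin into the last sample, if there is one.
        if carried is not None:
            if samples:
                (last_x0, last_x1), last_count = samples.pop()
                samples.append(((last_x0, carried[1]), last_count + carried[2]))
            else:
                samples.append(((carried[0], carried[1]), carried[2]))
        return samples

    bins = [((0, 1), 3), ((1, 2), 5), ((2, 3), 9), ((3, 4), 2)]
    print coalesce_bins(bins, 7)   # [((0, 2), 8), ((2, 4), 11)]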

File pyhep-purepython/hep/hist/function.py

+#-----------------------------------------------------------------------
+#
+# function.py
+#
+# Copyright (C) 2004 by Alex Samuel.  All rights reserved.
+#
+#-----------------------------------------------------------------------
+
+"""Parameterized and decorated functions for fitting and plotting.
+
+A function object represents a single-valued function of one or many
+arguments.  A function may also be specified using fixed parameters,
+which are not considered to be among the formal arguments.  The
+function's arguments are specified by axis objects, similarly to
+histogram objects. """
+
+#-----------------------------------------------------------------------
+# imports
+#-----------------------------------------------------------------------
+
+from __future__ import division
+
+from   axis import *
+from   hep import popkey
+import hep.expr
+import hep.ext
+import hep.num
+import sys
+
+#-----------------------------------------------------------------------
+# classes
+#-----------------------------------------------------------------------
+
+class _Function:
+    """A single-valued function of multiple arguments."""
+
+    def __init__(self, axes, expr, arg_names, parameters):
+        if len(axes) != len(arg_names):
+            raise ValueError, \
+                  "the number of axes and argument names must be the same"
+        self.axes = axes
+        self.formula = str(expr)
+        self.expr = hep.expr.asExpression(expr)
+        self.arg_names = arg_names
+        self.parameters = parameters
+
+        self.__compile()
+
+
+    def __repr__(self):
+        return "Function(axes=%r, expr=%r, arg_names=%r, " \
+               "parameters=%r)" \
+               % (self.axes, self.formula, self.arg_names,
+                  self.parameters)
+
+
+    dimensions = property(lambda self: len(self.axes))
+
+
+    # FIXME: Find the expr type correctly, using the argument and
+    # parameter types.
+    type = property(lambda self: self.expr.type)
+
+
+    def __getstate__(self):
+        state = dict(self.__dict__)
+        del state["compiled_expr"]
+        return state
+
+
+    def __setstate__(self, state):
+        self.__dict__.update(state)
+        self.__compile()
+
+
+    def __compile(self):
+        """Construct a compiled version of 'expr'."""
+
+        # Infer parameter types from their values.
+        parameter_types = dict(
+            [ (n, type(v)) for (n, v) in self.parameters.items() ])
+        # Compile the expression.  Free symbols are taken as 'float'.
+        self.compiled_expr = hep.expr.compile(
+            self.expr, float, **parameter_types)
+
+
+    def __call__(self, args):
+        """Evaluate the function.
+
+        'args' -- A sequence of argument values.  Each item in the
+        sequence is the coordinate value along the corresponding axis.
+
+        returns -- The result of evaluating the function.  If the
+        function has no value at the specified 'args', for instance if
+        one of the coordinate values is out of range of its axis,
+        returns 'None'."""
+
+        if len(args) != len(self.arg_names):
+            raise TypeError, \
+                  "function takes %d arguments" % len(self.arg_names)
+        
+        # Build the symbol table for evaluation.  Start with fixed
+        # parameters. 
+        symbols = dict(self.parameters)
+        # Now the arguments.  
+        for i in xrange(len(self.axes)):
+            axis = self.axes[i]
+            arg_name = self.arg_names[i]
+            arg = args[i]
+            # Get the axis range, if any.
+            lo, hi = getattr(axis, "range", (None, None))
+            # If a coordinate value is outside the axis range, the
+            # function has no value.
+            if (lo is not None and arg < lo) \
+               or (hi is not None and arg >= hi):
+                return None
+            # Store the argument value for this symbol.
+            symbols[arg_name] = axis.type(arg)
+        # Evaluate the expression.
+        try:
+            return self.compiled_expr.evaluate(symbols)
+        except (ZeroDivisionError, FloatingPointError, OverflowError,