# Source code for cf.functions

import numpy
import resource
import copy
import cPickle
from os      import getpid, listdir, mkdir
from os.path import isfile
from os.path import abspath as os_path_abspath
from itertools   import product as itertools_product
from collections import Iterable

from .constants import CONSTANTS, _file_to_fh

def FM_THRESHOLD(*new_minncfm):
    '''

Return or set the minimum amount of memory to be kept free as a
temporary work space.

The amount is returned as a number of kibibytes of memory, but set as
a number of chunks.

:Parameters:

    new_minncfm : int, optional
        The number of chunks to be kept free as a temporary work
        space.

:Returns:

    out : float or None    
        If new_minncfm was not set then return the existing temporary
        work space size in kibibytes, otherwise return `None`.

**Examples**

>>> cf.FM_THRESHOLD()
1024000.0
>>> cf.FM_THRESHOLD(20)
>>> cf.FM_THRESHOLD()
2048000.0

'''
    if new_minncfm:
        minncfm = new_minncfm[0]
        # Record the chunk count and the equivalent free-memory
        # threshold in kibibytes, derived from the current chunk size
        CONSTANTS['MINNCFM']      = minncfm
        CONSTANTS['FM_THRESHOLD'] = minncfm * CONSTANTS['CHUNKSIZE']/1024.0
    else:
        return CONSTANTS['FM_THRESHOLD']
#--- End: def

def MINNCFM(*new_minncfm):
    '''

Return or set the number of chunks of memory to be kept free as a
temporary work space.

:Parameters:

    new_minncfm : int, optional
        The number of chunks to be kept free as a temporary work
        space.

:Returns:

    out : int or None    
        If new_minncfm was not set then return the existing number of
        chunks, otherwise return `None`.

**Examples**

>>> cf.MINNCFM()
10
>>> cf.MINNCFM(20)
>>> cf.MINNCFM()
20

'''
    if new_minncfm:
        # Delegate to FM_THRESHOLD, which stores both the chunk count
        # and the derived free-memory threshold
        FM_THRESHOLD(new_minncfm[0])
    else:
        return CONSTANTS['MINNCFM']
#--- End: def

def TEMPDIR(*new_tempdir):
    '''

Return or set the directory for internally generated temporary files.

When setting the directory, it is created if the specified path does
not exist.

:Parameters:

    new_tempdir : str, optional
        The new directory for temporary files.

:Returns:

    out : str or None
        If `new_tempdir` was not set then return the existing
        temporary files' directory, otherwise return `None`.

**Examples**

>>> cf.TEMPDIR()
'/tmp'
>>> cf.TEMPDIR('/home/me/tmp')
>>> cf.TEMPDIR()
'/home/me/tmp'

'''
    if not new_tempdir:
        return CONSTANTS['TEMPDIR']

    # Create the directory if it does not exist. An OSError here
    # normally means the directory already exists; any genuine
    # creation failure will resurface when a temporary file is
    # written.
    try:
        mkdir(new_tempdir[0])
    except OSError:
        pass

    CONSTANTS['TEMPDIR'] = new_tempdir[0]
#--- End: def
def CHUNKSIZE(*new_chunksize): ''' Return or set the chunk size for data storage and processing. When setting the chunk size, the amount of minimum amount of memory to be kept free as a temporary work space is also updated. :Parameters: new_chunksize : int, optional The new chunk size in bytes. :Returns: out : int or None If new_chunksize was not set then return the existing chunk size in bytes, otherwise return `None`. **Examples** >>> cf.CHUNKSIZE() 104857600 >>> cf.CHUNKSIZE(2**30) >>> cf.CHUNKSIZE() 1073741824 ''' if not new_chunksize: return CONSTANTS['CHUNKSIZE'] CONSTANTS['CHUNKSIZE'] = new_chunksize[0] FM_THRESHOLD(CONSTANTS['MINNCFM']) #--- End: def def OF_FRACTION(*of_fraction): ''' Return or set the number of concurrently open files above which files containing data arrays may be automatically closed. Expressed as a fraction of the maximum possible number of concurrently open files. Note that closed files will be automatically reopened if subsequently needed by a variable to access its data array. :Parameters: of_fraction : float, optional The new fraction (between 0.0 and 1.0). :Returns: out : float or None If `of_fraction` was not set return the existing value, otherwise return `None`. **Examples** >>> cf.OF_FRACTION() 0.5 >>> cf.OF_FRACTION(0.75) >>> cf.OF_FRACTION() 0.75 The fraction may be translated to an actual number of files as follows: >>> import resource >>> max_open_files = resource.getrlimit(resource.RLIMIT_NOFILE)[0] >>> threshold = int(cf.OF_FRACTION() * max_open_files) >>> max_open_files, threshold (1024, 768) ''' if of_fraction: CONSTANTS['OF_FRACTION'] = of_fraction[0] else: return CONSTANTS['OF_FRACTION'] #--- End: def def dump(x, **kwargs): ''' Print a description of an object to stdout. If the object has a `dump` method then this is used to create the output. In this case the arguments are passed to the `dump` method. Otherwise the arguments are ignored and ``str(x)`` is printed. :Parameters: x : object, optional The object to print. 
kwargs : :Returns: None **Examples** >>> cf.dump(x) >>> cf.dump(f, id='field2') ''' if hasattr(x, 'dump') and callable(x.dump): print x.dump(**kwargs) else: print x #--- End: def def iterindices(location): ''' Return an iterator over indices ... :Parameters: location: sequence :Returns: out : generator An iterator over the element's indices **Examples** >>> for index in iterindices(([1,3], [0, 1], [3, 6])): ... print index ... (1, 0, 3) (1, 0, 4) (1, 0, 5) (2, 0, 3) (2, 0, 4) (2, 0, 5) >>> for index in iterindices([]): ... print index ... () >>> for index in iterindices([(0, n) for n in [2, 2]]) ... print index ... (0, 0) (0, 1) (1, 0) (1, 1) ''' if hasattr(location, 'shape'): indices = [xrange(*r) for r in [(0, n) for n in location.shape]] else: indices = [xrange(*r) for r in location] for index in itertools_product(*indices): yield index #--- End: def _fd_dir = '/proc/'+str(getpid())+'/fd' # LINUX DEPENDENCY _max_number_of_open_files = resource.getrlimit(resource.RLIMIT_NOFILE)[0] def open_files_threshold_exceeded(): ''' Return True if the number of open files is greater than the current threshold. The threshold is determined by as a fraction of the maximum possible number of concurrently open files (an operating system dependent amount). The fraction is retrieved and set with the `cf.OF_FRACTION` function. :Returns: out : bool Whether or not the number of open files exceeds the threshold. **Examples** In this example, the number of open files is equivalent to 75% of the maximum possible number of concurrently open files: >>> cf.OF_FRACTION() 0.5 >>> print cf.open_files_threshold_exceeded() True >>> cf.OF_FRACTION(0.9) >>> print cf.open_files_threshold_exceeded() False ''' # LINUX DEPENDENCY (_fd_dir is OS dependent) return len(listdir(_fd_dir)) > _max_number_of_open_files * OF_FRACTION() #---End: def def close_files(file_format=None): ''' Close open files containing data arrays. 
By default all files are closed, but this may be restricted to files of a particular format with the `file_format` parameter. Note that closed files will be automatically reopened if subsequently needed by a variable to access its data array. If there are no appropiate open files then no action is taken. :Parameters: file_format : str, optional Only close files of the given format. Recognised formats are 'netCDF' and 'PP'. By default files of any format are closed. :Returns: None **Examples** >>> cf.close_files() >>> cf.close_files('netCDF') >>> cf.close_files('PP') ''' if file_format is not None: if file_format in _file_to_fh: for fh in _file_to_fh[file_format].itervalues(): fh.close() _file_to_fh[file_format].clear() else: for file_format, value in _file_to_fh.iteritems(): for fh in value.itervalues(): fh.close() _file_to_fh[file_format].clear() #---End: def def close_one_file(file_format=None): ''' Close an arbitrary open file containing data arrays. By default a file of arbitrary format is closed, but the choice may be restricted to files of a particular format with the `file_format` parameter. Note that the closed file will be automatically reopened if subsequently needed by a variable to access its data array. If there are no appropiate open files then no action is taken. :Parameters: file_format : str, optional Only close a file of the given format. Recognised formats are 'netCDF' and 'PP'. By default a file of any format is closed. 
:Returns: None **Examples** >>> cf.close_one_file() >>> cf.close_one_file('netCDF') >>> cf.close_one_file('PP') >>> cf.open_files() {'netCDF': {'file1.nc': <netCDF4.Dataset at 0x181bcd0>, 'file2.nc': <netCDF4.Dataset at 0x1e42350>, 'file3.nc': <netCDF4.Dataset at 0x1d185e9>}} >>> cf.close_one_file() >>> cf.open_files() {'netCDF': {'file1.nc': <netCDF4.Dataset at 0x181bcd0>, 'file3.nc': <netCDF4.Dataset at 0x1d185e9>}} ''' if file_format is not None: if file_format in _file_to_fh and _file_to_fh[file_format]: filename, fh = next(_file_to_fh[file_format].iteritems()) fh.close() del _file_to_fh[file_format][filename] else: for values in _file_to_fh.itervalues(): if not values: continue filename, fh = next(values.iteritems()) fh.close() del values[filename] return #---End: def def open_files(file_format=None): ''' Return the open files containing data arrays. By default all such files are returned, but the selection may be restricted to files of a particular format with the `file_format` parameter. :Parameters: file_format : str, optional Only return files of the given format. Recognised formats are 'netCDF' and 'PP'. By default all files are returned. :Returns: out : dict A dictionary of file names and their open file objects. **Examples** >>> cf.open_files() {'netCDF': {'file1.nc': <netCDF4.Dataset at 0x187b6d0>}} >>> cf.open_files('netCDF') {'file1.nc': <netCDF4.Dataset at 0x187b6d0>} >>> cf.open_files('PP') {} ''' if file_format is not None: if file_format in _file_to_fh: return _file_to_fh[file_format].copy() else: return {} else: out = {} for file_format, values in _file_to_fh.iteritems(): out[file_format] = values.copy() return out #---End: def def ufunc(func_name, x, *args, **kwargs): x = x.copy() getattr(x, func_name)(*args, **kwargs) return x def cos(x): ''' ''' return ufunc('cos', x) #--- End: if def sin(x): ''' Take the trigonometric sine of the data. Units are accounted for in the calculation. 
For example, the the sine of 90 degrees_east is 1.0, as is the sine of 1.57079632 radians. If the units are not equivalent to radians (such as Kelvin) then they are treated as if they were radians. The Units are changed to '1' (nondimensionsal). :Parameters: x : :Returns: out : ''' return ufunc('sin', x) #--- End: if def tan(x): ''' ''' return ufunc('tan', x) #--- End: if def clip(x, a_min, a_max, units=None): ''' Clip (limit) the values in the data array(s) of the input variable. Given an interval, values outside the interval are clipped to the interval edges. Parameters : a_min : scalar a_max : scalar units : str or Units :Returns: out : **Examples** ''' return ufunc('clip', x, a_min, a_max, units=units) #--- End: if def _allclose(a, b, rtol=None, atol=None): ''' True if two arrays have the same shape and elements to within numerical tolerance, False otherwise. The tolerance values are positive, typically very small numbers. The relative difference (`rtol` * abs(`b`)) and the absolute difference `atol` are added together to compare against the absolute difference between `a` and `b`. :Parameters: a, b : array_like Input arrays to compare. atol : float, optional The absolute tolerance for all numerical comparisons, By default the value returned by the `ATOL` function is used. rtol : float, optional The relative tolerance for all numerical comparisons, By default the value returned by the `RTOL` function is used. :Returns: out : bool Returns True if the arrays are equal. 
**Examples** >>> cf._allclose([1, 2], [1, 2]) True >>> cf._allclose(np.array([1, 2]), np.array([1, 2])) True >>> cf._allclose([1, 2], [1, 2, 3]) False >>> cf._allclose([1, 2], [1, 4]) False ''' try: return numpy.ma.allclose(a, b, rtol=rtol, atol=atol) except (IndexError, NotImplementedError): return numpy.ma.all(a == b) #--- End: def def parse_indices(data, indices): ''' ''' parsed_indices = [] if not isinstance(indices, tuple): indices = (indices,) # Initialize the list of parsed indices as the input indices with any # Ellipsis objects expanded length, dims = len(indices), len(data.shape) for index in indices: if index is Ellipsis: parsed_indices.extend([slice(None)] * (dims-length+1)) length = len(parsed_indices) else: parsed_indices.append(index) #--- End: for ndim = data.ndim if ndim and len(parsed_indices) > ndim: raise IndexError("Invalid indices %s for array with shape %s" % (parsed_indices, data.shape)) if len(parsed_indices) < ndim: parsed_indices.extend([slice(None)]*(ndim-len(parsed_indices))) if not ndim and parsed_indices: # If data is scalar then allow it to be indexed with an # equivalent to [0] if (len(parsed_indices) == 1 and parsed_indices[0] in (0, -1, slice(0,1), slice(-1,None,-1), slice(None, None, None))): parsed_indices = [] else: raise IndexError( "Scalar array can only be indexed with (), Ellipsis or an equivalent to 0") #--- End: if for i, (index, size) in enumerate(zip(parsed_indices, data.shape)): if isinstance(index, slice): start, stop, step = index.indices(size) if (start == stop or (start < stop and step < 0) or (start > stop and step > 0)): raise IndexError("Invalid indices %s for array with shape %s" % (parsed_indices, data.shape)) if step < 0 and stop < 0: stop = None index = slice(start, stop, step) elif isinstance(index, (int, long)): if index < 0: index += size index = slice(index, index+1, 1) else: if getattr(getattr(index, 'dtype', None), 'kind', None) == 'b': # Convert booleans to +ve integers index = 
list(numpy.where(index)[0]) else: # Convert negative integers to non-negative integers index = [(x+size if x<0 else x) for x in index] if len(index) == 1: # Convert a single element list to a slice object index = index[0] index = slice(index, index+1, 1) else: # Try to find a slice object equivalent to the list step = index[1] - index[0] if step > 0: start, stop = index[0], index[-1]+1 elif step < 0: start, stop = index[0], index[-1]-1 if index == range(start, stop, step): # Replace the list with a slice object if stop < 0: stop = None index = slice(start, stop, step) #--- End: if #--- End: if parsed_indices[i] = index #--- End: for return parsed_indices #--- End: def def subspace_array(array, indices): ''' Subset the input numpy array with the given indices. Indexing is similar to that of a numpy array. The differences to numpy array indexing are: 1. An integer index i takes the i-th element but does not reduce the rank of the output array by one. 2. When more than one dimension's slice is a 1-d boolean array or 1-d sequence of integers then these indices work independently along each dimension (similar to the way vector subscripts work in Fortran). indices must contain an index for each dimension of the input array. ''' gg = [] for i, x in enumerate(indices): if not isinstance(x, slice): gg.append(i) #--- End: for len_gg = len(gg) if len_gg > 1: # Slice the dimensions one at a time indices = list(indices) for axis in gg: array = numpy.ma.take(array, indices[axis], axis=axis) indices[axis] = slice(None) if len_gg < len(indices): array = array[tuple(indices)] else: # Slice all dimensions at the same time array = array[tuple(indices)] return array #--- End: def def ATOL(*atol): ''' Return or set the value of absolute tolerance for testing numerically tolerant equality. :Parameters: atol : int, optional The new value of absolute tolerance. :Returns: out : float or None If `atol` was not set return the existing value of absolute tolerance, otherwise return `None`. 
**Examples** >>> cf.ATOL() 1e-08 >>> cf.ATOL(1e-10) >>> cf.ATOL() 1e-10 ''' if atol: CONSTANTS['ATOL'] = atol[0] else: return CONSTANTS['ATOL'] #--- End: def def RTOL(*rtol): ''' Return or set the default value of relative tolerance for testing numerically tolerant equality. :Parameters: rtol : int, optional The new value of relative tolerance. :Returns: out : float or None If `rtol` was not set return the existing value of relative tolerance, otherwise return `None`. **Examples** >>> cf.RTOL() 1.0000000000000001e-05 >>> cf.RTOL(1e-10) >>> cf.RTOL() 1e-10 ''' if rtol: CONSTANTS['RTOL'] = rtol[0] else: return CONSTANTS['RTOL'] #--- End: def def equals(x, y, rtol=None, atol=None, traceback=False): ''' True if two objects are logically equal, False otherwise. If the first argument, `x`, has an `equals` method then it is used, and in this case ``equals(x, y)`` is equivalent to ``x.equals(y)``. :Parameters: x, y : The objects to compare for equality. atol : float, optional The absolute tolerance for all numerical comparisons, By default the value returned by the `ATOL` function is used. rtol : float, optional The relative tolerance for all numerical comparisons, By default the value returned by the `RTOL` function is used. traceback : bool, optional If True then print a traceback highlighting where the two objects differ. :Returns: out : bool Whether or not the two objects are equal. 
**Examples** >>> x <CF Field: rain(10,20)> >>> cf.equals(x,x) True >>> cf.equals(1.0, 1.0) True >>> cf.equals(1.0, 33) False >>> cf.equals('a', 'a') True >>> cf.equals('a', 'b') False >>> type(x), x.dtype (<type 'numpy.ndarray'>, dtype('int64')) >>> y=x.copy() >>> cf.equals(x, y) True >>> cf.equals(x, x+1) False >>> class A(object): pass >>> a=A() >>> b=A() >>> cf.equals(a, a) True >>> cf.equals(a, b) False ''' if rtol is None: rtol = RTOL() if atol is None: atol = ATOL() if hasattr(x, 'equals') and callable(x.equals): # x has a callable equals method return x.equals(y, rtol=rtol, atol=atol, traceback=traceback) else: if hasattr(x, '__iter__') or hasattr(y, '__iter__'): # x or y is a sequence if not isinstance(x, numpy.ndarray): x = numpy.ma.asarray(x) if not isinstance(y, numpy.ndarray): y = numpy.ma.asarray(y) if x.shape != y.shape: return False return _allclose(x, y, rtol=rtol, atol=atol) else: try: # x and y are both numbers return abs(x-y) <= atol + rtol*abs(y) except TypeError: # At least one of x and y is not a number return x == y #--- End: if #--- End: if #--- End: def def flat(x): ''' An iterator over an arbitrarily nested sequence. :Parameters: x : arbitrarily nested sequence or scalar The arbitrarily nested sequence to be flattened. Note that a Field instance behaves like a single element field list, and so is a valid argument. If `x` is a scalar then this is equivalent to passing a single element sequence containing the scalar. :Returns: out : generator An iterator over flattened sequence. **Examples** >>> for a in cf.flat([1, [2, [3, 4]]]): ... print a, 1 2 3 4 >>> for a in cf.flat(['a', ['bc', ['def', 'ghij']]]): ... print a, a bc def ghij >>> for a in cf.flat(2004): ... print a, 2004 >>> for a in cf.flat('abcdefghij'): ... print a, abcdefghij >>> f <CF Field: eastward_wind(air_pressure(5), latitude(110), longitude(106))> >>> for a in cf.flat(f): ... 
print a <CF Field: eastward_wind(air_pressure(5), latitude(110), longitude(106))> >>> for a in cf.flat([f, [f, [f, f]]]): ... print a <CF Field: eastward_wind(air_pressure(5), latitude(110), longitude(106))> <CF Field: eastward_wind(air_pressure(5), latitude(110), longitude(106))> <CF Field: eastward_wind(air_pressure(5), latitude(110), longitude(106))> <CF Field: eastward_wind(air_pressure(5), latitude(110), longitude(106))> >>> fl = cf.FieldList(cf.flat([f, [f, [f, f]]]) >>> fl [<CF Field: eastward_wind(air_pressure(5), latitude(110), longitude(106))>, <CF Field: eastward_wind(air_pressure(5), latitude(110), longitude(106))>, <CF Field: eastward_wind(air_pressure(5), latitude(110), longitude(106))>, <CF Field: eastward_wind(air_pressure(5), latitude(110), longitude(106))>] ''' if isinstance(x, basestring) or not isinstance(x, Iterable): x = (x,) for a in x: if not isinstance(a, basestring) and isinstance(a, Iterable): for sub in flat(a): yield sub else: yield a #--- End: def def pickle(x, filename, overwrite=False): ''' Write a binary pickled representation of an object to a file. Note that Field and FieldList objects are picklable and their pickle file size will be very small if their data arrays contain file pointers as opposed to numpy arrays. :Parameters: x : The object to be pickled. filename : str The name of the file in which to write the pickled representation of `x`. overwrite: bool, optional If True a pre-existing output file is over written. By default an exception is raised if the output file pre-exists. :Returns: None :Raises: IOError : If `overwrite` is False and the output file pre-exists. PickleError : If the object is not picklable. 
**Examples** For any picklable object, x: >>> cf.pickle(x, 'file.cfpkl') >>> y = cf.unpickle('file.cfpkl') >>> cf.equals(x, y) True ''' if not overwrite and isfile(filename): raise IOError( "Can't pickle to an existing file unless overwrite=True") fh = open(filename, 'wb') try: cPickle.dump(x, fh, 2) except: fh.close() raise cPickle.PickleError("Failed whilst pickling %s" % repr(x)) fh.close() #--- End: def
def unpickle(filename):
    '''

Return the reconstituted (unpickled) object from a binary pickle file.

Any binary pickle file may be used as input.

:Parameters:

    filename : str
        The name of the file containing the pickled object.

:Returns:

    out :
        The reconstituted object.

:Raises:

    UnpicklingError :
        If the file can not be unpickled. In particular, this might be
        raised when attempting to unpickle fields which were pickled
        with a different, incompatible version of cf.

**Examples**

For any picklable object, x:

>>> cf.pickle(x, 'file.cfpkl')
>>> y = cf.unpickle('file.cfpkl')
>>> cf.equals(x, y)
True

'''
    fh = open(filename, 'rb')
    try:
        # Failed unpickling can throw up many types of error, so trap
        # Exception (but not KeyboardInterrupt/SystemExit) and raise
        # an informative UnpicklingError instead.
        return cPickle.load(fh)
    except Exception:
        raise cPickle.UnpicklingError(
            "Failed whilst unpickling file '%s'" % filename)
    finally:
        fh.close()
#--- End: def
# Data type names recognised by cf but not by numpy.dtype
_d = {'char': numpy.dtype('S1')}

def string_to_numpy_data_type(string):
    '''

Return the numpy data type object corresponding to a data type name.

Names accepted by `numpy.dtype` are used directly; in addition the
netCDF-style name 'char' maps to a 1-character string dtype.

:Parameters:

    string : str
        The name of the data type.

:Returns:

    out : numpy.dtype
        The numpy data type object.

:Raises:

    TypeError :
        If the name is not recognised.

'''
    try:
        return numpy.dtype(string)
    except TypeError:
        try:
            return _d[string]
        except KeyError:
            raise TypeError("Unrecognised data type name: %s" % repr(string))
#--- End: def

def abspath(filename):
    '''

Return a normalized absolutized version of a file name.

URLs are returned unchanged, since an operating system path
normalization would mangle them.

:Parameters:

    filename : str
        The file name or URL.

:Returns:

    out : str
        The normalized absolutized version of the file name, or the
        unchanged URL.

'''
    if filename.startswith(('http://', 'https://')):
        return filename

    return os_path_abspath(filename)
#--- End: def