import numpy
import resource
import copy
import cPickle
from os import getpid, listdir, mkdir
from os.path import isfile
from os.path import abspath as os_path_abspath
from itertools import product as itertools_product
from collections import Iterable
from .constants import CONSTANTS, _file_to_fh
def FM_THRESHOLD(*new_minncfm):
'''
Return or set the minimum amount of memory to be kept free as a
temporary work space.
The amount is returned as a number of kibibytes of memory, but set as
a number of chunks.
:Parameters:
new_minncfm : int, optional
The number of chunks to be kept free as a temporary work
space.
:Returns:
out : float or None
If new_minncfm was not set then return the existing temporary
work space size in kibibytes, otherwise return `None`.
**Examples**
>>> cf.FM_THRESHOLD()
1024000.0
>>> cf.FM_THRESHOLD(20)
>>> cf.FM_THRESHOLD()
2048000.0
'''
if not new_minncfm:
return CONSTANTS['FM_THRESHOLD']
minncfm = new_minncfm[0]
CONSTANTS['MINNCFM'] = minncfm
CONSTANTS['FM_THRESHOLD'] = minncfm * CONSTANTS['CHUNKSIZE']/1024.0
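# Note that, with the default CHUNKSIZE of 104857600 bytes and the
# default MINNCFM of 10 chunks (the values shown in the docstring
# examples), the threshold is 10 * 104857600 / 1024.0 = 1024000.0
# kibibytes.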
#--- End: def
def MINNCFM(*new_minncfm):
'''
Return or set the number of chunks of memory to be kept free as a
temporary work space.
:Parameters:
new_minncfm : int, optional
The number of chunks to be kept free as a temporary work
space.
:Returns:
out : int or None
If new_minncfm was not set then return the existing number of
chunks, otherwise return `None`.
**Examples**
>>> cf.MINNCFM()
10
>>> cf.MINNCFM(20)
>>> cf.MINNCFM()
20
'''
if not new_minncfm:
return CONSTANTS['MINNCFM']
FM_THRESHOLD(new_minncfm[0])
#--- End: def
def TEMPDIR(*new_tempdir):
'''
Return or set the directory for internally generated temporary files.
When setting the directory, it is created if the specified path does
not exist.
:Parameters:
new_tempdir : str, optional
The new directory for temporary files.
:Returns:
out : str or None
If `new_tempdir` was not set then return the existing
temporary files' directory, otherwise return `None`.
**Examples**
>>> cf.TEMPDIR()
'/tmp'
>>> cf.TEMPDIR('/home/me/tmp')
>>> cf.TEMPDIR()
'/home/me/tmp'
'''
if not new_tempdir:
return CONSTANTS['TEMPDIR']
# Create the directory if it does not exist.
try:
mkdir(new_tempdir[0])
except OSError:
pass
CONSTANTS['TEMPDIR'] = new_tempdir[0]
#--- End: def
def CHUNKSIZE(*new_chunksize):
'''
Return or set the chunk size for data storage and processing.
When setting the chunk size, the minimum amount of memory to be kept
free as a temporary work space is also updated.
:Parameters:
new_chunksize : int, optional
The new chunk size in bytes.
:Returns:
out : int or None
If new_chunksize was not set then return the existing chunk
size in bytes, otherwise return `None`.
**Examples**
>>> cf.CHUNKSIZE()
104857600
>>> cf.CHUNKSIZE(2**30)
>>> cf.CHUNKSIZE()
1073741824
'''
if not new_chunksize:
return CONSTANTS['CHUNKSIZE']
CONSTANTS['CHUNKSIZE'] = new_chunksize[0]
FM_THRESHOLD(CONSTANTS['MINNCFM'])
#--- End: def
def OF_FRACTION(*of_fraction):
'''
Return or set the fraction of the maximum possible number of
concurrently open files above which files containing data arrays may
be automatically closed.
Note that closed files will be automatically reopened if subsequently
needed by a variable to access its data array.
:Parameters:
of_fraction : float, optional
The new fraction (between 0.0 and 1.0).
:Returns:
out : float or None
If `of_fraction` was not set return the existing value,
otherwise return `None`.
**Examples**
>>> cf.OF_FRACTION()
0.5
>>> cf.OF_FRACTION(0.75)
>>> cf.OF_FRACTION()
0.75
The fraction may be translated to an actual number of files as
follows:
>>> import resource
>>> max_open_files = resource.getrlimit(resource.RLIMIT_NOFILE)[0]
>>> threshold = int(cf.OF_FRACTION() * max_open_files)
>>> max_open_files, threshold
(1024, 768)
'''
if of_fraction:
CONSTANTS['OF_FRACTION'] = of_fraction[0]
else:
return CONSTANTS['OF_FRACTION']
#--- End: def
def dump(x, **kwargs):
'''
Print a description of an object to stdout.
If the object has a `dump` method then this is used to create the
output. In this case the arguments are passed to the `dump`
method. Otherwise the arguments are ignored and ``str(x)`` is printed.
:Parameters:
x : object
The object to print.
kwargs : optional
Keyword arguments passed to the object's `dump` method, if it
has one. Ignored otherwise.
:Returns:
None
**Examples**
>>> cf.dump(x)
>>> cf.dump(f, id='field2')
'''
if hasattr(x, 'dump') and callable(x.dump):
print x.dump(**kwargs)
else:
print x
#--- End: def
def iterindices(location):
'''
Return an iterator over all of the indices implied by the given
ranges, one range per dimension.
:Parameters:
location : sequence or object with a `shape` attribute
A sequence of (start, stop[, step]) ranges, one for each
dimension, each giving the arguments for `xrange`. If `location`
has a `shape` attribute then the ranges are taken to be (0, n)
for each size n of the shape.
:Returns:
out : generator
An iterator over tuples of indices.
**Examples**
>>> for index in iterindices(([1,3], [0, 1], [3, 6])):
... print index
...
(1, 0, 3)
(1, 0, 4)
(1, 0, 5)
(2, 0, 3)
(2, 0, 4)
(2, 0, 5)
>>> for index in iterindices([]):
... print index
...
()
>>> for index in iterindices([(0, n) for n in [2, 2]]):
... print index
...
(0, 0)
(0, 1)
(1, 0)
(1, 1)
'''
if hasattr(location, 'shape'):
indices = [xrange(*r) for r in [(0, n) for n in location.shape]]
else:
indices = [xrange(*r) for r in location]
for index in itertools_product(*indices):
yield index
#--- End: def
_fd_dir = '/proc/'+str(getpid())+'/fd' # LINUX DEPENDENCY
_max_number_of_open_files = resource.getrlimit(resource.RLIMIT_NOFILE)[0]
def open_files_threshold_exceeded():
'''
Return True if the number of open files is greater than the current
threshold.
The threshold is defined as a fraction of the maximum possible
number of concurrently open files (an operating system dependent
amount). The fraction is retrieved and set with the `cf.OF_FRACTION`
function.
:Returns:
out : bool
Whether or not the number of open files exceeds the threshold.
**Examples**
In this example, the number of open files is equivalent to 75% of the
maximum possible number of concurrently open files:
>>> cf.OF_FRACTION()
0.5
>>> print cf.open_files_threshold_exceeded()
True
>>> cf.OF_FRACTION(0.9)
>>> print cf.open_files_threshold_exceeded()
False
'''
# LINUX DEPENDENCY (_fd_dir is OS dependent)
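# For example, with a limit of 1024 concurrently open files and
# OF_FRACTION() returning 0.75 (the values shown in the OF_FRACTION
# docstring), the threshold is 1024 * 0.75 = 768 open files.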
return len(listdir(_fd_dir)) > _max_number_of_open_files * OF_FRACTION()
#--- End: def
def close_files(file_format=None):
'''
Close open files containing data arrays.
By default all files are closed, but this may be restricted to files
of a particular format with the `file_format` parameter.
Note that closed files will be automatically reopened if subsequently
needed by a variable to access its data array.
If there are no appropriate open files then no action is taken.
:Parameters:
file_format : str, optional
Only close files of the given format. Recognised formats are
'netCDF' and 'PP'. By default files of any format are closed.
:Returns:
None
**Examples**
>>> cf.close_files()
>>> cf.close_files('netCDF')
>>> cf.close_files('PP')
'''
if file_format is not None:
if file_format in _file_to_fh:
for fh in _file_to_fh[file_format].itervalues():
fh.close()
_file_to_fh[file_format].clear()
else:
for file_format, value in _file_to_fh.iteritems():
for fh in value.itervalues():
fh.close()
_file_to_fh[file_format].clear()
#--- End: def
def close_one_file(file_format=None):
'''
Close an arbitrary open file containing data arrays.
By default a file of arbitrary format is closed, but the choice may be
restricted to files of a particular format with the `file_format`
parameter.
Note that the closed file will be automatically reopened if
subsequently needed by a variable to access its data array.
If there are no appropriate open files then no action is taken.
:Parameters:
file_format : str, optional
Only close a file of the given format. Recognised formats are
'netCDF' and 'PP'. By default a file of any format is closed.
:Returns:
None
**Examples**
>>> cf.close_one_file()
>>> cf.close_one_file('netCDF')
>>> cf.close_one_file('PP')
>>> cf.open_files()
{'netCDF': {'file1.nc': <netCDF4.Dataset at 0x181bcd0>,
'file2.nc': <netCDF4.Dataset at 0x1e42350>,
'file3.nc': <netCDF4.Dataset at 0x1d185e9>}}
>>> cf.close_one_file()
>>> cf.open_files()
{'netCDF': {'file1.nc': <netCDF4.Dataset at 0x181bcd0>,
'file3.nc': <netCDF4.Dataset at 0x1d185e9>}}
'''
if file_format is not None:
if file_format in _file_to_fh and _file_to_fh[file_format]:
filename, fh = next(_file_to_fh[file_format].iteritems())
fh.close()
del _file_to_fh[file_format][filename]
else:
for values in _file_to_fh.itervalues():
if not values:
continue
filename, fh = next(values.iteritems())
fh.close()
del values[filename]
return
#--- End: def
def open_files(file_format=None):
'''
Return the open files containing data arrays.
By default all such files are returned, but the selection may be
restricted to files of a particular format with the `file_format`
parameter.
:Parameters:
file_format : str, optional
Only return files of the given format. Recognised formats are
'netCDF' and 'PP'. By default all files are returned.
:Returns:
out : dict
A dictionary of file names and their open file objects.
**Examples**
>>> cf.open_files()
{'netCDF': {'file1.nc': <netCDF4.Dataset at 0x187b6d0>}}
>>> cf.open_files('netCDF')
{'file1.nc': <netCDF4.Dataset at 0x187b6d0>}
>>> cf.open_files('PP')
{}
'''
if file_format is not None:
if file_format in _file_to_fh:
return _file_to_fh[file_format].copy()
else:
return {}
else:
out = {}
for file_format, values in _file_to_fh.iteritems():
out[file_format] = values.copy()
return out
#--- End: def
def ufunc(func_name, x, *args, **kwargs):
'''
Return a copy of `x` with the in-place method named by `func_name`
applied to it. Any extra arguments are passed to that method.
'''
x = x.copy()
getattr(x, func_name)(*args, **kwargs)
return x
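# A minimal sketch of how the ufunc helper dispatches for the wrappers
# below: cos(x) is equivalent to
#
# >>> y = x.copy()
# >>> y.cos()    # the named method is assumed to operate in place
#
# after which y is returned, so the input variable is never modified.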
def cos(x):
'''
Take the trigonometric cosine of the data.
Units are accounted for in the calculation, as for `cf.sin`: if the
units are not equivalent to radians then they are treated as if they
were radians.
The Units are changed to '1' (nondimensional).
'''
return ufunc('cos', x)
#--- End: def
def sin(x):
'''
Take the trigonometric sine of the data.
Units are accounted for in the calculation. For example, the sine
of 90 degrees_east is 1.0, as is the sine of 1.57079632 radians. If
the units are not equivalent to radians (such as Kelvin) then they
are treated as if they were radians.
The Units are changed to '1' (nondimensional).
:Parameters:
x :
The variable whose data are to be operated on.
:Returns:
out :
A new variable with the sine of the data.
'''
return ufunc('sin', x)
#--- End: def
def tan(x):
'''
Take the trigonometric tangent of the data.
Units are accounted for in the calculation, as for `cf.sin`: if the
units are not equivalent to radians then they are treated as if they
were radians.
The Units are changed to '1' (nondimensional).
'''
return ufunc('tan', x)
#--- End: def
def clip(x, a_min, a_max, units=None):
'''
Clip (limit) the values in the data array(s) of the input variable.
Given an interval, values outside the interval are clipped to the
interval edges.
:Parameters:
x :
The variable whose data are to be clipped.
a_min : scalar
The lower bound of the clipping interval.
a_max : scalar
The upper bound of the clipping interval.
units : str or Units, optional
The units of the clipping interval.
:Returns:
out :
A new variable with clipped data.
**Examples**
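For a hypothetical variable f, limiting its data to the range
[-90, 90] (a sketch):
>>> g = cf.clip(f, -90, 90)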
'''
return ufunc('clip', x, a_min, a_max, units=units)
#--- End: def
def _allclose(a, b, rtol=None, atol=None):
'''
True if two arrays have the same shape and elements to within
numerical tolerance, False otherwise.
The tolerance values are positive, typically very small numbers. The
relative difference (`rtol` * abs(`b`)) and the absolute difference
`atol` are added together to compare against the absolute difference
between `a` and `b`.
:Parameters:
a, b : array_like
Input arrays to compare.
atol : float, optional
The absolute tolerance for all numerical comparisons. By
default the value returned by the `ATOL` function is used.
rtol : float, optional
The relative tolerance for all numerical comparisons. By
default the value returned by the `RTOL` function is used.
:Returns:
out : bool
Returns True if the arrays are equal.
**Examples**
>>> cf._allclose([1, 2], [1, 2])
True
>>> cf._allclose(np.array([1, 2]), np.array([1, 2]))
True
>>> cf._allclose([1, 2], [1, 2, 3])
False
>>> cf._allclose([1, 2], [1, 4])
False
'''
if rtol is None:
rtol = RTOL()
if atol is None:
atol = ATOL()
try:
return numpy.ma.allclose(a, b, rtol=rtol, atol=atol)
except (IndexError, NotImplementedError):
return numpy.ma.all(a == b)
#--- End: def
def parse_indices(data, indices):
'''
Parse the given indices into a list containing one index per
dimension of the data.
Ellipsis objects are expanded, missing trailing dimensions are given
slice(None), negative integers are converted to their non-negative
equivalents, integers are converted to size-one slices and, where
possible, sequences of integers or booleans are replaced by
equivalent slice objects.
:Parameters:
data : array_like
The object, with `shape` and `ndim` attributes, whose indices
are to be parsed.
indices : int, slice, sequence or tuple
The indices to parse.
:Returns:
out : list
The parsed indices, one per dimension of the data.
'''
parsed_indices = []
if not isinstance(indices, tuple):
indices = (indices,)
# Initialize the list of parsed indices as the input indices with any
# Ellipsis objects expanded
length, dims = len(indices), len(data.shape)
for index in indices:
if index is Ellipsis:
parsed_indices.extend([slice(None)] * (dims-length+1))
length = len(parsed_indices)
else:
parsed_indices.append(index)
#--- End: for
ndim = data.ndim
if ndim and len(parsed_indices) > ndim:
raise IndexError("Invalid indices %s for array with shape %s" %
(parsed_indices, data.shape))
if len(parsed_indices) < ndim:
parsed_indices.extend([slice(None)]*(ndim-len(parsed_indices)))
if not ndim and parsed_indices:
# If data is scalar then allow it to be indexed with an
# equivalent to [0]
if (len(parsed_indices) == 1 and
parsed_indices[0] in (0, -1, slice(0,1),
slice(-1,None,-1), slice(None, None, None))):
parsed_indices = []
else:
raise IndexError(
"Scalar array can only be indexed with (), Ellipsis or an equivalent to 0")
#--- End: if
for i, (index, size) in enumerate(zip(parsed_indices, data.shape)):
if isinstance(index, slice):
start, stop, step = index.indices(size)
if (start == stop or
(start < stop and step < 0) or
(start > stop and step > 0)):
raise IndexError("Invalid indices %s for array with shape %s" %
(parsed_indices, data.shape))
if step < 0 and stop < 0:
stop = None
index = slice(start, stop, step)
elif isinstance(index, (int, long)):
if index < 0:
index += size
index = slice(index, index+1, 1)
else:
if getattr(getattr(index, 'dtype', None), 'kind', None) == 'b':
# Convert booleans to +ve integers
index = list(numpy.where(index)[0])
else:
# Convert negative integers to non-negative integers
index = [(x+size if x<0 else x) for x in index]
if len(index) == 1:
# Convert a single element list to a slice object
index = index[0]
index = slice(index, index+1, 1)
else:
# Try to find a slice object equivalent to the list
step = index[1] - index[0]
if step:
# A step of zero (i.e. repeated values) has no slice equivalent
if step > 0:
start, stop = index[0], index[-1]+1
else:
start, stop = index[0], index[-1]-1
if index == range(start, stop, step):
# Replace the list with an equivalent slice object
if stop < 0:
stop = None
index = slice(start, stop, step)
#--- End: if
#--- End: if
parsed_indices[i] = index
#--- End: for
return parsed_indices
#--- End: def
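# A sketch of parse_indices behaviour for a hypothetical array of shape
# (2, 3, 4): an Ellipsis is expanded, an integer becomes a size-one
# slice and a list with a regular stride is replaced by an equivalent
# slice:
#
# >>> a = numpy.arange(24).reshape(2, 3, 4)
# >>> parse_indices(a, (Ellipsis, 1))
# [slice(0, 2, 1), slice(0, 3, 1), slice(1, 2, 1)]
# >>> parse_indices(a, (slice(None), [0, 2]))
# [slice(0, 2, 1), slice(0, 3, 2), slice(0, 4, 1)]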
def subspace_array(array, indices):
'''
Subset the input numpy array with the given indices. Indexing is similar to
that of a numpy array. The differences to numpy array indexing are:
1. An integer index i takes the i-th element but does not reduce the rank of
the output array by one.
2. When more than one dimension's slice is a 1-d boolean array or 1-d sequence
of integers then these indices work independently along each dimension
(similar to the way vector subscripts work in Fortran).
indices must contain an index for each dimension of the input array.
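**Examples**
A sketch of the Fortran-like vector subscripts for a 3x4 numpy array
(note that numpy's own fancy indexing would instead pair the
subscripts and return just the two elements a[0,1] and a[2,3]):
>>> import numpy
>>> a = numpy.arange(12).reshape(3, 4)
>>> print cf.subspace_array(a, ([0, 2], [1, 3]))
[[ 1  3]
 [ 9 11]]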
'''
gg = []
for i, x in enumerate(indices):
if not isinstance(x, slice):
gg.append(i)
#--- End: for
len_gg = len(gg)
if len_gg > 1:
# Slice the dimensions one at a time
indices = list(indices)
for axis in gg:
array = numpy.ma.take(array, indices[axis], axis=axis)
indices[axis] = slice(None)
if len_gg < len(indices):
array = array[tuple(indices)]
else:
# Slice all dimensions at the same time
array = array[tuple(indices)]
return array
#--- End: def
def ATOL(*atol):
'''
Return or set the value of absolute tolerance for testing numerically
tolerant equality.
:Parameters:
atol : float, optional
The new value of absolute tolerance.
:Returns:
out : float or None
If `atol` was not set return the existing value of absolute
tolerance, otherwise return `None`.
**Examples**
>>> cf.ATOL()
1e-08
>>> cf.ATOL(1e-10)
>>> cf.ATOL()
1e-10
'''
if atol:
CONSTANTS['ATOL'] = atol[0]
else:
return CONSTANTS['ATOL']
#--- End: def
def RTOL(*rtol):
'''
Return or set the default value of relative tolerance for testing
numerically tolerant equality.
:Parameters:
rtol : float, optional
The new value of relative tolerance.
:Returns:
out : float or None
If `rtol` was not set return the existing value of relative
tolerance, otherwise return `None`.
**Examples**
>>> cf.RTOL()
1.0000000000000001e-05
>>> cf.RTOL(1e-10)
>>> cf.RTOL()
1e-10
'''
if rtol:
CONSTANTS['RTOL'] = rtol[0]
else:
return CONSTANTS['RTOL']
#--- End: def
def equals(x, y, rtol=None, atol=None, traceback=False):
'''
True if two objects are logically equal, False otherwise.
If the first argument, `x`, has an `equals` method then it is used,
and in this case ``equals(x, y)`` is equivalent to ``x.equals(y)``.
:Parameters:
x, y :
The objects to compare for equality.
atol : float, optional
The absolute tolerance for all numerical comparisons. By
default the value returned by the `ATOL` function is used.
rtol : float, optional
The relative tolerance for all numerical comparisons. By
default the value returned by the `RTOL` function is used.
traceback : bool, optional
If True then print a traceback highlighting where the two
objects differ.
:Returns:
out : bool
Whether or not the two objects are equal.
**Examples**
>>> x
<CF Field: rain(10,20)>
>>> cf.equals(x,x)
True
>>> cf.equals(1.0, 1.0)
True
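With the default tolerances (``cf.ATOL()`` is 1e-08 and ``cf.RTOL()``
is 1e-05), a difference no greater than atol + rtol*abs(y) compares
equal:
>>> cf.equals(1.0, 1.0 + 1e-09)
True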
>>> cf.equals(1.0, 33)
False
>>> cf.equals('a', 'a')
True
>>> cf.equals('a', 'b')
False
>>> import numpy
>>> x = numpy.arange(6)
>>> type(x), x.dtype
(<type 'numpy.ndarray'>, dtype('int64'))
>>> y=x.copy()
>>> cf.equals(x, y)
True
>>> cf.equals(x, x+1)
False
>>> class A(object): pass
>>> a=A()
>>> b=A()
>>> cf.equals(a, a)
True
>>> cf.equals(a, b)
False
'''
if rtol is None:
rtol = RTOL()
if atol is None:
atol = ATOL()
if hasattr(x, 'equals') and callable(x.equals):
# x has a callable equals method
return x.equals(y, rtol=rtol, atol=atol, traceback=traceback)
else:
if hasattr(x, '__iter__') or hasattr(y, '__iter__'):
# x or y is a sequence
if not isinstance(x, numpy.ndarray):
x = numpy.ma.asarray(x)
if not isinstance(y, numpy.ndarray):
y = numpy.ma.asarray(y)
if x.shape != y.shape:
return False
return _allclose(x, y, rtol=rtol, atol=atol)
else:
try:
# x and y are both numbers
return abs(x-y) <= atol + rtol*abs(y)
except TypeError:
# At least one of x and y is not a number
return x == y
#--- End: if
#--- End: if
#--- End: def
def flat(x):
'''
An iterator over an arbitrarily nested sequence.
:Parameters:
x : arbitrarily nested sequence or scalar
The arbitrarily nested sequence to be flattened. Note that a
Field instance behaves like a single element field list, and
so is a valid argument. If `x` is a scalar then this is
equivalent to passing a single element sequence containing the
scalar.
:Returns:
out : generator
An iterator over flattened sequence.
**Examples**
>>> for a in cf.flat([1, [2, [3, 4]]]):
... print a,
1 2 3 4
>>> for a in cf.flat(['a', ['bc', ['def', 'ghij']]]):
... print a,
a bc def ghij
>>> for a in cf.flat(2004):
... print a,
2004
>>> for a in cf.flat('abcdefghij'):
... print a,
abcdefghij
>>> f
<CF Field: eastward_wind(air_pressure(5), latitude(110), longitude(106))>
>>> for a in cf.flat(f):
... print a
<CF Field: eastward_wind(air_pressure(5), latitude(110), longitude(106))>
>>> for a in cf.flat([f, [f, [f, f]]]):
... print a
<CF Field: eastward_wind(air_pressure(5), latitude(110), longitude(106))>
<CF Field: eastward_wind(air_pressure(5), latitude(110), longitude(106))>
<CF Field: eastward_wind(air_pressure(5), latitude(110), longitude(106))>
<CF Field: eastward_wind(air_pressure(5), latitude(110), longitude(106))>
>>> fl = cf.FieldList(cf.flat([f, [f, [f, f]]]))
>>> fl
[<CF Field: eastward_wind(air_pressure(5), latitude(110), longitude(106))>,
<CF Field: eastward_wind(air_pressure(5), latitude(110), longitude(106))>,
<CF Field: eastward_wind(air_pressure(5), latitude(110), longitude(106))>,
<CF Field: eastward_wind(air_pressure(5), latitude(110), longitude(106))>]
'''
if isinstance(x, basestring) or not isinstance(x, Iterable):
x = (x,)
for a in x:
if not isinstance(a, basestring) and isinstance(a, Iterable):
for sub in flat(a):
yield sub
else:
yield a
#--- End: def
def pickle(x, filename, overwrite=False):
'''
Write a binary pickled representation of an object to a file.
Note that Field and FieldList objects are picklable and their pickle
file size will be very small if their data arrays contain file
pointers as opposed to numpy arrays.
:Parameters:
x :
The object to be pickled.
filename : str
The name of the file in which to write the pickled
representation of `x`.
overwrite: bool, optional
If True a pre-existing output file is overwritten. By default
an exception is raised if the output file pre-exists.
:Returns:
None
:Raises:
IOError :
If `overwrite` is False and the output file pre-exists.
PickleError :
If the object is not picklable.
**Examples**
For any picklable object, x:
>>> cf.pickle(x, 'file.cfpkl')
>>> y = cf.unpickle('file.cfpkl')
>>> cf.equals(x, y)
True
'''
if not overwrite and isfile(filename):
raise IOError(
"Can't pickle to an existing file unless overwrite=True")
fh = open(filename, 'wb')
try:
cPickle.dump(x, fh, 2)
except:
fh.close()
raise cPickle.PickleError("Failed whilst pickling %s" % repr(x))
fh.close()
#--- End: def
def unpickle(filename):
'''
Return the reconstituted (unpickled) object from a binary pickle file.
Any binary pickle file may be used as input.
:Parameters:
filename : str
The name of the file containing the pickled object.
:Returns:
out :
The reconstituted object.
:Raises:
UnpicklingError :
If the file cannot be unpickled. In particular, this might be
raised when attempting to unpickle fields which were pickled
with a different, incompatible version of cf.
**Examples**
For any picklable object, x:
>>> cf.pickle(x, 'file.cfpkl')
>>> y = cf.unpickle('file.cfpkl')
>>> cf.equals(x, y)
True
'''
fh = open(filename, 'rb')
try:
x = cPickle.load(fh)
except:
# Failed unpickling can throw up any type of error, so trap
# them all, but raise an informative UnpicklingError.
fh.close()
raise cPickle.UnpicklingError(
"Failed whilst unpickling file '%s'" % filename)
fh.close()
return x
#--- End: def
_d = {'char': numpy.dtype('S1')}
def string_to_numpy_data_type(string):
'''
Convert a string description of a data type to a numpy data type,
with 'char' mapped to the single-character string type
numpy.dtype('S1').
'''
try:
return numpy.dtype(string)
except TypeError:
try:
return _d[string]
except KeyError:
raise TypeError(
"Can't convert %r to a numpy data type" % string)
#--- End: def
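# For example (both forms return a numpy dtype):
#
# >>> string_to_numpy_data_type('float32')
# dtype('float32')
# >>> string_to_numpy_data_type('char')
# dtype('S1')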
def abspath(filename):
'''
Return the absolute path of the given file name, leaving URLs (names
beginning with 'http://') unchanged.
'''
if filename.startswith('http://'):
return filename
return os_path_abspath(filename)
#--- End: def
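# For example, URLs pass through unchanged whilst local names are
# expanded relative to the current working directory (assumed here to
# be /data):
#
# >>> abspath('http://example.org/file.nc')
# 'http://example.org/file.nc'
# >>> abspath('file.nc')
# '/data/file.nc'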