########################################################################
#
#       License: BSD
#       Created: October 10, 2002
#       Author:  Francesc Altet - faltet@carabos.com
#
#       $Source: /home/ivan/_/programari/pytables/svn/cvs/pytables/pytables/tables/Array.py,v $
#       $Id: Array.py 1181 2005-09-12 17:49:13Z faltet $
#
########################################################################

"""Here is defined the Array class.

See Array class docstring for more info.

Classes:

    Array
    ImageArray

Functions:


Misc variables:

    __version__


"""

import types, warnings, sys

import numarray
import numarray.strings as strings

try:
    import Numeric
    Numeric_imported = True
except ImportError:
    Numeric_imported = False

import tables.hdf5Extension as hdf5Extension
from tables.utils import calcBufferSize, processRange, processRangeRead, \
                         convToFlavor, CHUNKTIMES
from tables.Leaf import Leaf, Filters


# Revision of this module (a version control keyword string).
__version__ = "$Revision: 1181 $"


# Default version for ARRAY objects. This is the value that
# Array._create() stores in the _v_version attribute of new arrays.
# The commented-out lines below record the previous versions.
#obversion = "1.0"    # initial version
#obversion = "2.0"    # Added an optional EXTDIM attribute
#obversion = "2.1"    # Added support for complex datatypes
#obversion = "2.2"    # This adds support for time datatypes.
obversion = "2.3"    # This adds support for enumerated datatypes.



class Array(hdf5Extension.Array, Leaf):
    """Represent an homogeneous dataset in HDF5 file.

    It enables to create new datasets on-disk from Numeric, numarray,
    lists, tuples, strings or scalars, or open existing ones.

    All Numeric and numarray typecodes are supported except for complex
    datatypes.

    Methods:

        read(start, stop, step)
        iterrows(start, stop, step)

    Instance variables:

        type -- The type class for the array.
        itemsize -- The size of the atomic items. Specially useful for
            CharArray objects.
        flavor -- The object type of this object ("NumArray", "CharArray",
            "Numeric", "List", "Tuple", "String", "Int" or "Float").
        nrows -- The value of the first dimension of Array.
        nrow -- On iterators, this is the index of the current row.

    """

    # Class identifier.
    _c_classId = 'ARRAY'


    # <undo-redo support>
    # Class-level flags stating which operations on nodes of this class
    # can be undone and redone.
    _c_canUndoCreate = True  # Can creation/copying be undone and redone?
    _c_canUndoRemove = True  # Can removal be undone and redone?
    _c_canUndoMove   = True  # Can movement/renaming be undone and redone?
    # </undo-redo support>


    def _g_calcBufferSize(self, expectedfsizeinKb):
        """Compute and optimum buffer size.

        The logic to do that is based purely in experiments playing
        with different buffer sizes, chunksize and compression
        flag. It is obvious that using big buffers optimize the I/O
        speed. This might (should) be further optimized doing more
        experiments.

        This only is important for CArray and EArray classes, and it
        is keeped here just because it's an easy way to share it.

        """

        # Increasing the bufmultfactor would enable a good compression
        # ratio (up to an extend), but it would affect to reading
        # performance. Be careful when touching this
        # F. Altet 2004-11-10
        #bufmultfactor = int(1000 * 5) # Conservative value
        bufmultfactor = int(1000 * 10) # Medium value
        #bufmultfactor = int(1000 * 20)  # Agressive value
        #bufmultfactor = int(1000 * 50) # Very Aggresive value

        if expectedfsizeinKb <= 100:
            # Values for files less than 100 KB of size
            buffersize = 5 * bufmultfactor
        elif (expectedfsizeinKb > 100 and
            expectedfsizeinKb <= 1000):
            # Values for files less than 1 MB of size
            buffersize = 10 * bufmultfactor
        elif (expectedfsizeinKb > 1000 and
              expectedfsizeinKb <= 20 * 1000):
            # Values for sizes between 1 MB and 20 MB
            buffersize = 20  * bufmultfactor
        elif (expectedfsizeinKb > 20 * 1000 and
              expectedfsizeinKb <= 200 * 1000):
            # Values for sizes between 20 MB and 200 MB
            buffersize = 40 * bufmultfactor
        elif (expectedfsizeinKb > 200 * 1000 and
              expectedfsizeinKb <= 2000 * 1000):
            # Values for sizes between 200 MB and 2 GB
            buffersize = 50 * bufmultfactor
        else:  # Greater than 2 GB
            buffersize = 60 * bufmultfactor

        return buffersize

    def __init__(self, object = None, title = ""):
        """Set up the Array instance, either new or already on disk.

        Keyword arguments:

        object -- The (regular) object to be saved. It can be any of
            NumArray, CharArray, Numeric, List, Tuple, String, Int or
            Float types, provided that it is regular (i.e. it is not
            like [[1,2],2]). When it is None, the instance is flagged
            as not new, i.e. its contents are to be read from disk.

        title -- Sets a TITLE attribute on the HDF5 array entity.

        """
        # An object to save means a new array; otherwise the contents
        # have to be read from disk
        if object is None:
            self._v_new = 0
        else:
            self._v_new = 1
            self.object = object
        self.extdim = -1   # An Array object is not enlargeable
        # Default filter values, i.e. no filters for Array
        self._v_new_filters = Filters()
        self._v_new_title = title

    def _create(self):
        """Write a brand new array (i.e., one not present on HDF5 file).

        The object kept in self.object is converted into a numarray
        object, the metadata attributes of the instance are derived
        from it and the array is then created through
        self._createArray(). If the conversion or the creation fails,
        the node is closed and the exception is re-raised. A
        ValueError is raised if any dimension is not positive.

        """
        self._v_version = obversion
        try:
            naarr, self.flavor = self._convertIntoNA(self.object)
        except:  #XXX
            # The input object could not be converted. Close the node
            # and let the original exception go on.
            self.close(flush=0)
            raise

        dims = naarr.shape
        if dims:
            nrows = dims[0]
        else:
            nrows = 1  # Scalar case
        self._v_expectedrows = nrows

        if isinstance(naarr, strings.CharArray):
            self.byteorder = "non-relevant"
        else:
            self.byteorder = naarr._byteorder

        # Values for buffering and I/O parameters: the rowsize is the
        # item size times the extent of every dimension
        self.rowsize = naarr.itemsize()
        for extent in dims:
            if extent <= 0:
                raise ValueError(
                    "An Array object cannot have zero-dimensions.")
            self.rowsize *= extent

        # Compute the optimal chunksize
        buffering = calcBufferSize(self.rowsize, self._v_expectedrows)
        (self._v_maxTuples, self._v_chunksize) = buffering

        self.shape = naarr.shape
        self.nrows = nrows
        self.itemsize = naarr.itemsize()
        try:
            self.type, self.stype = self._createArray(naarr,
                                                      self._v_new_title)
        except:  #XXX
            # The array could not be created on disk. Close the node
            # and let the original exception go on.
            self.close(flush=0)
            raise

    def _convertIntoNA(self, object):
        "Convert a generic object into a numarray object"
        arr = object
        if isinstance(arr, numarray.NumArray):
            flavor = "NumArray"
            naarr = arr
            self.byteorder  = naarr._byteorder
        elif isinstance(arr, strings.CharArray):
            flavor = "CharArray"
            naarr = arr
            self.byteorder = "non-relevant"
        elif (Numeric_imported and type(arr) == type(Numeric.array(1))):
            flavor = "Numeric"
            if arr.typecode() == "c":
                # To emulate as close as possible Numeric character arrays,
                # itemsize for chararrays will be always 1
                if arr.iscontiguous():
                    # This the fastest way to convert from Numeric to numarray
                    # because no data copy is involved
                    naarr = strings.array(buffer(arr),
                                          itemsize=1,
                                          shape=arr.shape)
                else:
                    # Here we absolutely need a copy so as to obtain a buffer.
                    # Perhaps this can be avoided or optimized by using
                    # the tolist() method, but this should be tested.
                    naarr = strings.array(buffer(arr.copy()),
                                          itemsize=1,
                                          shape=arr.shape)
            else:
                if arr.iscontiguous():
                    # This the fastest way to convert from Numeric to numarray
                    # because no data copy is involved
                    naarr = numarray.array(buffer(arr),
                                           type=arr.typecode(),
                                           shape=arr.shape)
                else:
                    # Here we absolutely need a copy in order
                    # to obtain a buffer.
                    # Perhaps this can be avoided or optimized by using
                    # the tolist() method, but this should be tested.
                    naarr = numarray.array(buffer(arr.copy()),
                                           type=arr.typecode(),
                                           shape=arr.shape)

        elif (isinstance(arr, tuple) or
              isinstance(arr, list)):
            # Test if can convert to numarray object
            try:
                naarr = numarray.array(arr)
            # If not, test with a chararray
            except TypeError:
                try:
                    naarr = strings.array(arr)
                # If still doesn't, issues an error
                except:  #XXX
                    raise TypeError, \
"""The object '%s' can't be converted into a numerical or character array.
Sorry, but this object is not supported.""" % (arr)
            if isinstance(arr, tuple):
                flavor = "Tuple"
            else:
                flavor = "List"
        elif isinstance(arr, int):
            naarr = numarray.array(arr)
            flavor = "Int"
        elif isinstance(arr, float):
            naarr = numarray.array(arr)
            flavor = "Float"
        elif isinstance(arr, str):
            naarr = strings.array(arr)
            flavor = "String"
        else:
            raise TypeError, \
"""The object '%s' is not in the list of supported objects (NumArray, CharArray, Numeric, homogeneous list or homogeneous tuple, int, float or str). Sorry, but this object is not supported.""" % (arr)

        # We always want a contiguous buffer
        # (no matter if has an offset or not; that will be corrected later)
        if (not naarr.iscontiguous()):
            # Do a copy of the array in case is not contiguous
            naarr = numarray.NDArray.copy(naarr)

        return naarr, flavor

    def _open(self):
        """Get the metadata info for an array in file."""

        (self.type, self.stype, self.shape, self.itemsize, self.byteorder,
         self._v_chunksize) = self._openArray()
        # Compute the maxTuples
        self._v_maxTuples = self._v_chunksize * CHUNKTIMES

        # Get enumeration from disk.
        if self.stype == 'Enum':
            (self._enum, self.type) = self._loadEnum()
        else:
            self._enum = None

        # Compute the rowsize for each element
        self.rowsize = self.itemsize
        for i in xrange(len(self.shape)):
            self.rowsize *= self.shape[i]
        # Assign a value to nrows in case we are a non-enlargeable object
        if self.shape:
            self.nrows = self.shape[0]
        else:
            self.nrows = 1L   # Scalar case

    def getEnum(self):
        """
        Get the enumerated type associated with this array.

        If this array is of an enumerated type, the corresponding `Enum`
        instance is returned.  If it is not of an enumerated type, a
        ``TypeError`` is raised.
        """

        if self.stype != 'Enum':
            raise TypeError("array ``%s`` is not of an enumerated type"
                            % self._v_pathname)

        return self._enum


    def iterrows(self, start=None, stop=None, step=None):
        """Iterate over all the rows or over a range of them.

        The start, stop and step arguments are processed with
        processRangeRead() against the number of rows. The array
        itself is returned as the iterator. If the range processing
        raises an IndexError, an empty tuple is returned instead.

        """

        try:
            bounds = processRangeRead(self.nrows, start, stop, step)
        except IndexError:
            # If problems with indexes, silently return the null tuple
            return ()
        (self._start, self._stop, self._step) = bounds
        self._initLoop()
        return self

    def __iter__(self):
        """Iterate over all the rows."""

        if not hasattr(self, "_init"):
            # If the iterator is called directly, assign default variables
            self._start = 0
            self._stop = self.nrows
            self._step = 1
            # and initialize the loop
            self._initLoop()
        return self

    def _initLoop(self):
        "Initialization for the __iter__ iterator"

        self._nrowsread = self._start
        self._startb = self._start
        self._row = -1   # Sentinel
        self._init = 1    # Sentinel
        self.nrow = self._start - self._step    # row number

    def next(self):
        "next() method for __iter__() that is called on each iteration"
        if self._nrowsread >= self._stop:
            del self._init
            raise StopIteration        # end of iteration
        else:
            # Read a chunk of rows
            if self._row+1 >= self._v_maxTuples or self._row < 0:
                self._stopb = self._startb+self._step*self._v_maxTuples
                # Protection for reading more elements than needed
                if self._stopb > self._stop:
                    self._stopb = self._stop
                self.listarr = self.read(self._startb, self._stopb, self._step)
                # Swap the axes to easy the return of elements
                if self.extdim > 0:
                    if self.flavor == "Numeric":
                        if Numeric_imported:
                            self.listarr = Numeric.swapaxes(self.listarr,
                                                            self.extdim, 0)
                        else:
                            # Warn the user
                            warnings.warn( \
"""The object on-disk has Numeric flavor, but Numeric is not installed locally. Returning a numarray object instead!.""")
                            # Default to numarray
                            self.listarr = swapaxes(self.listarr,
                                                    self.extdim, 0)
                    else:
                        self.listarr = numarray.swapaxes(self.listarr,
			                                 self.extdim, 0)
                self._row = -1
                self._startb = self._stopb
            self._row += 1
            self.nrow += self._step
            self._nrowsread += self._step
            # Fixes bug #968132
            #if self.listarr.shape:
            if self.shape:
                return self.listarr[self._row]
            else:
                return self.listarr    # Scalar case

    def _interpret_indexing(self, keys):
        """Internal routine used by __getitem__ and __setitem__.

        Keyword arguments:

        keys -- An integer, a slice, an Ellipsis or a tuple made of
            them.

        Returns a (startl, stopl, stepl, shape) tuple. The first three
        are Int64 numarray objects with the start, stop and step
        values for every dimension of the array. The last one is a
        list with the shape of the selection, where the dimensions
        selected by an integer index are left out.

        An IndexError is raised if there are more indices than
        dimensions or if an integer index is out of range. A TypeError
        is raised for any other kind of key.

        """

        maxlen = len(self.shape)
        shape = (maxlen,)
        startl = numarray.array(None, shape=shape, type=numarray.Int64)
        stopl = numarray.array(None, shape=shape, type=numarray.Int64)
        stepl = numarray.array(None, shape=shape, type=numarray.Int64)
        # Flags for the dimensions selected by an integer index
        stop_None = numarray.zeros(shape=shape, type=numarray.Int64)
        if not isinstance(keys, tuple):
            keys = (keys,)
        nkeys = len(keys)
        dim = 0
        # Here is some problem when dealing with [...,...] params
        # but this is a bit weird way to pass parameters anyway
        for key in keys:
            ellipsis = 0  # Sentinel
            if isinstance(key, types.EllipsisType):
                ellipsis = 1
                # Take complete dimensions, leaving room for the keys
                # that come after the ellipsis
                for diml in xrange(dim, len(self.shape) - (nkeys - dim) + 1):
                    startl[dim] = 0
                    stopl[dim] = self.shape[diml]
                    stepl[dim] = 1
                    dim += 1
            elif dim >= maxlen:
                raise IndexError("Too many indices for object '%s'" %
                                 self._v_pathname)
            elif type(key) in (int, long):
                length = self.shape[dim]
                # Index out of range protection. The lower bound has to
                # be checked as well, or else the index would still be
                # negative after the correction below.
                if key >= length or key < -length:
                    raise IndexError("Index out of range")
                if key < 0:
                    # To support negative values (Fixes bug #968149)
                    key += length
                start, stop, step = processRange(length, key, key+1, 1)
                stop_None[dim] = 1
            elif isinstance(key, slice):
                start, stop, step = processRange(self.shape[dim],
                                                 key.start, key.stop, key.step)
            else:
                # The key goes wrapped in a 1-tuple so that it is always
                # formatted as a single value
                raise TypeError("Non-valid index or slice: %s" % (key,))
            if not ellipsis:
                startl[dim] = start
                stopl[dim] = stop
                stepl[dim] = step
                dim += 1

        # Complete the other dimensions, if needed
        if dim < len(self.shape):
            for diml in xrange(dim, len(self.shape)):
                startl[dim] = 0
                stopl[dim] = self.shape[diml]
                stepl[dim] = 1
                dim += 1

        # Compute the shape for the container properly. Fixes #1288792
        shape = []
        for dim in xrange(len(self.shape)):
            new_dim = ((stopl[dim] - startl[dim] - 1) / stepl[dim]) + 1
            if not (new_dim == 1 and stop_None[dim]):
                # Append dimension
                shape.append(new_dim)

        return startl, stopl, stepl, shape

    def __getitem__(self, keys):
        """Return an Array element, row or extended slice.

        What is returned depends on the type of the "keys" parameter:

        If "keys" is an integer, the corresponding row is returned. If
        "keys" is a slice, the row slice determined by it is returned.

        """

        # The selection is the (startl, stopl, stepl, shape) tuple
        selection = self._interpret_indexing(keys)
        return self._readSlice(*selection)


    def __setitem__(self, keys, value):
        """Sets an Array element, row or extended slice.

        It takes different actions depending on the type of the "key"
        parameter:

        If "key" is an integer, the corresponding row is assigned to
        value.

        If "key" is a slice, the row slice determined by it is
        assigned to "value". If needed, this "value" is broadcasted to
        fit in the desired range. If the slice to be updated exceeds
        the actual shape of the array, only the values in the existing
        range are updated, i.e. the index error will be silently
        ignored. If "value" is a multidimensional object, then its
        shape must be compatible with the slice specified in "key",
        otherwhise, a ValueError will be issued.

        """

        startl, stopl, stepl, shape = self._interpret_indexing(keys)
        countl = ((stopl - startl - 1) / stepl) + 1
        # Create an array compliant with the specified slice
        if str(self.type) == "CharType":
            narr = strings.array(None, itemsize=self.itemsize,
            # Here shape=shape should be enough, but it makes some
            # tests to fail. This should be analyzed more carefully.
            # F. Altet 2005-09-12
                                 shape=countl)
        else:
            narr = numarray.array(None, shape=shape, type=self.type)

        shape = numarray.array(shape, shape=len(shape), type=numarray.Int64)

        # Assign the value to it
        try:
            narr[...] = value
        except:  #XXX
            (typerr, value2, traceback) = sys.exc_info()
            raise ValueError, \
"""value parameter '%s' cannot be converted into an array object compliant with %s:
'%r'
The error was: <%s>""" % (value, self.__class__.__name__, self, value2)

        if narr.size():
            self._modify(startl, stepl, countl, narr)

    # Accessor for the _readArray method in superclass
    def _readSlice(self, startl, stopl, stepl, shape):
        """Read an extended slice of the array from disk.

        Keyword arguments:

        startl, stopl, stepl -- The start, stop and step values for
            every dimension, as returned by _interpret_indexing().

        shape -- The shape (a list) of the container for the selection.

        The data is read through self._g_readSlice(), unless the
        selection is empty. The container is returned as is when
        self._v_convert exists and is 0. Otherwise a scalar selection
        is returned as a value, NumArray and CharArray flavors get the
        container itself, and any other flavor gets the result of
        convToFlavor().

        """

        if repr(self.type) == "CharType":
            # Workaround for numarray bug #997997
            if shape != []:
                arr = strings.array(None, itemsize=self.itemsize, shape=shape)
            else:
                arr = strings.array([""], itemsize=self.itemsize, shape=shape)
        else:
            #arr = numarray.zeros(type=self.type, shape=shape)
            # This is slightly faster (~3%) than zeros()
            arr = numarray.array(None,type=self.type, shape=shape)
            # Set the same byteorder than on-disk
            arr._byteorder = self.byteorder

        # Protection against reading empty arrays
        if 0 not in shape:
            # Arrays that have non-zero dimensionality
            self._g_readSlice(startl, stopl, stepl, arr)

        if hasattr(self, "_v_convert") and self._v_convert == 0:
            return arr

        # Fixes #968131
        # A scalar array is meant as a way to return a Python value, no
        # matter the flavor. Note that a (1,) shape does not qualify.
        if arr.shape == ():  # Scalar case
            return arr[()]  # return the value.

        if self.flavor in ["NumArray", "CharArray"]:
            # No conversion needed
            return arr
        return convToFlavor(self, arr)

    # Accessor for the _readArray method in superclass
    def read(self, start=None, stop=None, step=None):
        """Read the array from disk and return it as a self.flavor object.

        The start, stop and step arguments select the range of rows to
        read, after being processed with processRangeRead(). NumArray
        and CharArray flavors get the numarray container itself, a
        scalar array of any other flavor is returned as a value, and
        the rest are converted with convToFlavor().

        """

        # The rows are counted along the first dimension unless
        # self.extdim states another (non-negative) one
        extdim = self.extdim
        if extdim < 0:
            extdim = 0

        (start, stop, step) = processRangeRead(self.nrows, start, stop, step)
        rowstoread = ((stop - start - 1) / step) + 1
        shape = list(self.shape)
        if shape:
            shape[extdim] = rowstoread
            shape = tuple(shape)

        if repr(self.type) != "CharType":
            arr = numarray.array(None, type=self.type, shape=shape)
            # Set the same byteorder than on-disk
            arr._byteorder = self.byteorder
        elif self.shape == ():
            # a workaround for a bug introduced in numarray 1.3.1
            #arr = strings.array(None, itemsize=self.itemsize, shape=shape)
            arr = strings.array(' '*self.itemsize,
                                itemsize=self.itemsize, shape=shape)
        else:
            arr = strings.array(None, itemsize=self.itemsize, shape=shape)

        # Protection against reading empty arrays
        if 0 not in shape:
            # Arrays that have non-zero dimensionality
            self._readArray(start, stop, step, arr)

        if self.flavor not in ["NumArray", "CharArray"]:
            # Fixes #968131
            if arr.shape == ():  # Scalar case
                return arr[()]  # return the value. Yes, a weird syntax :(
            return convToFlavor(self, arr)
        # No conversion needed
        return arr

    def _g_copyWithStats(self, group, name, start, stop, step, title, filters):
        """Private part of Leaf.copy() for each kind of leaf.

        The start:stop:step slice of this array (or its value, in the
        scalar case) is saved as a new array called name in group,
        with the given title. The filters argument is not used here.

        Returns a (newarray, nbytes) tuple, where nbytes is the item
        size times the extent of every dimension of this array.

        """
        # Get the slice of the array
        # (non-buffered version)
        if self.shape:
            arr = self[start:stop:step]
        else:
            arr = self[()]
        # Build the new Array object
        newarray = self._v_file.createArray(
            group, name, arr, title=title, _log = False)
        # NOTE(review): the size is computed from the shape of the whole
        # array, not from the slice that has been copied -- confirm that
        # this is intended.
        nbytes = self.itemsize
        for extent in self.shape:
            nbytes *= extent

        return (newarray, nbytes)

    def __repr__(self):
        """This provides more metainfo in addition to standard __str__"""

        return """%s
  type = %r
  stype = %r
  shape = %s
  itemsize = %s
  nrows = %s
  flavor = %r
  byteorder = %r""" % (self, self.type, self.stype, self.shape, self.itemsize,
                       self.nrows, self.flavor, self.byteorder)



class ImageArray(Array):
    """
    Array that holds an image.

    Nothing is added here to the behaviour or functionality of an
    ordinary array.  The only purpose of this class is to let the user
    open an ``IMAGE`` HDF5 node as a normal `Array` node in PyTables.
    """

    # Class identifier.
    _c_classId = 'IMAGE'
