#  Ei!, emacs, this is -*-Python-*- mode
########################################################################
#
#       License: BSD
#       Created: June 17, 2005
#       Author:  Francesc Altet - faltet@carabos.com
#
#       $Id: utilsExtension.pyx 1009 2005-06-15 13:39:15Z faltet $
#
########################################################################

"""Here is where Table and Row extension types live.

Classes (type extensions):

    Table
    Row

Functions:



Misc variables:

    __version__
"""

import numarray

import tables.hdf5Extension
from tables.exceptions import HDF5ExtError
from tables.utilsExtension import createNestedType, \
     getNestedType, convertTime64, getTypeEnum, enumFromHDF5


from definitions cimport \
     import_libnumarray, NA_getPythonScalar, NA_getBufferPtrAndSize

__version__ = "$Revision: 1009 $"


#-----------------------------------------------------------------

# Define the CharType code as a constant
cdef enum:
  CHARTYPE = 97  # 97 == ord('a')

# Standard C functions.
cdef extern from "stdlib.h":
  ctypedef long size_t
  void *malloc(size_t size)
  void free(void *ptr)

cdef extern from "string.h":
  char *strcpy(char *dest, char *src)
  char *strncpy(char *dest, char *src, size_t n)
  int strcmp(char *s1, char *s2)
  char *strdup(char *s)
  void *memcpy(void *dest, void *src, size_t n)

# Python API functions.
cdef extern from "Python.h":
  # To release global interpreter lock (GIL) for threading
  void Py_BEGIN_ALLOW_THREADS()
  void Py_END_ALLOW_THREADS()

  char *PyString_AsString(object string)

# HDF5 API.
cdef extern from "hdf5.h":
  # Basic HDF5 types, as seen by Pyrex
  ctypedef int hid_t
  ctypedef int herr_t
  ctypedef long long hsize_t

  # Datatype classes
  cdef enum H5T_class_t:
    H5T_NO_CLASS         = -1,  # error
    H5T_INTEGER          = 0,   # integer types
    H5T_FLOAT            = 1,   # floating-point types
    H5T_TIME             = 2,   # date and time types
    H5T_STRING           = 3,   # character string types
    H5T_BITFIELD         = 4,   # bit field types
    H5T_OPAQUE           = 5,   # opaque types
    H5T_COMPOUND         = 6,   # compound types
    H5T_REFERENCE        = 7,   # reference types
    H5T_ENUM             = 8,   # enumeration types
    H5T_VLEN             = 9,   # variable-length types
    H5T_ARRAY            = 10,  # array types
    H5T_NCLASSES                # this must be last

  # Native types
  cdef enum:
    H5T_NATIVE_CHAR
    H5T_NATIVE_SCHAR
    H5T_NATIVE_UCHAR
    H5T_NATIVE_SHORT
    H5T_NATIVE_USHORT
    H5T_NATIVE_INT
    H5T_NATIVE_UINT
    H5T_NATIVE_LONG
    H5T_NATIVE_ULONG
    H5T_NATIVE_LLONG
    H5T_NATIVE_ULLONG
    H5T_NATIVE_FLOAT
    H5T_NATIVE_DOUBLE
    H5T_NATIVE_LDOUBLE

  # Functions for dealing with datasets
  hid_t  H5Dopen (hid_t file_id, char *name)
  herr_t H5Dclose (hid_t dset_id)
  herr_t H5Dread (hid_t dset_id, hid_t mem_type_id, hid_t mem_space_id,
                  hid_t file_space_id, hid_t plist_id, void *buf)
  hid_t H5Dget_type (hid_t dset_id)
  hid_t H5Dget_space (hid_t dset_id)

  # Functions for dealing with dataspaces
  int H5Sget_simple_extent_ndims(hid_t space_id)

  int H5Sget_simple_extent_dims(hid_t space_id, hsize_t dims[],
                                hsize_t maxdims[])

  herr_t H5Sclose(hid_t space_id)

  # Functions for dealing with datatypes
  size_t H5Tget_size(hid_t type_id)
  hid_t  H5Tcreate(H5T_class_t type, size_t size)
  hid_t  H5Tcopy(hid_t type_id)
  herr_t H5Tclose(hid_t type_id)
  herr_t H5Tget_sign(hid_t type_id)


cdef extern from "H5TB.h":

  herr_t H5TBmake_table( char *table_title, hid_t loc_id, char *dset_name,
                         char *version, char *class_,
                         hid_t mem_type_id, hsize_t nrecords,
                         hsize_t chunk_size, int compress,
                         char *complib, int shuffle, int fletcher32,
                         void *data )

  herr_t H5TBdelete_record( hid_t   loc_id,
                            char    *dset_name,
                            hid_t   mem_type_id,
                            hsize_t start,
                            hsize_t nrecords,
                            hsize_t maxtuples)


cdef extern from "H5TB-opt.h":

  herr_t H5TBOopen_read( hid_t *dataset_id,
                         hid_t *space_id,
                         hid_t loc_id,
                         char *dset_name)

  herr_t H5TBOread_records( hid_t *dataset_id, hid_t *space_id,
                            hid_t mem_type_id, hsize_t start,
                            hsize_t nrecords, void *data )

  herr_t H5TBOwrite_records ( hid_t loc_id,  char *dset_name,
                              hsize_t start, hsize_t nrecords,
                              hsize_t step, size_t type_size,
                              hid_t mem_type_id, void *data )

  herr_t H5TBOread_elements( hid_t *dataset_id,
                             hid_t *space_id,
                             hid_t mem_type_id,
                             hsize_t nrecords,
                             void *coords,
                             void *data )

  herr_t H5TBOclose_read( hid_t *dataset_id,
                          hid_t *space_id)

  herr_t H5TBOopen_append( hid_t *dataset_id,
                           hid_t loc_id,
                           char *dset_name )

  herr_t H5TBOappend_records( hid_t *dataset_id,
                              hid_t mem_type_id,
                              hsize_t nrecords,
                              hsize_t nrecords_orig,
                              void *data )

  herr_t H5TBOclose_append(hid_t *dataset_id,
                           hsize_t ntotal_records,
                           char *dset_name,
                           hid_t parent_id)

# Functions from HDF5 HL Lite
cdef extern from "H5LT.h":

  herr_t H5LTset_attribute_string( hid_t loc_id, char *obj_name,
                                   char *attr_name, char *attr_data )

  herr_t H5LT_set_attribute_numerical( hid_t loc_id,
                                       char *obj_name,
                                       char *attr_name,
                                       hid_t type_id,
                                       void *data )

#----------------------------------------------------------------------------

# Initialization code

# The numarray API requires this function to be called before
# using any numarray facilities in an extension module.
import_libnumarray()

#-------------------------------------------------------------

cdef class Table:
  # instance variables
  # Pointer to the data area of the recarray used as I/O buffer; it is
  # set by _open_append() and _open_read()
  cdef void    *rbuf
  # Number of rows in the table, as tracked by this extension
  cdef hsize_t totalrecords
  # Private C copy (strdup) of the node name; freed in __dealloc__()
  cdef char    *name
  # Non-zero while the table is open for appending
  cdef int     _open
  # HDF5 identifiers: parent group, open dataset, its dataspace and the
  # in-memory compound datatype
  cdef hid_t   parent_id, dataset_id, space_id, mem_type_id

  def _g_new(self, where, name):
    """Bind this object to the node `name` hanging from the `where` group.

    A private C copy of `name` is kept in this instance; it is released
    in __dealloc__().  A MemoryError is raised if the copy cannot be
    allocated.
    """
    cdef char *newname

    newname = strdup(name)
    if newname == NULL:
      raise MemoryError("Can't allocate memory for the table name")
    # Release the copy made by a previous call, if any, so that calling
    # this method again does not leak it (free(NULL) is a no-op)
    free(<void *>self.name)
    self.name = newname
    # The parent group id for this object
    self.parent_id = where._v_objectID
    self._open = 0
    self.mem_type_id = 0

  def _g_updateTypeId(self):
    "Refresh the complete compound datatype based on the table description"
    if self.mem_type_id:
      H5Tclose(self.mem_type_id)
      # Forget the closed identifier at once: if createNestedType() raises,
      # __dealloc__() must not try to close it a second time
      self.mem_type_id = 0
    self.mem_type_id = createNestedType(self.description, self.byteorder)

  def _createTable(self, char *title, char *complib, char *obversion):
    """Create the table on disk and attach its conforming attributes.

    The compound datatype is built from the table description.  If the
    instance has a `_v_recarray` attribute, its data is saved as the
    initial contents of the table.  The CLASS, VERSION, TITLE, NROWS and
    FIELD_N_NAME attributes are attached afterwards.

    An HDF5ExtError is raised if the table cannot be created or if any
    of the attributes cannot be set.
    """
    cdef int     offset
    cdef int     ret
    cdef long    buflen
    cdef hid_t   oid
    cdef void    *data
    cdef hsize_t nrecords
    cdef char    *class_
    cdef object  i, fieldname, name

    # Compute the complete compound datatype based on the table description
    self.mem_type_id = createNestedType(self.description, self.byteorder)

    # test if there is data to be saved initially
    if hasattr(self, "_v_recarray"):
      self.totalrecords = self.nrows
      buflen = NA_getBufferPtrAndSize(self._v_recarray._data, 1, &data)
      # Correct the offset in the buffer
      offset = self._v_recarray._byteoffset
      data = <void *>(<char *>data + offset)
    else:
      self.totalrecords = 0
      data = NULL

    class_ = PyString_AsString(self._c_classId)
    oid = H5TBmake_table(title, self.parent_id, self.name,
                         obversion, class_,
                         self.mem_type_id, self.nrows, self._v_chunksize,
                         self.filters.complevel, complib,
                         self.filters.shuffle, self.filters.fletcher32,
                         data)
    if oid < 0:
      raise HDF5ExtError("Problems creating the table")
    self._v_objectID = oid

    # Set the conforming table attributes
    # Attach the CLASS attribute
    ret = H5LTset_attribute_string(self.parent_id, self.name,
                                   "CLASS", class_)
    if ret < 0:
      raise HDF5ExtError("Can't set attribute '%s' in table:\n %s." %
                         ("CLASS", self.name))
    # Attach the VERSION attribute
    ret = H5LTset_attribute_string(self.parent_id, self.name,
                                   "VERSION", obversion)
    if ret < 0:
      raise HDF5ExtError("Can't set attribute '%s' in table:\n %s." %
                         ("VERSION", self.name))
    # Attach the TITLE attribute
    ret = H5LTset_attribute_string(self.parent_id, self.name,
                                   "TITLE", title)
    if ret < 0:
      raise HDF5ExtError("Can't set attribute '%s' in table:\n %s." %
                         ("TITLE", self.name))
    # Attach the NROWS attribute
    nrecords = self.nrows
    ret = H5LT_set_attribute_numerical(self.parent_id, self.name, "NROWS",
                                       H5T_NATIVE_LLONG, &nrecords )
    if ret < 0:
      raise HDF5ExtError("Can't set attribute '%s' in table:\n %s." %
                         ("NROWS", self.name))

    # Attach the FIELD_N_NAME attributes
    # We write only the first level names
    i = 0
    for name in self.description._v_names:
      fieldname = "FIELD_%s_NAME" % i
      ret = H5LTset_attribute_string(self.parent_id, self.name,
                                     fieldname, name)
      # The status has to be checked for every field; checking it after
      # the loop would only detect a failure on the last one
      if ret < 0:
        raise HDF5ExtError("Can't set attribute '%s' in table:\n %s." %
                           (fieldname, self.name))
      i = i + 1

  def _getInfo(self):
    """Get info from a table on disk.

    The number of rows is read from the dataset extent and stored in
    `totalrecords` and `nrows`.  A native in-memory compound type is
    created in `mem_type_id` and filled in by getNestedType(), whose
    result (the table description) is returned.

    An HDF5ExtError is raised if the dataset cannot be opened or if the
    description comes back empty.
    """
    cdef hid_t   dataset_id, space_id, disk_type_id
    cdef size_t  type_size
    cdef hsize_t dims[1]  # enough for unidimensional tables

    # Open the dataset
    dataset_id = H5Dopen(self.parent_id, self.name)
    if dataset_id < 0:
      raise HDF5ExtError("Problems opening table %s" % self.name)
    # Get the datatype
    disk_type_id = H5Dget_type(dataset_id)
    # Get the number of rows
    space_id = H5Dget_space(dataset_id)
    H5Sget_simple_extent_dims(space_id, dims, NULL)
    self.totalrecords = dims[0]
    # Assign the values to class variables
    self.nrows = self.totalrecords
    # Free resources
    H5Sclose(space_id)
    H5Dclose(dataset_id)

    # Get the type size
    type_size = H5Tget_size(disk_type_id)
    # Create the native data in-memory
    self.mem_type_id = H5Tcreate(H5T_COMPOUND, type_size)
    # Get the (nested) native type and description.  The on-disk type is
    # released in any case, so it is not leaked on the error paths.
    try:
      desc = getNestedType(disk_type_id, self.mem_type_id, self)
    finally:
      H5Tclose(disk_type_id)
    if desc == {}:
      raise HDF5ExtError("Problems getting description for table %s" %
                         self.name)

    # Return the table description
    return desc

  def _loadEnum(self, hid_t fieldTypeId):
    """_loadEnum(fieldTypeId) -> (Enum, naType)
    Load the enumerated type behind the `fieldTypeId` HDF5 type.

    The HDF5 enumerated type is obtained from `fieldTypeId` with
    getTypeEnum() and handed to enumFromHDF5(), whose result is
    returned: an `Enum` instance and the Numarray type used to encode
    it.  The HDF5 enumerated type is always closed before leaving.
    """

    cdef hid_t  enumId
    cdef herr_t status

    enumId = getTypeEnum(fieldTypeId)

    # Build the result first; the HDF5 type is closed no matter how
    # the conversion ends.
    try:
      result = enumFromHDF5(enumId)
    finally:
      status = H5Tclose(enumId)
      if status < 0:
        raise HDF5ExtError("failed to close HDF5 enumerated type")
    return result

  def _convertTypes(self, object recarr, hsize_t nrecords, int sense):
    """Convert the Time64 columns of 'recarr' in place.

    The columns listed in `_time64colnames` are passed, one by one, to
    convertTime64() together with 'nrecords' and 'sense'.  A 'sense' of
    0 asks for the Numarray to HDF5 conversion; any other value asks for
    the HDF5 to Numarray one.  'recarr' itself is modified.
    """

    # This should be generalised to support other type conversions.
    for timecol in self._time64colnames:
      column = recarr.field(timecol)
      convertTime64(column, nrecords, sense)

  def _open_append(self, object recarr):
    "Open the table for appending, using 'recarr' as the data buffer."
    cdef herr_t status

    # Remember where the data area of the buffer lives
    NA_getBufferPtrAndSize(recarr._data, 1, &self.rbuf)

    # Open the table for appending
    status = H5TBOopen_append(&self.dataset_id, self.parent_id, self.name)
    if status < 0:
      raise HDF5ExtError("Problems opening table for append.")

    # From now on the table is flagged as open
    self._open = 1
    self._v_objectID = self.dataset_id

  def _append_records(self, object recarr, int nrecords):
    """Append the first 'nrecords' rows of the buffer to the table.

    The table is opened for appending on first use.  `totalrecords` is
    updated once the rows have been written.
    """
    cdef herr_t status

    # Lazily open the table for appending
    if not self._open:
      self._open_append(recarr)

    # Convert some Numarray types to HDF5 before storing.
    self._convertTypes(recarr, nrecords, 0)

    # The GIL is released while HDF5 does the actual writing, and it is
    # acquired again right after
    Py_BEGIN_ALLOW_THREADS
    status = H5TBOappend_records(&self.dataset_id, self.mem_type_id,
                                 nrecords, self.totalrecords, self.rbuf)
    Py_END_ALLOW_THREADS
    if status < 0:
      raise HDF5ExtError("Problems appending the records.")

    # Account for the new rows
    self.totalrecords = self.totalrecords + nrecords

  def _close_append(self):
    "Close the table if it was open for appending and clear the flag."
    cdef herr_t status

    if self._open > 0:
      # Close the table for append
      status = H5TBOclose_append(&self.dataset_id, self.totalrecords,
                                 self.name, self.parent_id)
      if status < 0:
        raise HDF5ExtError("Problems closing table for append.")

    self._open = 0

  def _modify_records(self, hsize_t start, hsize_t stop,
                       hsize_t step, object recarr):
    """Overwrite rows in the range [start:stop:step] with those in 'recarr'.

    At most as many rows as the range contains are written; any extra
    rows in 'recarr' are ignored.  A ValueError is raised if 'step' is
    zero, and an HDF5ExtError if the write fails.
    """
    cdef herr_t ret
    cdef void *rbuf
    cdef long buflen
    cdef hsize_t nrecords, nrows
    cdef size_t rowsize

    # A zero step would make the C division below crash the interpreter
    if step == 0:
      raise ValueError("step must be greater than zero")

    # Get the pointer to the buffer data area
    buflen = NA_getBufferPtrAndSize(recarr._data, 1, &rbuf)

    # Compute the number of records to modify
    # NOTE(review): this assumes stop > start -- confirm against callers
    nrecords = len(recarr)
    nrows = ((stop - start - 1) / step) + 1
    if nrecords > nrows:
      nrecords = nrows

    # Convert some Numarray types to HDF5 before storing.
    self._convertTypes(recarr, nrecords, 0)
    # Modify the records:
    rowsize = self.rowsize
    Py_BEGIN_ALLOW_THREADS
    ret = H5TBOwrite_records(self.parent_id, self.name,
                             start, nrecords, step, rowsize,
                             self.mem_type_id, rbuf )
    Py_END_ALLOW_THREADS
    if ret < 0:
      raise HDF5ExtError("Problems modifying the records.")

  def _open_read(self, object recarr):
    "Open the table for reading, using 'recarr' as the data buffer."
    cdef herr_t status

    # Remember where the data area of the buffer lives
    NA_getBufferPtrAndSize(recarr._data, 1, &self.rbuf)

    # Get the dataset and dataspace ready for the reads to come
    status = H5TBOopen_read(&self.dataset_id, &self.space_id,
                            self.parent_id, self.name)
    if status < 0:
      raise HDF5ExtError("Problems opening table for read.")

  def _read_records(self, object recarr, hsize_t start, hsize_t nrecords):
    """Read up to 'nrecords' rows, beginning at 'start', into the buffer.

    The count is trimmed so that the read does not go past
    `totalrecords`.  The number of rows actually asked for is returned.
    """
    cdef herr_t status

    # Do not read past the end of the table
    # NOTE(review): 'start' is assumed not to exceed totalrecords --
    # confirm against callers
    if (start + nrecords) > self.totalrecords:
      nrecords = self.totalrecords - start

    # The GIL is released while the rows are fetched from disk
    Py_BEGIN_ALLOW_THREADS
    status = H5TBOread_records(&self.dataset_id, &self.space_id,
                               self.mem_type_id, start,
                               nrecords, self.rbuf)
    Py_END_ALLOW_THREADS
    if status < 0:
      raise HDF5ExtError("Problems reading records.")

    # Convert some HDF5 types to Numarray after reading.
    self._convertTypes(recarr, nrecords, 1)

    return nrecords

  def _read_elements(self, object recarr, object elements):
    """Read the rows whose coordinates are in 'elements' into the buffer.

    The table must have been opened with _open_read() before.  The
    number of coordinates is returned.
    """
    cdef hsize_t ncoords
    cdef void *coordsptr
    cdef int byteoffset
    cdef herr_t status

    # One row is read per coordinate
    ncoords = len(elements)
    # Locate the first coordinate: start of the data area plus the
    # byte offset of the array inside it
    NA_getBufferPtrAndSize(elements._data, 1, &coordsptr)
    byteoffset = elements._byteoffset
    coordsptr = <void *>(<char *>coordsptr + byteoffset)

    Py_BEGIN_ALLOW_THREADS
    status = H5TBOread_elements(&self.dataset_id, &self.space_id,
                                self.mem_type_id, ncoords,
                                coordsptr, self.rbuf)
    Py_END_ALLOW_THREADS
    if status < 0:
      raise HDF5ExtError("Problems reading records.")

    # Convert some HDF5 types to Numarray after reading.
    self._convertTypes(recarr, ncoords, 1)

    return ncoords

  def _close_read(self):
    "Release the dataset and dataspace opened by _open_read()."
    cdef herr_t status

    status = H5TBOclose_read(&self.dataset_id, &self.space_id)
    if status < 0:
      raise HDF5ExtError("Problems closing table for read.")

  def _read_elements_ra(self, object recarr, object elements):
    """Open the table, read the rows listed in 'elements' and close it.

    This is the self-contained counterpart of _read_elements(): the rows
    whose coordinates are in 'elements' are read into 'recarr', and the
    table is closed again even when the read fails.  The number of
    coordinates is returned.
    """
    cdef long buflen
    cdef hsize_t nrecords
    cdef void *coords
    cdef int ret, offset

    # Everything that may fail at the Python level is done before the
    # table gets opened, so that nothing is left open on error.
    # Get the chunk of the coords that correspond to a buffer
    nrecords = len(elements)
    # Get the pointer to the buffer data area
    buflen = NA_getBufferPtrAndSize(elements._data, 1, &coords)
    # Correct the offset, as _read_elements() does; otherwise the wrong
    # coordinates would be used when 'elements' does not start at the
    # beginning of its data buffer
    offset = elements._byteoffset
    coords = <void *>(<char *>coords + offset)

    self._open_read(recarr)   # Open the table for reading

    Py_BEGIN_ALLOW_THREADS
    ret = H5TBOread_elements(&self.dataset_id, &self.space_id,
                             self.mem_type_id, nrecords,
                             coords, self.rbuf)
    Py_END_ALLOW_THREADS
    if ret < 0:
      # Best-effort close: the read failure is the error to report
      H5TBOclose_read(&self.dataset_id, &self.space_id)
      raise HDF5ExtError("Problems reading records.")

    self._close_read()   # Close the table for reading

    # Convert some HDF5 types to Numarray after reading.
    self._convertTypes(recarr, nrecords, 1)

    return nrecords

  def _remove_row(self, nrow, nrecords):
    """Delete 'nrecords' rows beginning at row 'nrow'.

    The number of rows removed is returned.  A failure is reported on
    standard output and 0 is returned in that case.
    """
    cdef herr_t status

    status = H5TBdelete_record(self.parent_id, self.name, self.mem_type_id,
                               nrow, nrecords, self._v_maxTuples)
    if status >= 0:
      # The rows are gone: update the counter and report how many
      self.totalrecords = self.totalrecords - nrecords
      return nrecords

    #raise HDF5ExtError("Problems deleting records.")
    print "Problems deleting records."
    # Return no removed records
    return 0

  def  _get_mem_type_id(self):
    "Return the HDF5 identifier of the in-memory datatype (mem_type_id)"
    cdef hid_t type_id

    type_id = self.mem_type_id
    return type_id

  def __dealloc__(self):
    # Release the C-level resources held by this instance: the private
    # copy of the name and the in-memory datatype, if there is one
    #print "Destroying object Table in Extension"
    free(<void *>self.name)
    if self.mem_type_id:
      H5Tclose(self.mem_type_id)


cdef class Row:
  """Row Class

  This class hosts accessors to a recarray row. The fields of the
  row can be accessed as items (__getitem__/__setitem__),
  i.e. following the "map" protocol.

  """

  cdef object _table   # To allow compilation under MIPSPro C in SGI machines
  #cdef Table _table   # To allow access C methods in Table
  cdef object _wfields, _rfields, _recarray, _saveBufferedRows, _indexes
  cdef long long _row, _nrowinbuf, _unsavednrows
  cdef int    _strides
  #cdef readonly int _nrow # This is allowed from Pyrex 0.9 on
  # But defining it as long long makes it unaccessible from python!
  cdef hsize_t _nrow
  cdef hsize_t start, stop, step, nextelement
  cdef hsize_t nrowsinbuf, nrows, nrowsread, stopindex
  cdef int     bufcounter, counter, startb, stopb,  _all
  cdef int     *_scalar, *_enumtypes
  cdef int     _riterator, _rbuffer_initialized, _wbuffer_initialized
  cdef int     indexChunk
  cdef object  indexValid, coords, bufcoords, index
  cdef int     whereCond, indexed
  cdef double  startcond, stopcond
  cdef int     op1, op2
  cdef char    *colname
  cdef void    *rbuf
  cdef hid_t   parent_id, dataset_id, space_id, mem_type_id
  cdef object  ops, opsValues
  cdef int     _ropen

  def __new__(self, table):
  #def __new__(self, Table table):
    # The MIPSPro C compiler on a SGI does not like to have an assignation
    # of a type Table to a type object. For now, as we do not have to call
    # C methods in Tables, I'll declare table as object.
    # F. Altet 2004-02-11

    # Links to the table and to its HDF5 identifiers
    self._table = table
    self.parent_id = table._v_parent._v_objectID
    self.mem_type_id = table._get_mem_type_id()
    self._saveBufferedRows = table._saveBufferedRows

    # Row counters start from scratch
    self._unsavednrows = 0
    self._row = 0
    self._nrow = 0

    # No buffer has been created, and no iterator or read is under way
    self._rbuffer_initialized = 0
    self._wbuffer_initialized = 0
    self._riterator = 0
    self._ropen = 0

  def __call__(self, start=0, stop=0, step=1, coords=None, ncoords=0):
    """Prepare a read loop with the given arguments and return its iterator."""

    # All the arguments are handed over to _initLoop() as they come
    self._initLoop(start, stop, step, coords, ncoords)
    return iter(self)

  def __iter__(self):
    "Return this instance, which is its own iterator (see __next__)"

    return self

  def _newBuffer(self, write):
    """Create the recarray for I/O buffering.

    When 'write' is true, the write buffer, a copy of it for the default
    values and a read buffer are created in the table; otherwise only
    the read buffer is.  The per-field tables used by this instance
    (`_indexes`, `_scalar` and `_enumtypes`) are rebuilt in both cases.

    A MemoryError is raised if the per-field C arrays can't be allocated.
    """
    cdef int nfields, i

    if write:
      buff = self._table._v_wbuffer = self._table._newBuffer(init=1)
      self._wfields = buff._fields
      # Flag that tells that the buffer has been initialized for writing
      self._wbuffer_initialized = 1
      # Initialize a copy for storing default values
      self._table._v_wbuffercpy = self._table._newBuffer(init=1)
      # Create a buffer for reading as well
      buff = self._table._v_rbuffer = self._table._newBuffer(init=1)
      self._rfields = buff._fields
      # Flag that tells that the buffer has been initialized for reading
      self._rbuffer_initialized = 1
    else:
      buff = self._table._v_rbuffer = self._table._newBuffer(init=0)
      # Flag that tells that the buffer has been initialized for reading
      self._rbuffer_initialized = 1
      self._rfields = buff._fields

    self._recarray = buff
    self.nrows = self._table.nrows  # This value may change
    self.nrowsinbuf = self._table._v_maxTuples    # Need to fetch this value
    self._strides = buff._strides[0]
    nfields = buff._nfields

    # This method may run more than once on the same instance (e.g. first
    # for reading and later for writing), so the arrays allocated by a
    # previous call are released first (free(NULL) is a no-op)
    free(<void *>self._scalar)
    free(<void *>self._enumtypes)
    self._scalar = NULL
    self._enumtypes = NULL
    self._scalar = <int *>malloc(nfields * sizeof(int))
    self._enumtypes = <int *>malloc(nfields * sizeof(int))
    # malloc() may legitimately return NULL for a zero-sized request
    if nfields > 0 and (self._scalar == NULL or self._enumtypes == NULL):
      raise MemoryError("Can't allocate memory for the field tables")

    # Create a dictionary with the index columns of the recarray
    # and other tables
    naEnums = tables.hdf5Extension.naTypeToNAEnum  # hoisted module lookup
    i = 0
    self._indexes = {}
    for field in buff._names:
      self._indexes[field] = i
      if buff._repeats[i] == 1:
        self._scalar[i] = 1
      else:
        self._scalar[i] = 0
      self._enumtypes[i] = naEnums[buff._fmt[i]]
      i = i + 1

  # The next _open_read, _read_elements, _read_records and _close_read
  # methods are here to allow the existence of nested iterators for
  # reading.  They mimic their counterparts in the Table extension class,
  # so it may be worth spending some effort to merge them.
  # Nested iterators for writing are not supported yet.
  # F. Altet 2005-06-16
  def _open_read(self, object recarr):
    "Open the table for reading, using 'recarr' as the data buffer."
    cdef herr_t status

    # Remember where the data area of the buffer lives
    NA_getBufferPtrAndSize(recarr._data, 1, &self.rbuf)

    # The name is kept in a named Python variable to avoid the
    # temporary Python value error
    tname = self._table.name
    status = H5TBOopen_read(&self.dataset_id, &self.space_id,
                            self.parent_id, tname)
    if status < 0:
      raise HDF5ExtError("Problems opening table for read.")

    # Remember that there is something to close
    self._ropen = 1

  def _read_records(self, object recarr, hsize_t start, hsize_t nrecords):
    """Read up to 'nrecords' rows, beginning at 'start', into the buffer.

    The count is trimmed so that the read does not go past the number of
    rows of the table.  The number of rows actually asked for is
    returned.
    """
    cdef herr_t status

    # Do not read past the end of the table
    tablerows = self._table.nrows
    if (start + nrecords) > tablerows:
      nrecords = tablerows - start

    # The GIL is released while the rows are fetched from disk
    Py_BEGIN_ALLOW_THREADS
    status = H5TBOread_records(&self.dataset_id, &self.space_id,
                               self.mem_type_id, start,
                               nrecords, self.rbuf)
    Py_END_ALLOW_THREADS
    if status < 0:
      raise HDF5ExtError("Problems reading records.")

    # Convert some HDF5 types to Numarray after reading.
    self._table._convertTypes(recarr, nrecords, 1)

    return nrecords

  def _read_elements(self, object recarr, object elements):
    """Read the rows whose coordinates are in 'elements' into the buffer.

    The table must have been opened with _open_read() before.  The
    number of coordinates is returned.
    """
    cdef hsize_t ncoords
    cdef void *coordsptr
    cdef int byteoffset
    cdef herr_t status

    # One row is read per coordinate
    ncoords = len(elements)
    # Locate the first coordinate: start of the data area plus the
    # byte offset of the array inside it
    NA_getBufferPtrAndSize(elements._data, 1, &coordsptr)
    byteoffset = elements._byteoffset
    coordsptr = <void *>(<char *>coordsptr + byteoffset)

    Py_BEGIN_ALLOW_THREADS
    status = H5TBOread_elements(&self.dataset_id, &self.space_id,
                                self.mem_type_id, ncoords,
                                coordsptr, self.rbuf)
    Py_END_ALLOW_THREADS
    if status < 0:
      raise HDF5ExtError("Problems reading records.")

    # Convert some HDF5 types to Numarray after reading.
    self._table._convertTypes(recarr, ncoords, 1)

    return ncoords

  def _close_read(self):
    "Close the table if it was open for reading and clear the flag."
    cdef herr_t status

    if self._ropen:
      status = H5TBOclose_read(&self.dataset_id, &self.space_id)
      if status < 0:
        raise HDF5ExtError("Problems closing table for read.")

    self._ropen = 0

  def _initLoop(self, hsize_t start, hsize_t stop, hsize_t step,
                object coords, int ncoords):
    """Initialization for the __iter__ iterator.

    The read buffer is created if needed, the iteration range and the
    optional 'coords' are recorded, and the table is opened for reading.
    If the table carries a pending selection (`whereColname`), its
    conditions are copied here and reset in the table.  `whereCond`,
    `indexed` and `coords` are what __next__() later looks at to choose
    the iteration method.
    """

    self._riterator = 1   # We are inside a read iterator
    if not self._rbuffer_initialized:
      self._newBuffer(write=0)
    self.start = start
    self.stop = stop
    self.step = step
    self.coords = coords
    self.startb = 0
    self.nrowsread = start
    # One step behind 'start': the iteration methods add 'step' to
    # _nrow to get the next element
    self._nrow = start - self.step
    self._open_read(self._recarray)  # Open the table for reading
    self.whereCond = 0
    self.indexed = 0
    # Do we have in-kernel selections?
    if (hasattr(self._table, "whereColname") and
        self._table.whereColname is not None):
      self.whereCond = 1
      # NOTE(review): colname is a C pointer into the string object held
      # by whereColname, and that attribute is reset to None below --
      # confirm that something else keeps the string alive
      self.colname = PyString_AsString(self._table.whereColname)
      # Is this column indexed and ready to use?
      if self._table.colindexed[self.colname] and ncoords >= 0:
        self.indexed = 1
        self.index = self._table.cols._f_col(self.colname).index
        # create buffers for indices
        self.index.indices._initIndexSlice(self.nrowsinbuf)
        self.nrowsread = 0
        self.nextelement = 0
      # Copy the table conditions to local variable
      self.ops = self._table.ops[:]
      self.opsValues = self._table.opsValues[:]
      # Reset the table variable conditions
      self._table.ops = []
      self._table.opsValues = []
      self._table.whereColname = None

    # In both of the next modes, nextelement and nrowsread count
    # coordinates already consumed and fetched, and stopindex is how
    # many of them there are
    if self.coords is not None:
      self.stopindex = len(coords)
      self.nrowsread = 0
      self.nextelement = 0
    elif self.indexed:
      self.stopindex = ncoords

  def __next__(self):
    "Return the next row, delegating to the method that fits this loop"
    # Indexed selections and user-provided coordinates share a method
    if self.indexed or self.coords is not None:
      return self.__next__indexed()
    # In-kernel selection on a column
    if self.whereCond:
      return self.__next__inKernel()
    # Plain iteration in any other case
    return self.__next__general()

  cdef __next__indexed(self):
    """The version of next() for indexed columns or with user coordinates

    Coordinates are consumed in chunks of at most `nrowsinbuf`: each
    chunk is taken from `coords` or from the index, the matching rows
    are read into the buffer, and then they are handed out one by one.
    When the coordinates are exhausted, the iteration either ends or
    goes on with __next__inKernel() for the remaining rows.
    """
    cdef long offset
    cdef object indexValid1, indexValid2
    cdef int ncond, op, recout
    cdef long long stop
    cdef object opValue, field
    cdef long long nextelement

    while self.nextelement < self.stopindex:
      # Has the current chunk of coordinates been consumed?
      if self.nextelement >= self.nrowsread:
        # Correction for avoiding reading past self.stopindex
        if self.nrowsread+self.nrowsinbuf > self.stopindex:
          stop = self.stopindex-self.nrowsread
        else:
          stop = self.nrowsinbuf
        if self.coords is not None:
          # The coordinates provided by the user are taken as they are
          self.bufcoords = self.coords[self.nrowsread:self.nrowsread+stop]
          nrowsread = len(self.bufcoords)
        else:
          # The coordinates come from the index and are then filtered
          # with the [start:stop:step] range
          self.bufcoords = self.index.getCoords(self.nrowsread, stop)
          nrowsread = len(self.bufcoords)
          tmp = self.bufcoords
          # If a step was specified, select the strided elements first
          if len(tmp) > 0 and self.step > 1:
            tmp2=(tmp-self.start) % self.step
            tmp = tmp[tmp2.__eq__(0)]
          # Now, select those indices in the range start, stop:
          if len(tmp) > 0 and tmp[0] < self.start:
            # Pyrex can't use the tmp>=number notation when tmp is a numarray
            # object. Why?
            #tmp = tmp[tmp>=self.start]
            tmp = tmp[tmp.__ge__(self.start)]
          if len(tmp) > 0 and tmp[-1] >= self.stop:
            tmp = tmp[numarray.where(tmp.__lt__(self.stop))]
          self.bufcoords = tmp
        # Position in the buffer: one before the first row
        self._row = -1
        if len(self.bufcoords):
          recout = self._read_elements(self._recarray, self.bufcoords)
        else:
          recout = 0
        self.nrowsread = self.nrowsread + nrowsread
        # Correction for elements that are eliminated by its
        # [start:stop:step] range
        self.nextelement = self.nextelement + nrowsread - recout
        if recout == 0:
          # no items were read, skipping
          continue
      self._row = self._row + 1
      self._nrow = self.bufcoords[self._row]
      self.nextelement = self.nextelement + 1
      # Return this row
      return self
    else:
      # The loop has no break, so this runs once the coordinates are
      # exhausted.
      # Re-initialize the possible cuts in columns
      self.indexed = 0
      if self.coords is None:
        self.index.indices._destroyIndexSlice()  # Remove buffers in indices
        # NOTE(review): presumably the number of rows covered by the
        # index -- confirm against the index implementation
        nextelement = self.index.nelemslice * self.index.nrows
        # Correct this for step size > 1
        correct = (nextelement - self.start) % self.step
        if self.step > 1 and correct:
          nextelement = nextelement + self.step - correct
      else:
        self.coords = None
        # All the elements have been read for this mode
        nextelement = self.nrows
      if nextelement >= self.nrows:
        self._close_read()  # Close the table
        self._riterator = 0
        # The next are not necessary because this instance will not be reused
#         self.index = 0
#         self.whereCond = 0
        raise StopIteration        # end of iteration
      else:
        # Continue the iteration with the __next__inKernel() method
        self.start = nextelement
        self.startb = 0
        self.nrowsread = self.start
        self._nrow = self.start - self.step
        return self.__next__inKernel()

  cdef __next__inKernel(self):
    """The version of next() in case of in-kernel conditions

    Rows are read in chunks into the buffer.  For each chunk, the
    conditions in `ops`/`opsValues` are evaluated on the whole
    `colname` column at once, giving the boolean array `indexValid`.
    Only the rows flagged there are returned; chunks with no valid row
    are skipped altogether.
    """
    cdef long offset
    cdef object indexValid1, indexValid2
    cdef int ncond, op, recout, correct
    cdef object opValue, field

    self.nextelement = self._nrow + self.step
    while self.nextelement < self.stop:
      # Is the next element beyond what has been read so far?
      if self.nextelement >= self.nrowsread:
        # Skip until there is interesting information
        while self.nextelement >= self.nrowsread + self.nrowsinbuf:
          self.nrowsread = self.nrowsread + self.nrowsinbuf
        # Compute the end for this iteration
        self.stopb = self.stop - self.nrowsread
        if self.stopb > self.nrowsinbuf:
          self.stopb = self.nrowsinbuf
        self._row = self.startb - self.step
        # Read a chunk
        recout = self._read_records(
          self._recarray, self.nextelement, self.nrowsinbuf)
        self.nrowsread = self.nrowsread + recout
        # Position in indexValid: one step before the first row
        self.indexChunk = -self.step
        # Iterate over the conditions
        # (the codes are: 1 '<', 2 '<=', 3 '>', 4 '>=', 5 '==', 6 '!=')
        ncond = 0
        for op in self.ops:
          opValue = self.opsValues[ncond]
          # Copying first on a non-strided array, reduces the speed
          # in a factor of 20%
          #field = self._rfields[self.colname].copy()
          if op == 1:
            indexValid1 = self._rfields[self.colname].__lt__(opValue)
          elif op == 2:
            indexValid1 = self._rfields[self.colname].__le__(opValue)
          elif op == 3:
            indexValid1 = self._rfields[self.colname].__gt__(opValue)
          elif op == 4:
            indexValid1 = self._rfields[self.colname].__ge__(opValue)
          elif op == 5:
            indexValid1 = self._rfields[self.colname].__eq__(opValue)
          elif op == 6:
            indexValid1 = self._rfields[self.colname].__ne__(opValue)
          # Consolidate the valid indexes
          if ncond == 0:
            self.indexValid = indexValid1
          else:
            self.indexValid = self.indexValid.__and__(indexValid1)
          ncond = ncond + 1
        # This indexing operation is *very* costly, so it is better
        # to keep the boolean (indexValid) approach.
        #result = self._recarray[self.indexValid]
        #if len(result) == 0:
        # Is still there any interesting information in this buffer?
        if not numarray.sometrue(self.indexValid):
          # No, so take the next one
          if self.step >= self.nrowsinbuf:
            self.nextelement = self.nextelement + self.step
          else:
            self.nextelement = self.nextelement + self.nrowsinbuf
            # Correction for step size > 1
            if self.step > 1:
              correct = (self.nextelement - self.start) % self.step
              self.nextelement = self.nextelement + self.step - correct
          continue

      # Move one step forward, both in the buffer and in the table
      self._row = self._row + self.step
      self._nrow = self.nextelement
      if self._row + self.step >= self.stopb:
        # Compute the start row for the next buffer
        self.startb = 0

      self.nextelement = self._nrow + self.step
      # Return only if this value is interesting
      self.indexChunk = self.indexChunk + self.step
      if self.indexValid[self.indexChunk]:
        return self
    else:
      # The loop has no break, so this runs once 'stop' is reached
      self._close_read()  # Close the table
      self._riterator = 0
      raise StopIteration        # end of iteration

  # This is the most general __next__ version, simple, but effective
  cdef __next__general(self):
    """Return the next row when no selection condition is involved.

    Advances ``self.step`` rows from the current one, reading a new
    buffer through ``_read_records()`` whenever the wanted row lies at or
    beyond ``self.nrowsread``, and returns ``self`` positioned on that
    row.  When ``self.stop`` is reached the table is closed for reading,
    ``self._riterator`` is cleared and ``StopIteration`` is raised.
    """
    # NOTE(review): offset, indexValid1, indexValid2, ncond, op, opValue
    # and field are declared but never used in this method.
    cdef long offset
    cdef object indexValid1, indexValid2
    cdef int ncond, op, recout
    cdef object opValue, field

    self.nextelement = self._nrow + self.step
    # The body always ends in ``return``, so this ``while`` runs at most
    # once per call; its ``else`` branch handles the end of the iteration.
    while self.nextelement < self.stop:
      if self.nextelement >= self.nrowsread:
        # Skip until there is interesting information
        while self.nextelement >= self.nrowsread + self.nrowsinbuf:
          self.nrowsread = self.nrowsread + self.nrowsinbuf
        # Compute the end for this iteration
        self.stopb = self.stop - self.nrowsread
        if self.stopb > self.nrowsinbuf:
          self.stopb = self.nrowsinbuf
        # One step before the first wanted row of the new buffer; the
        # increment below moves it onto that row.
        self._row = self.startb - self.step
        # Read a chunk
        recout = self._read_records(
          self._recarray, self.nrowsread, self.nrowsinbuf)
        self.nrowsread = self.nrowsread + recout

      # Position in the buffer (_row) and in the table (_nrow)
      self._row = self._row + self.step
      self._nrow = self.nextelement
      if self._row + self.step >= self.stopb:
        # Compute the start row for the next buffer
        self.startb = (self._row + self.step) % self.nrowsinbuf

      self.nextelement = self._nrow + self.step
      # Return this value
      return self
    else:
      self._close_read()  # Close the table
      self._riterator = 0
      raise StopIteration        # end of iteration

  def _fillCol(self, result, start, stop, step, field):
    """Read a field from a table on disk and put the result in result.

    The loop state is prepared by ``_initLoop(start, stop, step, None, 0)``
    and then read back from ``self.start``, ``self.stop``, ``self.step``,
    ``self.nrowsinbuf`` and ``self.startb``.  The table is read in chunks
    of ``self.nrowsinbuf`` rows; from every chunk the strided slice
    ``[istartb:istopb:istep]`` is copied into consecutive positions of
    `result`.

    If `field` is true, only ``self._recarray._fields[field]`` is copied;
    otherwise the complete ``self._recarray`` is used.

    The table is closed for reading and ``self._riterator`` is cleared
    before returning.  Nothing is returned; `result` is filled in place.
    """
    cdef hsize_t startr, stopr, i, j, istartb, istopb
    cdef hsize_t istart, istop, istep, inrowsinbuf, inextelement, inrowsread
    cdef object fields

    self._initLoop(start, stop, step, None, 0)
    # Work on local C copies of the loop parameters
    istart, istop, istep = (self.start, self.stop, self.step)
    inrowsinbuf, inextelement, inrowsread = (self.nrowsinbuf, istart, istart)
    # istartb: first wanted row inside the buffer; startr: next free
    # position in result
    istartb, startr = (self.startb, 0)
    if field:
      # If field is not None, select it
      # NOTE(review): this is a truth test, so an empty field name also
      # takes the "all fields" branch below.
      fields = self._recarray._fields[field]
    else:
      # if don't, select all fields
      fields = self._recarray
    # i is the table row where the next chunk read starts
    i = istart
    while i < istop:
      if (inextelement >= inrowsread + inrowsinbuf):
        # Nothing wanted in this chunk: account for it without reading
        inrowsread = inrowsread + inrowsinbuf
        i = i + inrowsinbuf
        continue
      # Compute the end for this iteration
      istopb = istop - inrowsread
      if istopb > inrowsinbuf:
        istopb = inrowsinbuf
      # Number of rows selected by [istartb:istopb:istep], added to the
      # current position in result (C integer division)
      stopr = startr + ((istopb - istartb - 1) / istep) + 1
      # Read a chunk
      inrowsread = (
        inrowsread + self._read_records(self._recarray, i, inrowsinbuf))
      # Assign the correct part to result
      # The bottleneck is in this assignment. Hope that the numarray
      # people might improve this in the short future
#       print "result-->", result.info()
#       print "startr, stopr-->", startr, stopr
#       print "fields-->", fields.info()
#       print "istartb, istopb, istep-->", istartb, istopb, istep
      result[startr:stopr] = fields[istartb:istopb:istep]
      # Compute some indexes for the next iteration
      startr = stopr
      # j is the buffer position of the last row copied from this chunk;
      # one more step, modulo the buffer size, gives the first wanted
      # position in the next chunk.
      j = istartb + ((istopb - istartb - 1) / istep) * istep
      istartb = (j+istep) % inrowsinbuf
      inextelement = inextelement + istep
      i = i + inrowsinbuf
    self._close_read()  # Close the table
    self._riterator = 0
    return

  def nrow(self):
    """Return the table-wide number of the current row."""
    current = self._nrow
    return current

  def getTable(self):
    """Return the Table object this row is associated with."""
    table = self._table
    return table

  def append(self):
    """Add the current contents of this row to the output buffer.

    The unsaved-rows counter is increased by one (and the table counter
    of unsaved indexed rows too, when the table is indexed).  As soon as
    the counter equals ``self.nrowsinbuf`` the buffered rows are saved
    through ``_saveBufferedRows()``.

    Raises ``IOError`` if the file was opened in read-only mode.
    """

    if self._table._v_file.mode == 'r':
      raise IOError("attempt to write over a file opened in read-only mode")

    # The write buffer is created lazily, so that loops such as
    #    for i in xrange(self.nrows):
    #        row.append()
    # work without any previous assignment to the row.
    if not self._wbuffer_initialized:
      self._newBuffer(write=1)

    self._unsavednrows = self._unsavednrows + 1
    # A read iterator may have moved _row; point it to the write position
    self._row = self._unsavednrows

    table = self._table
    if table.indexed:
      table._unsaved_indexedrows = table._unsaved_indexedrows + 1

    if self._unsavednrows != self.nrowsinbuf:
      # There is still room left in the buffer
      return

    # The buffer is full: save the records on disk
    self._saveBufferedRows()
    # Fetch the field containers again, because the copy made while
    # saving changes the _fields pointer
    self._wfields = table._v_wbuffer._fields
    self._rfields = table._v_rbuffer._fields

  def _setUnsavedNRows(self, row):
    """Set the number of unsaved rows in the buffer to `row`.

    The current buffer position (``_row``) is set to the same value.
    """
    newcount = row
    self._unsavednrows = newcount
    # Keep the current buffer read counter in step with the new count
    self._row = newcount

  def _getUnsavedNRows(self):
    """Return the number of unsaved rows currently held in the buffer."""
    unsaved = self._unsavednrows
    return unsaved

  def _incUnsavedNRows(self):
    """Increase by one both the buffer position and the unsaved-rows count.

    Returns the new number of unsaved rows.
    """
    # Advance the current buffer read counter first
    self._row = 1 + self._row
    unsaved = self._unsavednrows + 1
    self._unsavednrows = unsaved
    return unsaved

#   def __getitem__orig(self, fieldName):
#     try:
#       return self._fields[fieldName][self._row]
#       #return 40  # Just for testing purposes
#     except KeyError:
#       raise KeyError("no such column: %s" % (fieldName,))

  # This method is twice as fast as __getattr__ because there is
  # no lookup in the local dictionary
  def __getitem__(self, fieldName):
    """Return the value of column `fieldName` in the current row.

    Three cases are told apart through the per-column ``_scalar`` and
    ``_enumtypes`` tables:

    * non-scalar cell: a copy of the array in the cell is returned, so
      that the user does not have to make one;
    * scalar cell of ``CHARTYPE``: the string is fetched with the
      universal indexing function;
    * any other scalar cell: the value is extracted directly with
      ``NA_getPythonScalar()``, which speeds up the access to column
      elements by about 20%.

    Raises ``KeyError`` if there is no such column.
    """
    cdef int index
    cdef long offset

    try:
      # Get the column index. This is very fast!
      index = self._indexes[fieldName]

      if not self._scalar[index]:
        # Case when dimensions > 1: hand out a copy of the
        # (multi) dimensional array
        return self._rfields[fieldName][self._row].copy()

      if self._enumtypes[index] == CHARTYPE:
        # Case of a plain string in the cell
        return self._rfields[fieldName][self._row]

      # Scalar, non-string cell: fast path.  This optimization was
      # disabled for a while (it performed badly with numarray 0.4) and
      # has been active again since 0.7.1 (2003/08/08).
      offset = self._row * self._strides
      return NA_getPythonScalar(self._rfields[fieldName], offset)
    except KeyError:
      raise KeyError("no such column: %s" % (fieldName,))

  # This is slightly faster (around 3%) than __setattr__
  def __setitem__(self, object fieldName, object value):
    """Set column `fieldName` of the row being built to `value`.

    The value is stored at position ``_unsavednrows`` of the write
    buffer, which is created first if needed.  For columns listed in
    the ``_colenums`` attribute of the table, every element of `value`
    is checked against the enumerated type beforehand.

    Raises ``IOError`` if the file is read-only,
    ``NotImplementedError`` if a read iterator is in progress (the
    table is closed for reading first), ``KeyError`` if there is no
    such column and ``TypeError`` if the value has an invalid type.
    """

    if self._table._v_file.mode == 'r':
      raise IOError("attempt to write over a file opened in read-only mode")

    if self._riterator:
      self._close_read()  # Close the table
      raise NotImplementedError("You cannot set Row fields when in middle of an table iterator. Use Table.modifyRows() or Table.modifyColumns() instead.")

    if not self._wbuffer_initialized:
      # Create the recarrays for input/output buffering
      self._newBuffer(write=1)

    # Check validity of enumerated value.
    colenums = self._table._colenums
    if fieldName in colenums:
      checker = colenums[fieldName]
      for element in numarray.array(value).flat:
        checker(element)  # raises ``ValueError`` on invalid values

    try:
      self._wfields[fieldName][self._unsavednrows] = value
      # Before write and read buffer got separated, we were able to write:
      # row['var1'] = '%04d' % (self.expectedrows - i)
      # row['var7'] = row['var1'][-1]
      # during table fillings. This is to allow this to continue happening.
      # F. Altet 2005-04-25
      if not self._riterator:
        # Update the _row just in case a call to a read iterator has
        # been called
        self._rfields = self._wfields
        self._row = self._unsavednrows
      else:
        # NOTE(review): this branch looks unreachable, because the method
        # raises above whenever _riterator is set -- confirm before
        # removing it.
        self._rfields[fieldName][self._unsavednrows] = value
    except KeyError:
      raise KeyError("no such column: %s" % (fieldName,))
    except TypeError:
      raise TypeError("invalid type for ``%s`` column: %s" % (fieldName, type(value)))

  def _in_riterator(self):
    """Tell whether the read iterator flag is set (``True``/``False``)."""

    if not self._riterator:
      return False
    return True

  # Delete the I/O buffers
  def _cleanup(self):
    """Drop every reference to the I/O buffers and mark them as absent."""
    # References kept by the row itself
    self._wfields = None
    self._rfields = None
    # References kept by the table: write buffer, read buffer and the
    # copy of the write buffer
    table = self._table
    table._v_wbuffer = None
    table._v_rbuffer = None
    table._v_wbuffercpy = None
    # Flag both buffers as uninitialized
    self._rbuffer_initialized = 0
    self._wbuffer_initialized = 0

  def __str__(self):
    """Represent the current row as a string.

    The read buffer is created first if it does not exist yet.  The
    result is the ``repr()`` of every field of the current row, in the
    order given by ``self._recarray._names``, separated by commas and
    enclosed in parentheses.  If there is still no record buffer, a
    message explaining that the row has not been initialized is
    returned instead.
    """

    if not self._rbuffer_initialized:
      # Create the arrays for buffering
      self._newBuffer(write=0)
    # Special case where Row has not been initialized yet.
    # An identity test is used on purpose: ``== None`` would be handed
    # over to the comparison operator of the buffer object, which
    # array-like types are free to overload.
    if self._recarray is None:
      return "Row object has not been initialized for table:\n  %s\n %s" % \
             (self._table, \
    "You will normally want to use row objects in combination with iterators.")
    outlist = []
    for name in self._recarray._names:
      outlist.append(repr(self._rfields[name][self._row]))
    return "(" + ", ".join(outlist) + ")"

  def __repr__(self):
    """Return the same text as the string conversion of the row."""

    text = self.__str__()
    return text

  def __dealloc__(self):
    """Release the C tables of the row and close a pending read iterator."""
    # Per-column tables
    free(<void *>self._scalar)
    free(<void *>self._enumtypes)
    if not self._riterator:
      return
    # The iterator has been left unexpectedly (e.g. with a break):
    # close the table
    self._close_read()
    self._riterator = 0


