# Copyright (c) 2011-2014, B.I.Stepanov Institute of Physics, National Academy
# of Sciences of Belarus.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation; either version 2 of the License, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 51
# Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

import datetime
import numpy
import os
import os.path
import pywintypes
import win32com.client
import xlrd
import xlwt

from common.utils import access
from common.utils import excel
from common.utils import txt

__all__ = ['DataRecord', 'DataField']

# *****************************************************************************
class DataField:
    """Description of a class instance attribute mapped to a database field or
    optionally to a column of an Excel worksheet.

    Attributes:
      - 'attributeName': name of the attribute;
      - 'fieldName': name of the corresponding database field, or Excel file
        column header label (if the data are to be read from an Excel file);
      - 'dataType': name of the SQL data type for the database field (see
        'access.getDataTypeCode', 'access.getDataTypeName'). If the data are
        read from an Excel file instead of a database, this is used as a hint
        in cell format checking;
      - 'arrayDataType': Numpy array data type for array-like attributes. Not
        used unless 'dataType' is 'LONGBINARY'.
      - 'required': 'True' if this data field is mandatory for input database
        files and 'False' if the data field is optional. In the latter case,
        the value of the attribute would be set to 'None' if the corresponding
        database field is absent."""

    def __init__(self, attributeName, fieldName, dataType,
        arrayDataType = None, required = True):

        self.attributeName = attributeName
        self.fieldName = fieldName
        self.dataType = dataType
        self.arrayDataType = arrayDataType
        self.required = required

# *****************************************************************************
class DataRecord:
    """Base class for data structures intended for serialization in Microsoft
    Access database files or in Microsoft Excel files.

    Primary class attributes:
      - 'tableName': string holding the name of the database table to store the
        data in (or the name of the Excel worksheet if the data are to be read
        from an Excel file);
      - 'fields': list of 'DataField' instances describing the structure of the
        data records;

    Class attributes related to operations with database files (see
    'createDatabase', 'checkDataFormat', 'readDataList', and 'writeData'):
      - 'extraFieldNames': list of names of required database fields that are
        not referenced in 'fields' (this is an empty list by default);
      - 'selectStatementExtraText': string to be appended to 'SELECT <fields>
        FROM <tableName>' command when querying the data from a database (this
        is an empty string by default);

      - 'createDatabaseErrorMessage': error string for database file creation
        failure;
      - 'openDatabaseErrorMessage': error string for failure opening a
        database file for reading;
      - 'queryFailureErrorMessage': error string for failure executing the SQL
        'SELECT' statement on an opened database file;
      - 'invalidFormatErrorPrefix': prefix for error strings reporting failed
        database structure tests, ending with a colon and space;
      - 'writeDataErrorMessage': error string for failure adding a new record
        to a database file;

    Class attributes related to reading data from an Excel file instead of a
    database (see 'readDataListFromExcel'):
      - 'extraExcelHeaderRows': number of Excel file rows that are located
        above the main header row and may contain some additional information
        that doesn't need to be checked by the file format testing procedure
        (this is zero by default);

      - 'openExcelFileErrorMessage': error string for failure opening an Excel
        file for reading;
      - 'invalidExcelFormatErrorPrefix': prefix for error strings reporting
        failed Excel file structure tests, ending with a colon and space;

    Class attributes related to Excel import and export operations (see
    'importDataListFromExcel', 'exportDataListToExcel' and
    'exportDataToExcel'):
      - 'importDataFromExcelErrorPrefix': prefix for error strings reporting
        failed Excel import;
      - 'exportDataToExcelErrorPrefix': prefix for error strings reporting
        failed Excel export.

    Primary class attributes ('tableName' and 'fields') must be redefined in
    the derived class. Class attributes related to error messages must be
    redefined unless code capable of generating an error will never be called.

    'onDataLoaded' virtual method may be redefined in order to add some
    custom processing to the data loading procedure.

    The differences between reading from an Excel file and importing from an
    Excel file are as follows:
      - Reading operation does not support array data fields (see
        'DataField.arrayDataType'), but is more flexible with regard to layout
        of the data cells: it allows to read Excel files with extra rows above
        the main header (see 'extraExcelHeaderRows'). On the other hand, Excel
        file format for import/export is fixed and optimized for export of
        databases containing array data fields. It will contain exactly the
        same number of data fields as defined in the 'fields' class attribute,
        and a special numbering column will be included in the main worksheet
        to facilitate location of the corresponding array data;
      - For clarity, error messages used in these two operations differ a
        bit."""

    # ---- Class attributes ---------------------------------------------------
    tableName = None
    fields = []

    extraFieldNames = []
    selectStatementExtraText = ''

    createDatabaseErrorMessage = None
    openDatabaseErrorMessage = None
    queryFailureErrorMessage = None
    invalidFormatErrorPrefix = None
    writeDataErrorMessage = None

    extraExcelHeaderRows = 0

    openExcelFileErrorMessage = None
    invalidExcelFormatErrorPrefix = None

    importDataFromExcelErrorPrefix = None
    exportDataToExcelErrorPrefix = None

    # ---- Public methods -----------------------------------------------------
    def __init__(self):
        """Initialize all the attributes referenced in 'cls.fields' to
        'None'."""

        for name in self.getAttributeNames():
            setattr(self, name, None)

    # ---- Protected overridable methods --------------------------------------
    def onDataLoaded(self):
        """Called by 'readDataList' and similar functions after all the data
        record's attributes are read from a database or an Excel file.

        Redefine this method in the derived class to add any custom
        processing to the data loading procedure."""
        pass

    # ---- Public and private class methods -----------------------------------
    @classmethod
    def getAttributeNames(cls):
        """Return the list of attribute names referenced in 'cls.fields'."""
        return [field.attributeName for field in cls.fields]

    @classmethod
    def getRequiredAttributeNames(cls):
        """Return the list of attribute names referenced in 'cls.fields' whose
        'required' attributes are set to 'True'."""
        return [field.attributeName for field in cls.fields if field.required]

    @classmethod
    def getFieldName(cls, attributeName):
        """Return the name of the database field corresponding to the attribute
        with the given name."""

        for field in cls.fields:
            if field.attributeName == attributeName:
                return field.fieldName

    @classmethod
    def createDatabase(cls, dbFilePath):
        """Create a new empty database file at the given path.

        Raise 'txt.Error' on failure."""

        try:
            dbConnection = access.createDatabase(dbFilePath)
            try:
                # Construct a comma-separated list of field name-type pairs.
                fieldsDefinition = ', '.join(
                    access.quoteName(field.fieldName) + ' ' +
                    field.dataType for field in cls.fields)

                # Create an empty table.
                dbConnection.Execute('CREATE TABLE %s (%s)' %
                    (access.quoteName(cls.tableName), fieldsDefinition))
            finally:
                dbConnection.Close()

        except pywintypes.com_error:
            raise txt.Error(cls.createDatabaseErrorMessage)

    @classmethod
    def _checkDataFormatImpl(cls, dbFilePath):
        """Same as 'checkDataFormat', but return a tuple of opened database
        connection, opened Recordset pointing to the data, and a list of the
        database field names."""

        # Connect to the database.
        try:
            dbConnection = access.openDatabase(dbFilePath)

        except pywintypes.com_error:
            raise txt.Error(cls.openDatabaseErrorMessage)

        try:
            # Check that the required table exists in the database.
            dbTableNames = access.getTableNames(dbConnection)

            if cls.tableName not in dbTableNames:
                raise txt.Error(cls.invalidFormatErrorPrefix +
                    'there is no table named %s' % txt.quote(cls.tableName))

            # Check that the table contains the required fields.
            dbFieldNames = access.getFieldNames(dbConnection, cls.tableName)

            # Place 'extraFieldNames' entries earlier in the check list,
            # assuming that they are more important in identification of the
            # required database format. Skip optional database fields.
            for name in (cls.extraFieldNames +
                [field.fieldName for field in cls.fields if field.required]):

                if name not in dbFieldNames:
                    raise txt.Error(cls.invalidFormatErrorPrefix +
                        'the database contains no %s field' % txt.quote(name))

            try:
                # Construct a comma-separated list of the required field names.
                # Skip optional fields that are absent in the database.
                fieldList = ', '.join(access.quoteName(field.fieldName)
                    for field in cls.fields
                    if field.required or field.fieldName in dbFieldNames)

                # Query the required fields from the table.
                query = ('SELECT %s FROM %s' % (fieldList, access.quoteName(
                    cls.tableName)) + cls.selectStatementExtraText)

                (recSet, recordsAffected) = dbConnection.Execute(query)

            except pywintypes.com_error:
                # Report SQL query failure.
                raise txt.Error(cls.queryFailureErrorMessage)
        except:
            dbConnection.Close()
            raise

        # 'recSet' is now an opened forward-only cursor pointing to the data.
        try:
            # Check types of the queried database fields.
            dbFields = recSet.Fields

            for field in cls.fields:
                # Skip optional fields that are absent in the database.
                if not (field.required or field.fieldName in dbFieldNames):
                    continue

                dbFieldTypeCode = dbFields.Item(field.fieldName).Type

                if dbFieldTypeCode != access.getDataTypeCode(field.dataType):

                    # Compose the error message.
                    dataTypeName = access.getDataTypeName(field.dataType)
                    assert dataTypeName is not None

                    raise txt.Error(cls.invalidFormatErrorPrefix +
                        'type of the %s database field is different from %s' %
                        (txt.quote(field.fieldName), dataTypeName))

        except:
            recSet.Close()
            dbConnection.Close()
            raise

        return (dbConnection, recSet, dbFieldNames)

    @classmethod
    def checkDataFormat(cls, dbFilePath):
        """Assure that a file at the given path is a valid Access database and
        that its format matches the data structure.

        Raise 'txt.Error' on failure."""

        (dbConnection, recSet, dbFieldNames) = cls._checkDataFormatImpl(
            dbFilePath)
        recSet.Close()
        dbConnection.Close()

    @classmethod
    def readDataList(cls, dbFilePath):
        """Open a database file at the given path for reading, check its
        format, query the available data and return them as a list of 'cls'
        instances.

        Raise 'txt.Error' on failure.

        Date and time values are stored as 'datetime.date' and 'datetime.time'
        instances; long binary values are stored as Numpy arrays."""

        # Check the database format and query the data.
        (dbConnection, recSet, dbFieldNames) = cls._checkDataFormatImpl(
            dbFilePath)

        try:
            recordList = []

            while not recSet.EOF:
                # Read the data record.
                dbFields = recSet.Fields

                # Fill in the data structure.
                dataRecord = cls()

                for field in cls.fields:

                    # Don't query fields that are absent in the database.
                    if field.fieldName not in dbFieldNames:
                        value = None
                    else:
                        value = dbFields.Item(field.fieldName).Value

                    # Convert long binary values to Numpy arrays unless the
                    # data are missing.
                    if field.dataType == 'LONGBINARY' and value is not None:
                        assert isinstance(value, buffer)

                        # Check consistency of buffer size and the data type.
                        itemSize = numpy.dtype(field.arrayDataType).itemsize

                        if len(value) % itemSize != 0:
                            # Create an empty array to warn the user.
                            value = numpy.zeros(0)
                        else:
                            value = numpy.frombuffer(value,
                                dtype = field.arrayDataType)

                    # Convert date and time objects from pywin32's 'PyTime'
                    # data type to 'datetime.date' and 'datetime.time'.
                    elif field.dataType == 'DATE' and value is not None:
                        value = datetime.date(
                            value.year, value.month, value.day)

                    elif field.dataType == 'TIME' and value is not None:
                        value = datetime.time(
                            value.hour, value.minute, value.second)

                    setattr(dataRecord, field.attributeName, value)

                # Post-process the loaded data.
                dataRecord.onDataLoaded()

                recordList.append(dataRecord)
                recSet.MoveNext()

        finally:
            recSet.Close()
            dbConnection.Close()

        return recordList

    @classmethod
    def readDataListFromExcel(cls, excelFilePath):
        """Same as 'readDataList', but read the data from a specially prepared
        Excel file.

        Warning: this function won't work if the data record contains
        array-like data fields (see 'DataField.arrayDataType')."""

        try:
            # 'on_demand' means 'load the requested worksheets only'.
            workBook = xlrd.open_workbook(excelFilePath, on_demand = True)

        except (IOError, xlrd.XLRDError):
            raise txt.Error(cls.openExcelFileErrorMessage)

        if not cls.tableName in workBook.sheet_names():
            raise txt.Error(cls.invalidExcelFormatErrorPrefix +
                'there is no worksheet named %s' % txt.quote(cls.tableName))

        tableWorkSheet = workBook.sheet_by_name(cls.tableName)

        # The total number of header rows in the Excel file.
        rowOffset = cls.extraExcelHeaderRows + 1
        # Assume absence of empty or malformed rows outside the header.
        recordCount = tableWorkSheet.nrows - rowOffset

        if recordCount < 0:
            raise txt.Error(cls.invalidExcelFormatErrorPrefix +
                '%s worksheet contains no data' % txt.quote(cls.tableName))

        # A dictionary of column indices by their header row labels.
        fieldColumnIndices = {}

        # Search the header row for cells containing text values.
        for col in range(0, tableWorkSheet.ncols):
            cellValue = tableWorkSheet.cell_value(rowOffset - 1, col)
            cellType = tableWorkSheet.cell_type(rowOffset - 1, col)

            if cellType != xlrd.XL_CELL_TEXT:
                continue

            # Forbid duplicate header labels, so that the user is always warned
            # about any potentional ambiguity.
            if cellValue in fieldColumnIndices:
                raise txt.Error(cls.invalidExcelFormatErrorPrefix +
                    '%s and %s header cells have the same name (%s)' %
                    (txt.quote(xlrd.cellname(rowOffset - 1,
                        fieldColumnIndices[cellValue])),
                    txt.quote(xlrd.cellname(rowOffset - 1, col)),
                    txt.quote(cellValue)))

            fieldColumnIndices[cellValue] = col

        # Make sure that all the required labels are present in the header row.
        for field in cls.fields:
            if field.required and field.fieldName not in fieldColumnIndices:
                raise txt.Error(cls.invalidExcelFormatErrorPrefix +
                    'there is no header cell named %s in the %s worksheet' %
                    (txt.quote(field.fieldName),
                    txt.quote(tableWorkSheet.name)))

        recordList = []

        for recordIndex in range(recordCount):
            row = recordIndex + rowOffset

            dataRecord = cls()

            # Fill in the data.
            for field in cls.fields:
                # Skip data columns that are not present in the worksheet.
                if field.fieldName in fieldColumnIndices:
                    value = _readExcelCell(tableWorkSheet,
                        row, fieldColumnIndices[field.fieldName],
                        field.dataType, cls.invalidExcelFormatErrorPrefix,
                        workBook.datemode)
                else:
                    value = None

                setattr(dataRecord, field.attributeName, value)

            # Post-process the loaded data.
            dataRecord.onDataLoaded()

            recordList.append(dataRecord)

        return recordList

    @staticmethod
    def getExcelExportFilePath(dbFilePath):
        """Return the standard path to an Excel file used for data export."""

        # Replace the '.mdb' extension with '.xls'.
        return os.path.splitext(dbFilePath)[0] + '.xls'

    @classmethod
    def _importDataListFromExcelImpl(cls, excelFilePath, errorMessagePrefix):
        """Same as 'importDataListFromExcel', but use a custom error prefix."""

        invalidFormatPrefix = errorMessagePrefix + 'file format is invalid: '

        try:
            # 'on_demand' means 'load the requested worksheets only'.
            workBook = xlrd.open_workbook(excelFilePath, on_demand = True)

        except (IOError, xlrd.XLRDError):
            raise txt.Error(errorMessagePrefix +
                'the file is invalid or may not be opened for reading')

        # Assume that scalar table fields are stored in a worksheet with the
        # same name as that of the table.
        if not cls.tableName in workBook.sheet_names():
            raise txt.Error(invalidFormatPrefix +
                'there is no worksheet named %s' % txt.quote(cls.tableName))

        tableWorkSheet = workBook.sheet_by_name(cls.tableName)

        # Make sure that the header is present.
        if tableWorkSheet.nrows < 1:
            raise txt.Error(invalidFormatPrefix +
                '%s worksheet contains no data' % txt.quote(cls.tableName))

        # Separate scalar and array-like fields of the database table.
        scalarFields = [field for field in cls.fields if
            field.dataType != 'LONGBINARY']
        arrayFields = [field for field in cls.fields if
            field.dataType == 'LONGBINARY']

        # Assume that header labels coincide with table field names.
        for i in range(len(scalarFields)):
            _testExcelCell(tableWorkSheet, 0, i + 1,
                scalarFields[i].fieldName, 'VARCHAR', invalidFormatPrefix)

        recordList = []
        totalRecords = tableWorkSheet.nrows - 1

        for recordIndex in range(totalRecords):

            row = recordIndex + 1

            # Check if the first column contains a valid one-based record index.
            _testExcelCell(tableWorkSheet, row, 0,
                recordIndex + 1, 'INT', invalidFormatPrefix)

            dataRecord = cls()

            # Read the scalar fields.
            for i in range(len(scalarFields)):
                value = _readExcelCell(tableWorkSheet, row, i + 1,
                    scalarFields[i].dataType, invalidFormatPrefix,
                    workBook.datemode)

                setattr(dataRecord, scalarFields[i].attributeName, value)

            # Read the array data fields, if any.
            if len(arrayFields) != 0:

                # Assume that array-like data are stored on separate worksheets
                # with specially arranged names. Data for several records may
                # be stored at the same worksheet.
                (arraySheetName, arrayCol) = _getArrayWorkSheet(
                    len(arrayFields), recordIndex, totalRecords)

                if not arraySheetName in workBook.sheet_names():
                    raise txt.Error(errorPrefix +
                        'there is no worksheet named %s' % arraySheetName)

                arrayWorkSheet = workBook.sheet_by_name(arraySheetName)

                for i in range(len(arrayFields)):
                    # Each record is represented by 'len(arrayFields)' columns
                    # with a header and array data below it.

                    # Cells in the first header row hold the record index.
                    _testExcelCell(arrayWorkSheet, 0, arrayCol + i,
                        recordIndex + 1, 'INT', invalidFormatPrefix)

                    # Cells in the second header row hold table field names.
                    _testExcelCell(arrayWorkSheet, 1, arrayCol + i,
                        arrayFields[i].fieldName, 'VARCHAR',
                        invalidFormatPrefix)

                    value = _readExcelArray(arrayWorkSheet, 2, arrayCol + i,
                        arrayFields[i].arrayDataType, invalidFormatPrefix)

                    setattr(dataRecord, arrayFields[i].attributeName, value)

            # Post-process the loaded data.
            dataRecord.onDataLoaded()

            recordList.append(dataRecord)

        return recordList

    @classmethod
    def importDataListFromExcel(cls, excelFilePath):
        """Reverse of 'exportDataListToExcel'.

        Similar to 'readDataList', but read the data from a strictly formed
        Excel file instead of a database."""

        return cls._importDataListFromExcelImpl(excelFilePath,
            cls.importDataFromExcelErrorPrefix + ' (%s): ' %
            txt.quotePath(excelFilePath))

    @classmethod
    def exportDataListToExcel(cls, excelFilePath, recordList):
        """Write the given list of 'cls' instances to an Excel file compatible
        with 'importDataListFromExcel'.

        Raise 'txt.Error' on failure."""

        workBook = xlwt.Workbook()
        tableWorkSheet = workBook.add_sheet(cls.tableName)

        # Separate scalar and array-like fields of the database table.
        scalarFields = [field for field in cls.fields if
            field.dataType != 'LONGBINARY']
        arrayFields = [field for field in cls.fields if
            field.dataType == 'LONGBINARY']

        # Write the scalar worksheet header.
        for i in range(len(scalarFields)):
            tableWorkSheet.write(0, i + 1, scalarFields[i].fieldName)

            # Widen columns holding dates to an empirically determined width
            # of 11 digits, so that they have enough room for the values.
            if scalarFields[i].dataType == 'DATE':
                tableWorkSheet.col(i + 1).width = 256 * 11

        # The current array worksheet or 'None' if there are no array fields.
        arrayWorkSheet = None

        for recordIndex in range(len(recordList)):
            dataRecord = recordList[recordIndex]

            row = recordIndex + 1

            # Write the one-based record index in the first column.
            tableWorkSheet.write(row, 0, recordIndex + 1)

            # Write the scalar fields.
            for i in range(len(scalarFields)):
                value = getattr(dataRecord, scalarFields[i].attributeName)

                # Use custom styles for date and time values.
                if scalarFields[i].dataType == 'DATE' and value is not None:
                    style = xlwt.XFStyle()
                    style.num_format_str = 'yyyy-mm-dd'
                    tableWorkSheet.write(row, i + 1, value, style)

                elif scalarFields[i].dataType == 'TIME' and value is not None:
                    style = xlwt.XFStyle()
                    style.num_format_str = 'hh:mm:ss'
                    tableWorkSheet.write(row, i + 1, value, style)

                else:
                    # Numeric and string values don't need to be modified.
                    tableWorkSheet.write(row, i + 1, value)

            # Write the array fields, if any.
            if len(arrayFields) != 0:

                # Each record is represented by 'len(arrayFields)' columns
                # with a header and array data below it.
                (arraySheetName, arrayCol) = _getArrayWorkSheet(
                    len(arrayFields), recordIndex, len(recordList))

                # Append the data to the current worksheet until it fills up,
                # then switch to the next one.
                if (arrayWorkSheet is None or
                    arrayWorkSheet.name != arraySheetName):
                    arrayWorkSheet = workBook.add_sheet(arraySheetName)

                for i in range(len(arrayFields)):
                    # Cells in the first header row hold the record index.
                    arrayWorkSheet.write(0, arrayCol + i, recordIndex + 1)

                    # Cells in the second header row hold table field names.
                    arrayWorkSheet.write(1, arrayCol + i,
                        arrayFields[i].fieldName)

                    # Below the header is the data array.
                    arrayData = getattr(dataRecord,
                        arrayFields[i].attributeName)

                    if arrayData is not None:
                        for row in range(len(arrayData)):
                            # Convert the value from its Numpy type to float.
                            arrayWorkSheet.write(row + 2, arrayCol + i,
                                float(arrayData[row]))

        # Write the data, reporting all the possible errors in a uniform way.
        try:
            workBook.save(excelFilePath)

        except Exception:
            raise txt.Error(cls.exportDataToExcelErrorPrefix + ' (%s)' %
                txt.quotePath(excelFilePath))

    # ---- Public methods -----------------------------------------------------
    def writeData(self, dbFilePath):
        """Append the 'self' data record to a database file at the given path.

        Raise 'txt.Error' on failure.

        Note: no thorough examination of the database file structure is carried
        out; use 'checkDataFormat' for that."""

        try:
            # Connect to the database.
            dbConnection = access.openDatabase(dbFilePath)

            try:
                # Use 'EnsureDispatch' to precompile the interface.
                recSet = win32com.client.gencache.EnsureDispatch(
                    'ADODB.Recordset')

                # Open the entire database table as a Recordset object.
                # Set 'CursorType' (3rd parameter) to 'adOpenForwardOnly' (the
                # default) and 'LockType' (4th parameter) to 'adLockOptimistic'
                # (so that the recordset could be modified).
                recSet.Open(self.tableName, dbConnection, 0, 3)

                try:
                    # Fields to be updated in the newly added record.
                    fieldList = [field.fieldName for field in self.fields]
                    valueList = []

                    for field in self.fields:
                        value = getattr(self, field.attributeName)

                        # Convert long binary data to ADO-compliant format.
                        if field.dataType == 'LONGBINARY' and value is not None:
                            value = buffer(value.tostring())

                        # Convert date and time objects from 'datetime.date'
                        # and 'datetime.time' to pywin32's 'PyTime' data type.
                        elif field.dataType == 'DATE' and value is not None:
                            value = pywintypes.Time((
                                value.year, value.month, value.day, 0, 0, 0))

                        elif field.dataType == 'TIME' and value is not None:
                            # Date zero is at December 30, 1899, in MS Access.
                            value = pywintypes.Time((1899, 12, 30,
                                value.hour, value.minute, value.second))

                        valueList.append(value)

                    # Atomically append the record and populate its fields.
                    recSet.AddNew(fieldList, valueList)

                finally:
                    recSet.Close()
            finally:
                dbConnection.Close()

        except pywintypes.com_error:
            # Report all the errors in a uniform way.
            raise txt.Error(self.writeDataErrorMessage)

    def exportDataToExcel(self, excelFilePath):
        """Append the 'self' data record to an Excel file at the given path.

        Raise 'txt.Error' on failure.

        Warning: this function actually imports the entire file and then writes
        the data back, thus being quite time-consuming on large files."""

        # Read the data, if the file already exists.
        if os.path.exists(excelFilePath):
            recordList = self._importDataListFromExcelImpl(excelFilePath,
                self.exportDataToExcelErrorPrefix + ' (%s): ' %
                txt.quotePath(excelFilePath))
        else:
            recordList = []

        # Append the new record.
        recordList.append(self)

        # Write the data back.
        self.exportDataListToExcel(excelFilePath, recordList)

# **** Private functions ******************************************************
def _readExcelCell(workSheet, row, col, dataType, errorPrefix,
    workBookDateMode = None):
    """Assure that the content of an Excel cell is consistent with the given
    SQL data type and return the cell value.

    Raise 'txt.Error' on failure. 'workBookDateMode' has to be the
    workbook's 'datemode' attribute for date and time values."""

    # Check if the worksheet contains the requested cell.
    if workSheet.nrows <= row or workSheet.ncols <= col:
        raise txt.Error(errorPrefix +
            '%s cell in the %s worksheet contains no data' %
            (txt.quote(xlrd.cellname(row, col)), txt.quote(workSheet.name)))

    cellValue = workSheet.cell_value(row, col)
    cellType = workSheet.cell_type(row, col)

    # Accept empty cells as valid, similar to empty database records.
    if cellType == xlrd.XL_CELL_EMPTY:
        return None

    if dataType == 'DATE':
        value = excel.readDate(cellValue, cellType, workBookDateMode)

        if value is None:
            raise txt.Error(errorPrefix +
                '%s cell in the %s worksheet is not a valid date' %
                (txt.quote(xlrd.cellname(row, col)),
                txt.quote(workSheet.name)))

    elif dataType == 'TIME':
        value = excel.readTime(cellValue, cellType, workBookDateMode)

        if value is None:
            raise txt.Error(errorPrefix +
                '%s cell in the %s worksheet is not a valid time' %
                (txt.quote(xlrd.cellname(row, col)),
                txt.quote(workSheet.name)))

    elif dataType in ('INT', 'SMALLINT', 'SINGLE', 'DOUBLE'):
        if cellType != xlrd.XL_CELL_NUMBER:
            raise txt.Error(errorPrefix +
                '%s cell in the %s worksheet is not a valid number' %
                (txt.quote(xlrd.cellname(row, col)),
                txt.quote(workSheet.name)))

        # Make an extra check and convert the data type for integers.
        if dataType in ('INT', 'SMALLINT'):
            value = int(cellValue)

            if value != cellValue:
                raise txt.Error(errorPrefix +
                    '%s cell value in the %s worksheet is not an integer' %
                    (txt.quote(xlrd.cellname(row, col)),
                    txt.quote(workSheet.name)))
        else:
            # Do not convert the data type for floats.
            value = cellValue

    elif dataType == 'VARCHAR':
        if cellType != xlrd.XL_CELL_TEXT:
            raise txt.Error(errorPrefix +
                '%s cell in the %s worksheet is not a text field' %
                (txt.quote(xlrd.cellname(row, col)),
                txt.quote(workSheet.name)))

        value = cellValue

    # This will raise an exception if the given 'dataType' is not supported.
    return value

def _testExcelCell(workSheet, row, col, requiredValue, requiredType,
    errorPrefix, workBookDateMode = None):
    """Assure that the given Excel cell has the given value consistent with
    the given SQL data type.

    Raise 'txt.Error' on failure. 'workBookDateMode' has to be the
    workbook's 'datemode' attribute for date and time values."""

    if (_readExcelCell(workSheet, row, col, requiredType, errorPrefix,
        workBookDateMode) != requiredValue):

        # Format the value for the error message.
        if requiredType != 'VARCHAR':
            requiredValue = str(requiredValue)

        raise txt.Error(errorPrefix +
            '%s cell value in the %s worksheet is different from %s' %
            (txt.quote(xlrd.cellname(row, col)), txt.quote(workSheet.name),
            txt.quote(requiredValue)))

def _getArrayWorkSheet(arrayCount, recordIndex, totalRecords):
    """Calculate the name of the Excel worksheet to store array-like field
    data in and the starting data column for the given data record.

    Attributes:
      - 'arrayCount': number of array-like database fields;
      - 'recordIndex': zero-based index of the database record;
      - 'totalRecords': total number of data records in the database.

    Return values:
      - 'sheetName': name of the worksheet to store the array-like data in;
      - 'arrayColumn': starting column for the array-like data of the given
        database record in 'sheetName' worksheet."""

    # One worksheet may hold at most 256 columns; each record requires
    # 'arrayCount' columns in an array worksheet.
    recordsPerSheet = 256 / arrayCount
    # Zero-based index of the array worksheet for the given data record.
    sheetIndex = recordIndex / recordsPerSheet
    # Starting column for the data in the worksheet.
    arrayColumn = (recordIndex % recordsPerSheet) * arrayCount

    # Choose the most compact and readable name depending on the situation.
    if totalRecords <= recordsPerSheet:
        sheetName = 'Arrays'
    else:
        # First and last one-based record indices for the current worksheet.
        firstRecord = sheetIndex * recordsPerSheet + 1
        lastRecord = min((sheetIndex + 1) * recordsPerSheet, totalRecords)

        if firstRecord == lastRecord:
            sheetName = 'Arrays (%d)' % firstRecord
        else:
            sheetName = u'Arrays (%d\u2013%d)' % (firstRecord, lastRecord)

    return (sheetName, arrayColumn)

def _readExcelArray(workSheet, startingRow, col, arrayDataType, errorPrefix):
    """Read a numeric array stored in a column of the given Excel worksheet and
    return it as a Numpy array with the given data type.

    The array starts at 'startingRow' and spans downwards either up to the
    bottom of the worksheet or up to the first empty cell.

    Raise 'txt.Error' on failure."""

    # List of the values to be stored in the array.
    arrayData = []

    for row in range(startingRow, workSheet.nrows):

        cellValue = workSheet.cell_value(row, col)
        cellType = workSheet.cell_type(row, col)

        # Stop at the first empty cell, if any.
        if cellType == xlrd.XL_CELL_EMPTY:
            break

        # Otherwise, check that the cell contains valid numeric data.
        if cellType != xlrd.XL_CELL_NUMBER:
            raise txt.Error(errorPrefix +
                '%s cell in the %s worksheet is not a valid number' %
                (txt.quote(xlrd.cellname(row, col)),
                txt.quote(workSheet.name)))

        arrayData.append(cellValue)

    # Treat arrays with no data like empty database records.
    if len(arrayData) == 0:
        return None
    else:
        return numpy.array(arrayData, dtype = arrayDataType)
