# Copyright (c) 2011-2014, B.I.Stepanov Institute of Physics, National Academy # of Sciences of Belarus. # # This program is free software; you can redistribute it and/or modify it under # the terms of the GNU General Public License as published by the Free Software # Foundation; either version 2 of the License, or (at your option) any later # version. # # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more # details. # # You should have received a copy of the GNU General Public License along with # this program; if not, write to the Free Software Foundation, Inc., 51 # Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. import datetime import numpy import os import os.path import pywintypes import win32com.client import xlrd import xlwt from common.utils import access from common.utils import excel from common.utils import txt __all__ = ['DataRecord', 'DataField'] # ***************************************************************************** class DataField: """Description of a class instance attribute mapped to a database field or optionally to a column of an Excel worksheet. Attributes: - 'attributeName': name of the attribute; - 'fieldName': name of the corresponding database field, or Excel file column header label (if the data are to be read from an Excel file); - 'dataType': name of the SQL data type for the database field (see 'access.getDataTypeCode', 'access.getDataTypeName'). If the data are read from an Excel file instead of a database, this is used as a hint in cell format checking; - 'arrayDataType': Numpy array data type for array-like attributes. Not used unless 'dataType' is 'LONGBINARY'. - 'required': 'True' if this data field is mandatory for input database files and 'False' if the data field is optional. In the latter case, the value of the attribute would be set to 'None' if the corresponding database field is absent.""" def __init__(self, attributeName, fieldName, dataType, arrayDataType = None, required = True): self.attributeName = attributeName self.fieldName = fieldName self.dataType = dataType self.arrayDataType = arrayDataType self.required = required # ***************************************************************************** class DataRecord: """Base class for data structures intended for serialization in Microsoft Access database files or in Microsoft Excel files. Primary class attributes: - 'tableName': string holding the name of the database table to store the data in (or the name of the Excel worksheet if the data are to be read from an Excel file); - 'fields': list of 'DataField' instances describing the structure of the data records; Class attributes related to operations with database files (see 'createDatabase', 'checkDataFormat', 'readDataList', and 'writeData'): - 'extraFieldNames': list of names of required database fields that are not referenced in 'fields' (this is an empty list by default); - 'selectStatementExtraText': string to be appended to 'SELECT FROM ' command when querying the data from a database (this is an empty string by default); - 'createDatabaseErrorMessage': error string for database file creation failure; - 'openDatabaseErrorMessage': error string for failure opening a database file for reading; - 'queryFailureErrorMessage': error string for failure executing the SQL 'SELECT' statement on an opened database file; - 'invalidFormatErrorPrefix': prefix for error strings reporting failed database structure tests, ending with a colon and space; - 'writeDataErrorMessage': error string for failure adding a new record to a database file; Class attributes related to reading data from an Excel file instead of a database (see 'readDataListFromExcel'): - 'extraExcelHeaderRows': number of Excel file rows that are located above the main header row and may contain some additional information that doesn't need to be checked by the file format testing procedure (this is zero by default); - 'openExcelFileErrorMessage': error string for failure opening an Excel file for reading; - 'invalidExcelFormatErrorPrefix': prefix for error strings reporting failed Excel file structure tests, ending with a colon and space; Class attributes related to Excel import and export operations (see 'importDataListFromExcel', 'exportDataListToExcel' and 'exportDataToExcel'): - 'importDataFromExcelErrorPrefix': prefix for error strings reporting failed Excel import; - 'exportDataToExcelErrorPrefix': prefix for error strings reporting failed Excel export. Primary class attributes ('tableName' and 'fields') must be redefined in the derived class. Class attributes related to error messages must be redefined unless code capable of generating an error will never be called. 'onDataLoaded' virtual method may be redefined in order to add some custom processing to the data loading procedure. The differences between reading from an Excel file and importing from an Excel file are as follows: - Reading operation does not support array data fields (see 'DataField.arrayDataType'), but is more flexible with regard to layout of the data cells: it allows to read Excel files with extra rows above the main header (see 'extraExcelHeaderRows'). On the other hand, Excel file format for import/export is fixed and optimized for export of databases containing array data fields. It will contain exactly the same number of data fields as defined in the 'fields' class attribute, and a special numbering column will be included in the main worksheet to facilitate location of the corresponding array data; - For clarity, error messages used in these two operations differ a bit.""" # ---- Class attributes --------------------------------------------------- tableName = None fields = [] extraFieldNames = [] selectStatementExtraText = '' createDatabaseErrorMessage = None openDatabaseErrorMessage = None queryFailureErrorMessage = None invalidFormatErrorPrefix = None writeDataErrorMessage = None extraExcelHeaderRows = 0 openExcelFileErrorMessage = None invalidExcelFormatErrorPrefix = None importDataFromExcelErrorPrefix = None exportDataToExcelErrorPrefix = None # ---- Public methods ----------------------------------------------------- def __init__(self): """Initialize all the attributes referenced in 'cls.fields' to 'None'.""" for name in self.getAttributeNames(): setattr(self, name, None) # ---- Protected overridable methods -------------------------------------- def onDataLoaded(self): """Called by 'readDataList' and similar functions after all the data record's attributes are read from a database or an Excel file. Redefine this method in the derived class to add any custom processing to the data loading procedure.""" pass # ---- Public and private class methods ----------------------------------- @classmethod def getAttributeNames(cls): """Return the list of attribute names referenced in 'cls.fields'.""" return [field.attributeName for field in cls.fields] @classmethod def getRequiredAttributeNames(cls): """Return the list of attribute names referenced in 'cls.fields' whose 'required' attributes are set to 'True'.""" return [field.attributeName for field in cls.fields if field.required] @classmethod def getFieldName(cls, attributeName): """Return the name of the database field corresponding to the attribute with the given name.""" for field in cls.fields: if field.attributeName == attributeName: return field.fieldName @classmethod def createDatabase(cls, dbFilePath): """Create a new empty database file at the given path. Raise 'txt.Error' on failure.""" try: dbConnection = access.createDatabase(dbFilePath) try: # Construct a comma-separated list of field name-type pairs. fieldsDefinition = ', '.join( access.quoteName(field.fieldName) + ' ' + field.dataType for field in cls.fields) # Create an empty table. dbConnection.Execute('CREATE TABLE %s (%s)' % (access.quoteName(cls.tableName), fieldsDefinition)) finally: dbConnection.Close() except pywintypes.com_error: raise txt.Error(cls.createDatabaseErrorMessage) @classmethod def _checkDataFormatImpl(cls, dbFilePath): """Same as 'checkDataFormat', but return a tuple of opened database connection, opened Recordset pointing to the data, and a list of the database field names.""" # Connect to the database. try: dbConnection = access.openDatabase(dbFilePath) except pywintypes.com_error: raise txt.Error(cls.openDatabaseErrorMessage) try: # Check that the required table exists in the database. dbTableNames = access.getTableNames(dbConnection) if cls.tableName not in dbTableNames: raise txt.Error(cls.invalidFormatErrorPrefix + 'there is no table named %s' % txt.quote(cls.tableName)) # Check that the table contains the required fields. dbFieldNames = access.getFieldNames(dbConnection, cls.tableName) # Place 'extraFieldNames' entries earlier in the check list, # assuming that they are more important in identification of the # required database format. Skip optional database fields. for name in (cls.extraFieldNames + [field.fieldName for field in cls.fields if field.required]): if name not in dbFieldNames: raise txt.Error(cls.invalidFormatErrorPrefix + 'the database contains no %s field' % txt.quote(name)) try: # Construct a comma-separated list of the required field names. # Skip optional fields that are absent in the database. fieldList = ', '.join(access.quoteName(field.fieldName) for field in cls.fields if field.required or field.fieldName in dbFieldNames) # Query the required fields from the table. query = ('SELECT %s FROM %s' % (fieldList, access.quoteName( cls.tableName)) + cls.selectStatementExtraText) (recSet, recordsAffected) = dbConnection.Execute(query) except pywintypes.com_error: # Report SQL query failure. raise txt.Error(cls.queryFailureErrorMessage) except: dbConnection.Close() raise # 'recSet' is now an opened forward-only cursor pointing to the data. try: # Check types of the queried database fields. dbFields = recSet.Fields for field in cls.fields: # Skip optional fields that are absent in the database. if not (field.required or field.fieldName in dbFieldNames): continue dbFieldTypeCode = dbFields.Item(field.fieldName).Type if dbFieldTypeCode != access.getDataTypeCode(field.dataType): # Compose the error message. dataTypeName = access.getDataTypeName(field.dataType) assert dataTypeName is not None raise txt.Error(cls.invalidFormatErrorPrefix + 'type of the %s database field is different from %s' % (txt.quote(field.fieldName), dataTypeName)) except: recSet.Close() dbConnection.Close() raise return (dbConnection, recSet, dbFieldNames) @classmethod def checkDataFormat(cls, dbFilePath): """Assure that a file at the given path is a valid Access database and that its format matches the data structure. Raise 'txt.Error' on failure.""" (dbConnection, recSet, dbFieldNames) = cls._checkDataFormatImpl( dbFilePath) recSet.Close() dbConnection.Close() @classmethod def readDataList(cls, dbFilePath): """Open a database file at the given path for reading, check its format, query the available data and return them as a list of 'cls' instances. Raise 'txt.Error' on failure. Date and time values are stored as 'datetime.date' and 'datetime.time' instances; long binary values are stored as Numpy arrays.""" # Check the database format and query the data. (dbConnection, recSet, dbFieldNames) = cls._checkDataFormatImpl( dbFilePath) try: recordList = [] while not recSet.EOF: # Read the data record. dbFields = recSet.Fields # Fill in the data structure. dataRecord = cls() for field in cls.fields: # Don't query fields that are absent in the database. if field.fieldName not in dbFieldNames: value = None else: value = dbFields.Item(field.fieldName).Value # Convert long binary values to Numpy arrays unless the # data are missing. if field.dataType == 'LONGBINARY' and value is not None: assert isinstance(value, buffer) # Check consistency of buffer size and the data type. itemSize = numpy.dtype(field.arrayDataType).itemsize if len(value) % itemSize != 0: # Create an empty array to warn the user. value = numpy.zeros(0) else: value = numpy.frombuffer(value, dtype = field.arrayDataType) # Convert date and time objects from pywin32's 'PyTime' # data type to 'datetime.date' and 'datetime.time'. elif field.dataType == 'DATE' and value is not None: value = datetime.date( value.year, value.month, value.day) elif field.dataType == 'TIME' and value is not None: value = datetime.time( value.hour, value.minute, value.second) setattr(dataRecord, field.attributeName, value) # Post-process the loaded data. dataRecord.onDataLoaded() recordList.append(dataRecord) recSet.MoveNext() finally: recSet.Close() dbConnection.Close() return recordList @classmethod def readDataListFromExcel(cls, excelFilePath): """Same as 'readDataList', but read the data from a specially prepared Excel file. Warning: this function won't work if the data record contains array-like data fields (see 'DataField.arrayDataType').""" try: # 'on_demand' means 'load the requested worksheets only'. workBook = xlrd.open_workbook(excelFilePath, on_demand = True) except (IOError, xlrd.XLRDError): raise txt.Error(cls.openExcelFileErrorMessage) if not cls.tableName in workBook.sheet_names(): raise txt.Error(cls.invalidExcelFormatErrorPrefix + 'there is no worksheet named %s' % txt.quote(cls.tableName)) tableWorkSheet = workBook.sheet_by_name(cls.tableName) # The total number of header rows in the Excel file. rowOffset = cls.extraExcelHeaderRows + 1 # Assume absence of empty or malformed rows outside the header. recordCount = tableWorkSheet.nrows - rowOffset if recordCount < 0: raise txt.Error(cls.invalidExcelFormatErrorPrefix + '%s worksheet contains no data' % txt.quote(cls.tableName)) # A dictionary of column indices by their header row labels. fieldColumnIndices = {} # Search the header row for cells containing text values. for col in range(0, tableWorkSheet.ncols): cellValue = tableWorkSheet.cell_value(rowOffset - 1, col) cellType = tableWorkSheet.cell_type(rowOffset - 1, col) if cellType != xlrd.XL_CELL_TEXT: continue # Forbid duplicate header labels, so that the user is always warned # about any potentional ambiguity. if cellValue in fieldColumnIndices: raise txt.Error(cls.invalidExcelFormatErrorPrefix + '%s and %s header cells have the same name (%s)' % (txt.quote(xlrd.cellname(rowOffset - 1, fieldColumnIndices[cellValue])), txt.quote(xlrd.cellname(rowOffset - 1, col)), txt.quote(cellValue))) fieldColumnIndices[cellValue] = col # Make sure that all the required labels are present in the header row. for field in cls.fields: if field.required and field.fieldName not in fieldColumnIndices: raise txt.Error(cls.invalidExcelFormatErrorPrefix + 'there is no header cell named %s in the %s worksheet' % (txt.quote(field.fieldName), txt.quote(tableWorkSheet.name))) recordList = [] for recordIndex in range(recordCount): row = recordIndex + rowOffset dataRecord = cls() # Fill in the data. for field in cls.fields: # Skip data columns that are not present in the worksheet. if field.fieldName in fieldColumnIndices: value = _readExcelCell(tableWorkSheet, row, fieldColumnIndices[field.fieldName], field.dataType, cls.invalidExcelFormatErrorPrefix, workBook.datemode) else: value = None setattr(dataRecord, field.attributeName, value) # Post-process the loaded data. dataRecord.onDataLoaded() recordList.append(dataRecord) return recordList @staticmethod def getExcelExportFilePath(dbFilePath): """Return the standard path to an Excel file used for data export.""" # Replace the '.mdb' extension with '.xls'. return os.path.splitext(dbFilePath)[0] + '.xls' @classmethod def _importDataListFromExcelImpl(cls, excelFilePath, errorMessagePrefix): """Same as 'importDataListFromExcel', but use a custom error prefix.""" invalidFormatPrefix = errorMessagePrefix + 'file format is invalid: ' try: # 'on_demand' means 'load the requested worksheets only'. workBook = xlrd.open_workbook(excelFilePath, on_demand = True) except (IOError, xlrd.XLRDError): raise txt.Error(errorMessagePrefix + 'the file is invalid or may not be opened for reading') # Assume that scalar table fields are stored in a worksheet with the # same name as that of the table. if not cls.tableName in workBook.sheet_names(): raise txt.Error(invalidFormatPrefix + 'there is no worksheet named %s' % txt.quote(cls.tableName)) tableWorkSheet = workBook.sheet_by_name(cls.tableName) # Make sure that the header is present. if tableWorkSheet.nrows < 1: raise txt.Error(invalidFormatPrefix + '%s worksheet contains no data' % txt.quote(cls.tableName)) # Separate scalar and array-like fields of the database table. scalarFields = [field for field in cls.fields if field.dataType != 'LONGBINARY'] arrayFields = [field for field in cls.fields if field.dataType == 'LONGBINARY'] # Assume that header labels coincide with table field names. for i in range(len(scalarFields)): _testExcelCell(tableWorkSheet, 0, i + 1, scalarFields[i].fieldName, 'VARCHAR', invalidFormatPrefix) recordList = [] totalRecords = tableWorkSheet.nrows - 1 for recordIndex in range(totalRecords): row = recordIndex + 1 # Check if the first column contains a valid one-based record index. _testExcelCell(tableWorkSheet, row, 0, recordIndex + 1, 'INT', invalidFormatPrefix) dataRecord = cls() # Read the scalar fields. for i in range(len(scalarFields)): value = _readExcelCell(tableWorkSheet, row, i + 1, scalarFields[i].dataType, invalidFormatPrefix, workBook.datemode) setattr(dataRecord, scalarFields[i].attributeName, value) # Read the array data fields, if any. if len(arrayFields) != 0: # Assume that array-like data are stored on separate worksheets # with specially arranged names. Data for several records may # be stored at the same worksheet. (arraySheetName, arrayCol) = _getArrayWorkSheet( len(arrayFields), recordIndex, totalRecords) if not arraySheetName in workBook.sheet_names(): raise txt.Error(errorPrefix + 'there is no worksheet named %s' % arraySheetName) arrayWorkSheet = workBook.sheet_by_name(arraySheetName) for i in range(len(arrayFields)): # Each record is represented by 'len(arrayFields)' columns # with a header and array data below it. # Cells in the first header row hold the record index. _testExcelCell(arrayWorkSheet, 0, arrayCol + i, recordIndex + 1, 'INT', invalidFormatPrefix) # Cells in the second header row hold table field names. _testExcelCell(arrayWorkSheet, 1, arrayCol + i, arrayFields[i].fieldName, 'VARCHAR', invalidFormatPrefix) value = _readExcelArray(arrayWorkSheet, 2, arrayCol + i, arrayFields[i].arrayDataType, invalidFormatPrefix) setattr(dataRecord, arrayFields[i].attributeName, value) # Post-process the loaded data. dataRecord.onDataLoaded() recordList.append(dataRecord) return recordList @classmethod def importDataListFromExcel(cls, excelFilePath): """Reverse of 'exportDataListToExcel'. Similar to 'readDataList', but read the data from a strictly formed Excel file instead of a database.""" return cls._importDataListFromExcelImpl(excelFilePath, cls.importDataFromExcelErrorPrefix + ' (%s): ' % txt.quotePath(excelFilePath)) @classmethod def exportDataListToExcel(cls, excelFilePath, recordList): """Write the given list of 'cls' instances to an Excel file compatible with 'importDataListFromExcel'. Raise 'txt.Error' on failure.""" workBook = xlwt.Workbook() tableWorkSheet = workBook.add_sheet(cls.tableName) # Separate scalar and array-like fields of the database table. scalarFields = [field for field in cls.fields if field.dataType != 'LONGBINARY'] arrayFields = [field for field in cls.fields if field.dataType == 'LONGBINARY'] # Write the scalar worksheet header. for i in range(len(scalarFields)): tableWorkSheet.write(0, i + 1, scalarFields[i].fieldName) # Widen columns holding dates to an empirically determined width # of 11 digits, so that they have enough room for the values. if scalarFields[i].dataType == 'DATE': tableWorkSheet.col(i + 1).width = 256 * 11 # The current array worksheet or 'None' if there are no array fields. arrayWorkSheet = None for recordIndex in range(len(recordList)): dataRecord = recordList[recordIndex] row = recordIndex + 1 # Write the one-based record index in the first column. tableWorkSheet.write(row, 0, recordIndex + 1) # Write the scalar fields. for i in range(len(scalarFields)): value = getattr(dataRecord, scalarFields[i].attributeName) # Use custom styles for date and time values. if scalarFields[i].dataType == 'DATE' and value is not None: style = xlwt.XFStyle() style.num_format_str = 'yyyy-mm-dd' tableWorkSheet.write(row, i + 1, value, style) elif scalarFields[i].dataType == 'TIME' and value is not None: style = xlwt.XFStyle() style.num_format_str = 'hh:mm:ss' tableWorkSheet.write(row, i + 1, value, style) else: # Numeric and string values don't need to be modified. tableWorkSheet.write(row, i + 1, value) # Write the array fields, if any. if len(arrayFields) != 0: # Each record is represented by 'len(arrayFields)' columns # with a header and array data below it. (arraySheetName, arrayCol) = _getArrayWorkSheet( len(arrayFields), recordIndex, len(recordList)) # Append the data to the current worksheet until it fills up, # then switch to the next one. if (arrayWorkSheet is None or arrayWorkSheet.name != arraySheetName): arrayWorkSheet = workBook.add_sheet(arraySheetName) for i in range(len(arrayFields)): # Cells in the first header row hold the record index. arrayWorkSheet.write(0, arrayCol + i, recordIndex + 1) # Cells in the second header row hold table field names. arrayWorkSheet.write(1, arrayCol + i, arrayFields[i].fieldName) # Below the header is the data array. arrayData = getattr(dataRecord, arrayFields[i].attributeName) if arrayData is not None: for row in range(len(arrayData)): # Convert the value from its Numpy type to float. arrayWorkSheet.write(row + 2, arrayCol + i, float(arrayData[row])) # Write the data, reporting all the possible errors in a uniform way. try: workBook.save(excelFilePath) except Exception: raise txt.Error(cls.exportDataToExcelErrorPrefix + ' (%s)' % txt.quotePath(excelFilePath)) # ---- Public methods ----------------------------------------------------- def writeData(self, dbFilePath): """Append the 'self' data record to a database file at the given path. Raise 'txt.Error' on failure. Note: no thorough examination of the database file structure is carried out; use 'checkDataFormat' for that.""" try: # Connect to the database. dbConnection = access.openDatabase(dbFilePath) try: # Use 'EnsureDispatch' to precompile the interface. recSet = win32com.client.gencache.EnsureDispatch( 'ADODB.Recordset') # Open the entire database table as a Recordset object. # Set 'CursorType' (3rd parameter) to 'adOpenForwardOnly' (the # default) and 'LockType' (4th parameter) to 'adLockOptimistic' # (so that the recordset could be modified). recSet.Open(self.tableName, dbConnection, 0, 3) try: # Fields to be updated in the newly added record. fieldList = [field.fieldName for field in self.fields] valueList = [] for field in self.fields: value = getattr(self, field.attributeName) # Convert long binary data to ADO-compliant format. if field.dataType == 'LONGBINARY' and value is not None: value = buffer(value.tostring()) # Convert date and time objects from 'datetime.date' # and 'datetime.time' to pywin32's 'PyTime' data type. elif field.dataType == 'DATE' and value is not None: value = pywintypes.Time(( value.year, value.month, value.day, 0, 0, 0)) elif field.dataType == 'TIME' and value is not None: # Date zero is at December 30, 1899, in MS Access. value = pywintypes.Time((1899, 12, 30, value.hour, value.minute, value.second)) valueList.append(value) # Atomically append the record and populate its fields. recSet.AddNew(fieldList, valueList) finally: recSet.Close() finally: dbConnection.Close() except pywintypes.com_error: # Report all the errors in a uniform way. raise txt.Error(self.writeDataErrorMessage) def exportDataToExcel(self, excelFilePath): """Append the 'self' data record to an Excel file at the given path. Raise 'txt.Error' on failure. Warning: this function actually imports the entire file and then writes the data back, thus being quite time-consuming on large files.""" # Read the data, if the file already exists. if os.path.exists(excelFilePath): recordList = self._importDataListFromExcelImpl(excelFilePath, self.exportDataToExcelErrorPrefix + ' (%s): ' % txt.quotePath(excelFilePath)) else: recordList = [] # Append the new record. recordList.append(self) # Write the data back. self.exportDataListToExcel(excelFilePath, recordList) # **** Private functions ****************************************************** def _readExcelCell(workSheet, row, col, dataType, errorPrefix, workBookDateMode = None): """Assure that the content of an Excel cell is consistent with the given SQL data type and return the cell value. Raise 'txt.Error' on failure. 'workBookDateMode' has to be the workbook's 'datemode' attribute for date and time values.""" # Check if the worksheet contains the requested cell. if workSheet.nrows <= row or workSheet.ncols <= col: raise txt.Error(errorPrefix + '%s cell in the %s worksheet contains no data' % (txt.quote(xlrd.cellname(row, col)), txt.quote(workSheet.name))) cellValue = workSheet.cell_value(row, col) cellType = workSheet.cell_type(row, col) # Accept empty cells as valid, similar to empty database records. if cellType == xlrd.XL_CELL_EMPTY: return None if dataType == 'DATE': value = excel.readDate(cellValue, cellType, workBookDateMode) if value is None: raise txt.Error(errorPrefix + '%s cell in the %s worksheet is not a valid date' % (txt.quote(xlrd.cellname(row, col)), txt.quote(workSheet.name))) elif dataType == 'TIME': value = excel.readTime(cellValue, cellType, workBookDateMode) if value is None: raise txt.Error(errorPrefix + '%s cell in the %s worksheet is not a valid time' % (txt.quote(xlrd.cellname(row, col)), txt.quote(workSheet.name))) elif dataType in ('INT', 'SMALLINT', 'SINGLE', 'DOUBLE'): if cellType != xlrd.XL_CELL_NUMBER: raise txt.Error(errorPrefix + '%s cell in the %s worksheet is not a valid number' % (txt.quote(xlrd.cellname(row, col)), txt.quote(workSheet.name))) # Make an extra check and convert the data type for integers. if dataType in ('INT', 'SMALLINT'): value = int(cellValue) if value != cellValue: raise txt.Error(errorPrefix + '%s cell value in the %s worksheet is not an integer' % (txt.quote(xlrd.cellname(row, col)), txt.quote(workSheet.name))) else: # Do not convert the data type for floats. value = cellValue elif dataType == 'VARCHAR': if cellType != xlrd.XL_CELL_TEXT: raise txt.Error(errorPrefix + '%s cell in the %s worksheet is not a text field' % (txt.quote(xlrd.cellname(row, col)), txt.quote(workSheet.name))) value = cellValue # This will raise an exception if the given 'dataType' is not supported. return value def _testExcelCell(workSheet, row, col, requiredValue, requiredType, errorPrefix, workBookDateMode = None): """Assure that the given Excel cell has the given value consistent with the given SQL data type. Raise 'txt.Error' on failure. 'workBookDateMode' has to be the workbook's 'datemode' attribute for date and time values.""" if (_readExcelCell(workSheet, row, col, requiredType, errorPrefix, workBookDateMode) != requiredValue): # Format the value for the error message. if requiredType != 'VARCHAR': requiredValue = str(requiredValue) raise txt.Error(errorPrefix + '%s cell value in the %s worksheet is different from %s' % (txt.quote(xlrd.cellname(row, col)), txt.quote(workSheet.name), txt.quote(requiredValue))) def _getArrayWorkSheet(arrayCount, recordIndex, totalRecords): """Calculate the name of the Excel worksheet to store array-like field data in and the starting data column for the given data record. Attributes: - 'arrayCount': number of array-like database fields; - 'recordIndex': zero-based index of the database record; - 'totalRecords': total number of data records in the database. Return values: - 'sheetName': name of the worksheet to store the array-like data in; - 'arrayColumn': starting column for the array-like data of the given database record in 'sheetName' worksheet.""" # One worksheet may hold at most 256 columns; each record requires # 'arrayCount' columns in an array worksheet. recordsPerSheet = 256 / arrayCount # Zero-based index of the array worksheet for the given data record. sheetIndex = recordIndex / recordsPerSheet # Starting column for the data in the worksheet. arrayColumn = (recordIndex % recordsPerSheet) * arrayCount # Choose the most compact and readable name depending on the situation. if totalRecords <= recordsPerSheet: sheetName = 'Arrays' else: # First and last one-based record indices for the current worksheet. firstRecord = sheetIndex * recordsPerSheet + 1 lastRecord = min((sheetIndex + 1) * recordsPerSheet, totalRecords) if firstRecord == lastRecord: sheetName = 'Arrays (%d)' % firstRecord else: sheetName = u'Arrays (%d\u2013%d)' % (firstRecord, lastRecord) return (sheetName, arrayColumn) def _readExcelArray(workSheet, startingRow, col, arrayDataType, errorPrefix): """Read a numeric array stored in a column of the given Excel worksheet and return it as a Numpy array with the given data type. The array starts at 'startingRow' and spans downwards either up to the bottom of the worksheet or up to the first empty cell. Raise 'txt.Error' on failure.""" # List of the values to be stored in the array. arrayData = [] for row in range(startingRow, workSheet.nrows): cellValue = workSheet.cell_value(row, col) cellType = workSheet.cell_type(row, col) # Stop at the first empty cell, if any. if cellType == xlrd.XL_CELL_EMPTY: break # Otherwise, check that the cell contains valid numeric data. if cellType != xlrd.XL_CELL_NUMBER: raise txt.Error(errorPrefix + '%s cell in the %s worksheet is not a valid number' % (txt.quote(xlrd.cellname(row, col)), txt.quote(workSheet.name))) arrayData.append(cellValue) # Treat arrays with no data like empty database records. if len(arrayData) == 0: return None else: return numpy.array(arrayData, dtype = arrayDataType)