wc_utils/workbook/io.py
""" IO utilities
:Author: Jonathan Karr <karr@mssm.edu>
:Author: Arthur Goldberg <Arthur.Goldberg@mssm.edu>
:Date: 2016-11-28
:Copyright: 2016-2018, Karr Lab
:License: MIT
"""
from abc import ABCMeta, abstractmethod
from datetime import datetime
from glob import glob
from itertools import chain
from math import isnan, isinf
from natsort import natsorted, ns
from openpyxl import load_workbook
from openpyxl.cell.cell import Cell
from openpyxl.styles import Font, PatternFill
from openpyxl.styles.colors import Color
from openpyxl.utils import get_column_letter
from os.path import basename, dirname, splitext
from shutil import copyfile
from wc_utils.workbook.core import Workbook, Worksheet, Row, Formula
import copy
import enum
import openpyxl.cell.cell
import pyexcel
import xlsxwriter
class Writer(object, metaclass=ABCMeta):
""" Write data to file(s)
Attributes:
path (:obj:`str`): path to file(s)
"""
def __init__(self, path, title=None, description=None, keywords=None, version=None, language=None, creator=None):
"""
Args:
path (:obj:`str`): path to file(s)
title (:obj:`str`, optional): title
description (:obj:`str`, optional): description
keywords (:obj:`str`, optional): keywords
version (:obj:`str`, optional): version
language (:obj:`str`, optional): language
creator (:obj:`str`, optional): creator
"""
self.path = path
self.title = title
self.description = description
self.keywords = keywords
self.version = version
self.language = language
self.creator = creator
def run(self, data, style=None, validation=None, protected=False):
""" Write workbook to file(s)
Args:
data (:obj:`Workbook`): python representation of data; each element must be a string, boolean, integer, float, or NoneType
style (:obj:`WorkbookStyle`, optional): workbook style
validation (:obj:`WorkbookValidation`, optional): validation
protected (:obj:`bool`, optional): if :obj:`True`, protect the worksheet
"""
self.initialize_workbook()
style = style or WorkbookStyle()
validation = validation or WorkbookValidation()
for ws_name, ws_data in data.items():
ws_style = style.get(ws_name, None)
ws_validation = validation.get(ws_name, None)
self.write_worksheet(ws_name, ws_data, style=ws_style, validation=ws_validation, protected=protected)
self.finalize_workbook()
@abstractmethod
def initialize_workbook(self):
""" Initialize workbook """
pass # pragma: no cover
@abstractmethod
def write_worksheet(self, sheet_name, data, style=None, validation=None, protected=False):
""" Write worksheet to file
Args:
sheet_name (:obj:`str`): sheet name
data (:obj:`Worksheet`): python representation of data; each element must be a string, boolean, integer, float, or NoneType
style (:obj:`WorksheetStyle`, optional): worksheet style
validation (:obj:`WorksheetValidation`, optional): worksheet validation
protected (:obj:`bool`, optional): if :obj:`True`, protect the worksheet
"""
pass # pragma: no cover
@abstractmethod
def finalize_workbook(self):
""" Finalize workbook """
pass # pragma: no cover
class Reader(object, metaclass=ABCMeta):
""" Read data from file(s)
Attributes:
path (:obj:`str`): path to file(s)
"""
def __init__(self, path):
"""
Args:
path (:obj:`str`): path to file(s)
"""
self.path = path
def run(self):
""" Read data from file(s)
Returns:
:obj:`Workbook`: python representation of data
"""
workbook = self.initialize_workbook()
names = self.get_sheet_names()
for name in names:
workbook[name] = self.read_worksheet(name)
return workbook
@abstractmethod
def initialize_workbook(self):
""" Initialize workbook
Returns:
:obj:`Workbook`: data
"""
pass # pragma: no cover
@abstractmethod
def get_sheet_names(self):
""" Get names of sheets contained within path
Returns:
obj:`list` of `str`: list of sheet names
"""
pass # pragma: no cover
@abstractmethod
def read_worksheet(self, sheet_name, ignore_empty_final_rows=True, ignore_empty_final_cols=True):
""" Read data from file
Args:
sheet_name (:obj:`str`): sheet name
ignore_empty_final_rows (:obj:`bool`, optional): if :obj:`True`, ignore empty final rows
ignore_empty_final_cols (:obj:`bool`, optional): if :obj:`True`, ignore empty final columns
Returns:
:obj:`Worksheet`: data
"""
pass # pragma: no cover
class ExcelWriter(Writer):
""" Write data to Excel file
Attributes:
xls_workbook (:obj:`xlsxwriter.Workbook`): Excel workbook
"""
def __init__(self, path, title=None, description=None, keywords=None, version=None, language=None,
creator=None):
"""
Args:
path (:obj:`str`): path to file(s)
title (:obj:`str`, optional): title
description (:obj:`str`, optional): description
keywords (:obj:`str`, optional): keywords
version (:obj:`str`, optional): version
language (:obj:`str`, optional): language
creator (:obj:`str`, optional): creator
Raises:
:obj:`ValueError`: if file extension is not '.xlsx'
"""
_, ext = splitext(path)
if ext != '.xlsx':
raise ValueError("Extension of path '{}' must be '.xlsx'".format(path))
super(ExcelWriter, self).__init__(path,
title=title, description=description,
keywords=keywords, version=version, language=language, creator=creator)
self.xls_workbook = None
self._worksheet_styles = None
def initialize_workbook(self):
""" Initialize workbook """
# Initialize workbook
self.xls_workbook = wb = xlsxwriter.Workbook(self.path, {
'strings_to_numbers': False,
'strings_to_formulas': False,
'strings_to_urls': False,
'nan_inf_to_errors': True,
'default_date_format': 'yyyy-mm-dd',
})
# set metadata
wb.set_properties({
'title': self.title,
'keywords': self.keywords,
})
now = datetime.now()
wb.set_custom_property('description', self.description or '')
wb.set_custom_property('version', self.version or '')
wb.set_custom_property('language', self.language or '')
wb.set_custom_property('creator', self.creator or '')
wb.set_custom_property('created', now)
wb.set_custom_property('modified', now)
def write_worksheet(self, sheet_name, data, style=None, validation=None, protected=False, include_help_comments=False):
""" Write worksheet to file
Args:
sheet_name (:obj:`str`): sheet name
data (:obj:`Worksheet`): python representation of data; each element must be a string, boolean, integer, float, or NoneType
style (:obj:`WorksheetStyle`, optional): worksheet style
validation (:obj:`WorksheetValidation`, optional): worksheet validation
protected (:obj:`bool`, optional): if :obj:`True`, protect the worksheet
include_help_comments (:obj:`bool`, optional): if :obj:`True`, include help comments
"""
xls_worksheet = self.xls_workbook.add_worksheet(sheet_name)
# data and formatting
if protected:
xls_worksheet.protect('', {
'insert_columns': False,
'delete_columns': False,
'insert_rows': True,
'delete_rows': True,
'insert_hyperlinks': False,
'objects': False,
'scenarios': False,
'pivot_tables': False,
'format_cells': False,
'format_columns': True,
'format_rows': True,
'sort': True,
'autofilter': True,
'select_locked_cells': True,
'select_unlocked_cells': True,
})
style = style or WorksheetStyle()
title_format = self.xls_workbook.add_format()
title_format.set_align('left')
title_format.set_align('top')
title_format.set_text_wrap(False)
title_format.set_font_name(style.font_family)
title_format.set_font_size(style.font_size)
title_format.set_bold(style.title_row_font_bold)
if style.title_row_fill_pattern:
if style.title_row_fill_pattern == 'solid':
title_format.set_pattern(1)
else:
raise ValueError('Unsupported pattern {}'.format(style.title_row_fill_pattern))
if style.title_row_fill_fgcolor:
title_format.set_fg_color('#' + style.title_row_fill_fgcolor)
title_format.set_locked(True)
head_format = self.xls_workbook.add_format()
head_format.set_align('left')
head_format.set_align('top')
head_format.set_text_wrap(True)
head_format.set_font_name(style.font_family)
head_format.set_font_size(style.font_size)
head_format.set_bold(style.head_row_font_bold)
if style.head_row_fill_pattern:
if style.head_row_fill_pattern == 'solid':
head_format.set_pattern(1)
else:
raise ValueError('Unsupported pattern {}'.format(style.head_row_fill_pattern))
if style.head_row_fill_fgcolor:
head_format.set_fg_color('#' + style.head_row_fill_fgcolor)
head_format.set_locked(True)
blank_head_format = self.xls_workbook.add_format()
blank_head_format.set_align('left')
blank_head_format.set_align('top')
blank_head_format.set_text_wrap(True)
blank_head_format.set_font_name(style.font_family)
blank_head_format.set_font_size(style.font_size)
blank_head_format.set_bold(True)
if style.head_row_fill_pattern:
if style.head_row_fill_pattern == 'solid':
blank_head_format.set_pattern(1)
else: # pragma: no cover # unreachable because error already checked above
raise ValueError('Unsupported pattern {}'.format(style.head_row_fill_pattern))
if style.blank_head_fill_fgcolor:
blank_head_format.set_fg_color('#' + style.blank_head_fill_fgcolor)
blank_head_format.set_locked(True)
extra_head_format = self.xls_workbook.add_format()
extra_head_format.set_align('left')
extra_head_format.set_align('top')
extra_head_format.set_text_wrap(True)
extra_head_format.set_font_name(style.font_family)
extra_head_format.set_font_size(style.font_size)
extra_head_format.set_bold(True)
if style.head_row_fill_pattern:
if style.head_row_fill_pattern == 'solid':
extra_head_format.set_pattern(1)
else: # pragma: no cover # unreachable because error already checked above
raise ValueError('Unsupported pattern {}'.format(style.head_row_fill_pattern))
if style.head_row_fill_fgcolor:
extra_head_format.set_fg_color('#' + style.head_row_fill_fgcolor)
extra_head_format.set_locked(False)
merged_head_format = self.xls_workbook.add_format()
merged_head_format.set_align('center')
merged_head_format.set_align('top')
merged_head_format.set_text_wrap(True)
merged_head_format.set_font_name(style.font_family)
merged_head_format.set_font_size(style.font_size)
merged_head_format.set_bold(True)
if style.head_row_fill_pattern:
if style.head_row_fill_pattern == 'solid':
merged_head_format.set_pattern(1)
else: # pragma: no cover # unreachable because error already checked above
raise ValueError('Unsupported pattern {}'.format(style.head_row_fill_pattern))
if style.merged_head_fill_fgcolor:
merged_head_format.set_fg_color('#' + style.merged_head_fill_fgcolor)
merged_head_format.set_locked(True)
body_format = self.xls_workbook.add_format()
body_format.set_align('left')
body_format.set_align('top')
body_format.set_text_wrap(True)
body_format.set_font_name(style.font_family)
body_format.set_font_size(style.font_size)
body_format.set_bold(False)
body_format.set_locked(False)
merge_body_format = self.xls_workbook.add_format()
merge_body_format.set_align('center')
merge_body_format.set_align('vcenter')
merge_body_format.set_text_wrap(True)
merge_body_format.set_font_name(style.font_family)
merge_body_format.set_font_size(style.font_size)
merge_body_format.set_bold(False)
merge_body_format.set_locked(False)
n_rows = len(data)
if data:
n_cols = max(len(row) for row in data)
else:
n_cols = 0
frozen_rows = style.title_rows + style.head_rows
frozen_columns = style.head_columns
row_height = style.row_height
col_width = style.col_width
if isinf(style.extra_rows):
extra_rows = min(100, 2**20 - n_rows)
else:
extra_rows = style.extra_rows
if isinf(style.extra_columns):
extra_columns = min(100, 2**14 - n_cols)
else:
extra_columns = style.extra_columns
# format rows
if isnan(row_height):
default_row_height = None
else:
default_row_height = row_height
hide_unused_rows = not isinf(style.extra_rows)
xls_worksheet.set_default_row(default_row_height, hide_unused_rows)
# format columns
if not isnan(col_width) and n_cols >= 1 and not isinf(style.extra_columns):
result = xls_worksheet.set_column(0, n_cols - 1, width=col_width, options={'hidden': False})
if result == -1:
raise ValueError("Column is out of bounds")
assert result in [0, None], "xlsxwriter error: {}".format(result)
# hyperlinks
for hyperlink in style.hyperlinks:
result = xls_worksheet.write_url(hyperlink.i_row, hyperlink.i_col, hyperlink.url, tip=hyperlink.tip)
if result == -1:
raise ValueError("Cell is out of bounds")
elif result == -2:
raise ValueError("String must be <= 32,000 characters")
elif result == -3:
raise ValueError("URL must be <= 2,079 characters")
elif result == -4:
raise ValueError("Worksheet must have <= 65,530 URLs")
assert result in [0, None], "xlsxwriter error: {}".format(result)
# write data
def get_format(i_row, i_col, style=style,
title_format=title_format, blank_head_format=blank_head_format,
head_format=head_format, body_format=body_format,
frozen_rows=frozen_rows, frozen_columns=frozen_columns):
if i_row < style.title_rows:
format = title_format
elif i_row < frozen_rows or i_col < frozen_columns:
if value is None or value == '':
format = blank_head_format
else:
format = head_format
else:
format = body_format
return format
for i_row, row in enumerate(data):
for i_col, value in enumerate(row + [None] * (n_cols - len(row))):
format = get_format(i_row, i_col)
self.write_cell(xls_worksheet, sheet_name, i_row, i_col, value, format)
if not isnan(row_height) and not isinf(style.extra_rows):
result = xls_worksheet.set_row(i_row, options={'hidden': False})
if result == -1:
raise ValueError("Row is out of bounds")
assert result in [0, None], "xlsxwriter error: {}".format(result)
# format extra columns
if not isinf(style.extra_columns):
result = xls_worksheet.set_column(n_cols + style.extra_columns, 2**14 - 1,
options={'hidden': True})
if result == -1:
raise ValueError("Column is out of bounds")
assert result in [0, None], "xlsxwriter error: {}".format(result)
for i_row in range(n_rows):
for i_col in range(n_cols, n_cols + extra_columns):
if i_row < frozen_rows or i_col < frozen_columns:
format = extra_head_format
else:
format = body_format
result = xls_worksheet.write_blank(i_row, i_col, None, format)
if result == -1:
raise ValueError("Row is out of bounds")
assert result in [0, None], "xlsxwriter error: {}".format(result)
# format extra rows
if not isinf(style.extra_rows):
for i_row in range(n_rows, n_rows + style.extra_rows):
result = xls_worksheet.set_row(i_row, options={'hidden': False})
if result == -1:
raise ValueError("Row is out of bounds")
assert result in [0, None], "xlsxwriter error: {}".format(result)
for i_col in range(n_cols + extra_columns):
if i_row < frozen_rows or i_col < frozen_columns:
format = extra_head_format
else:
format = body_format
result = xls_worksheet.write_blank(i_row, i_col, None, format)
if result == -1:
raise ValueError("Row is out of bounds")
assert result in [0, None], "xlsxwriter error: {}".format(result)
# merge ranges
for row_start, col_start, row_end, col_end in style.merge_ranges:
# get data
value = set()
for i_row in range(row_start, row_end + 1):
for i_col in range(col_start, col_end + 1):
if i_col < len(data[i_row]) and data[i_row][i_col] is not None:
value.add(data[i_row][i_col])
if len(value) == 0:
value = None
elif len(value) == 1:
value = list(value)[0]
else:
raise ValueError('Merge range {}{}:{}{} with values {{"{}"}} can have at most 1 value'.format(
get_column_letter(col_start + 1), row_start + 1,
get_column_letter(col_end + 1), row_end + 1,
'", "'.join(str(v) for v in value)))
if row_start <= frozen_rows or col_start <= frozen_columns:
format = merged_head_format
else:
format = merge_body_format
result = xls_worksheet.merge_range(row_start, col_start, row_end, col_end, None)
if result == -1:
raise ValueError("Range of out of bounds")
assert result in [0, None], "xlsxwriter error: {}".format(result)
self.write_cell(xls_worksheet, sheet_name, row_start, col_start, value, format)
# validation
if validation:
validation.apply(xls_worksheet,
frozen_rows, frozen_columns,
n_rows + extra_rows - 1, n_cols + extra_columns - 1,
include_help_comments=include_help_comments)
# freeze panes
xls_worksheet.freeze_panes(frozen_rows, frozen_columns)
# auto filter
if style.auto_filter and n_cols > 0 and n_cols > 0 and frozen_rows > 0:
xls_worksheet.autofilter(frozen_rows - 1, 0, n_rows - 1, n_cols - 1)
def write_cell(self, xls_worksheet, sheet_name, i_row, i_col, value, format):
""" Write a value to a cell
Args:
xls_worksheet (:obj:`xlsxwriter.Worksheet`): Excel worksheet
sheet_name (:obj:`str`): sheet name
i_row (:obj:`int`): row of cell to write
i_col (:obj:`int`): column of cell to write
value (:obj:`object`): value to write
format (:obj:`xlsxwriter.Format`): format for the cell
"""
if value is None or value == '':
result = xls_worksheet.write_blank(i_row, i_col, value, format)
elif isinstance(value, str):
result = xls_worksheet.write_string(i_row, i_col, value, format)
elif isinstance(value, bool):
result = xls_worksheet.write_boolean(i_row, i_col, value, format)
elif isinstance(value, int):
result = xls_worksheet.write_number(i_row, i_col, float(value), format)
elif isinstance(value, float):
result = xls_worksheet.write_number(i_row, i_col, value, format)
elif isinstance(value, Formula):
result = xls_worksheet.write_formula(i_row, i_col,
value.formula, format, value.value)
else:
raise ValueError('Unsupported type {} at {}:{}:{}{}'.format(
value.__class__.__name__,
self.path, sheet_name, get_column_letter(i_col + 1), i_row + 1))
if result == -1:
raise ValueError("Row is out of bounds")
if result == -2:
raise ValueError("Value must be <= 32,000 characters")
assert result in [0, None], 'Error code {} when writing "{}" to worksheet "{}"'.format(
result, value, sheet_name)
def finalize_workbook(self):
""" Finalize workbook """
self.xls_workbook.close()
class ExcelReader(Reader):
""" Read data from Excel file
Attributes:
xls_workbook (:obj:`Workbook`): Excel workbook
"""
def __init__(self, path):
"""
Args:
path (:obj:`str`): path to file(s)
Raises:
:obj:`ValueError`: if file extension is not '.xlsx'
"""
_, ext = splitext(path)
if ext != '.xlsx':
raise ValueError("Extension of path '{}' must be '.xlsx'".format(path))
super(ExcelReader, self).__init__(path)
self.xls_workbook = None
def initialize_workbook(self):
""" Initialize workbook
Returns:
:obj:`Workbook`: data
"""
self.xls_workbook = load_workbook(filename=self.path)
return Workbook()
def get_sheet_names(self):
""" Get names of sheets contained within path
Returns:
obj:`list` of `str`: list of sheet names
"""
return self.xls_workbook.sheetnames
def read_worksheet(self, sheet_name, ignore_empty_final_rows=True, ignore_empty_final_cols=True):
""" Read data from Excel worksheet
Args:
sheet_name (:obj:`str`): sheet name
ignore_empty_final_rows (:obj:`bool`, optional): if :obj:`True`, ignore empty final rows
ignore_empty_final_cols (:obj:`bool`, optional): if :obj:`True`, ignore empty final columns
Returns:
:obj:`Worksheet`: data
Raises:
:obj:`ValueError`:
"""
xls_worksheet = self.xls_workbook[sheet_name]
worksheet = Worksheet()
max_row = xls_worksheet.max_row
max_col = xls_worksheet.max_column
if ignore_empty_final_rows:
real_max_row = None
for i_row in range(xls_worksheet.max_row, 0, -1):
for i_col in range(1, max_col + 1):
value = self.read_cell(sheet_name, xls_worksheet, i_row, i_col)
if value not in (None, ''):
real_max_row = i_row
break
if real_max_row is not None:
break
if real_max_row is not None:
max_row = real_max_row
else:
max_row = 0
if ignore_empty_final_cols:
real_max_col = None
for i_col in range(max_col, 0, -1):
for i_row in range(1, max_row + 1):
value = self.read_cell(sheet_name, xls_worksheet, i_row, i_col)
if value not in (None, ''):
real_max_col = i_col
break
if real_max_col is not None:
break
if real_max_col is not None:
max_col = real_max_col
else:
max_col = 0
for i_row in range(1, max_row + 1):
row = Row()
worksheet.append(row)
for i_col in range(1, max_col + 1):
value = self.read_cell(sheet_name, xls_worksheet, i_row, i_col)
row.append(value)
for cell in xls_worksheet.merged_cells.ranges:
value = worksheet[cell.min_row-1][cell.min_col-1]
for i_row in range(cell.min_row-1, cell.max_row):
for i_col in range(cell.min_col-1, cell.max_col):
worksheet[i_row][i_col] = value
return worksheet
def read_cell(self, sheet_name, xls_worksheet, i_row, i_col):
""" Read the value of a cell
Args:
sheet_name (:obj:`str`): worksheet name
xls_worksheet (:obj:`openpyxl.Worksheet`): worksheet
i_row (:obj:`int`): row number
i_col (:obj:`int`): column number
Returns:
:obj:`object`: value of cell
"""
cell = xls_worksheet.cell(row=i_row, column=i_col)
if cell.data_type in (openpyxl.cell.cell.TYPE_STRING, openpyxl.cell.cell.TYPE_INLINE,
openpyxl.cell.cell.TYPE_NUMERIC, openpyxl.cell.cell.TYPE_NULL,
openpyxl.cell.cell.TYPE_BOOL):
value = cell.value
elif cell.data_type == openpyxl.cell.cell.TYPE_ERROR:
raise ValueError('Errors are not supported: {}:{}:{}{}'.format(self.path, sheet_name,
get_column_letter(i_col), i_row))
elif cell.data_type in (openpyxl.cell.cell.TYPE_FORMULA,
openpyxl.cell.cell.TYPE_FORMULA_CACHE_STRING):
if cell.value in ['=FALSE()', '=FALSE']:
value = False
elif cell.value in ['=TRUE()', '=TRUE']:
value = True
else:
value = Formula(cell.value)
elif cell.data_type == 'd':
# Excel cells with datetime formats are returned by openpyxl with a data_type of 'd' and a
# value converted to Python datetime objects. They don't have a TYPE_* in openpyxl.cell.cell.
# Alternatively, this condition could test isinstance(cell.value, datetime).
value = cell.value
else:
raise ValueError('Unsupported data type: {} at {}:{}:{}{}'.format(
cell.data_type, self.path, sheet_name, get_column_letter(i_col), i_row)) # pragma: no cover # unreachable
return value
class SeparatedValuesWriter(Writer):
""" Write data to csv/tsv file(s) """
def __init__(self, path, title=None, description=None, keywords=None, version=None, language=None, creator=None):
"""
Args:
path (:obj:`str`): path to file(s)
title (:obj:`str`, optional): title
description (:obj:`str`, optional): description
keywords (:obj:`str`, optional): keywords
version (:obj:`str`, optional): version
language (:obj:`str`, optional): language
creator (:obj:`str`, optional): creator
Raises:
:obj:`ValueError`: if file extension is not '.csv' or '.tsv' or if file name pattern
doesn't contain exactly one glob
"""
_, ext = splitext(path)
if ext not in ('.csv', '.tsv'):
raise ValueError("Extension of path '{}' must be one of '.csv' or '.tsv'".format(
path))
if '*' in dirname(path):
raise ValueError("path '{}' cannot have glob pattern '*' in its directory name".format(
path))
if basename(path).count('*') > 1:
raise ValueError("path '{}' cannot have multiple glob patterns '*' in its base name".format(
path))
super(SeparatedValuesWriter, self).__init__(path,
title=title, description=description,
keywords=keywords, version=version, language=language, creator=creator)
def run(self, data, style=None, validation=None, protected=False):
""" Write workbook to file(s)
Args:
data (:obj:`Workbook`): python representation of data; each element must be a string, boolean, integer, float, or NoneType
style (:obj:`WorkbookStyle`, optional): workbook style
validation (:obj:`WorkbookValidation`, optional): validation
protected (:obj:`bool`, optional): if :obj:`True`, protect the worksheet
"""
if len(data) > 1 and basename(self.path).count('*') == 0:
raise ValueError("path '{}' must have a glob pattern '*' in its base name".format(
self.path))
super(SeparatedValuesWriter, self).run(data, style=style, validation=validation, protected=protected)
def initialize_workbook(self):
""" Initialize workbook """
pass
def write_worksheet(self, sheet_name, data, style=None, validation=None, protected=False):
""" Write worksheet to file
Args:
sheet_name (:obj:`str`): sheet name
data (:obj:`Worksheet`): python representation of data; each element must be a string, boolean, integer, float, or NoneType
style (:obj:`WorksheetStyle`, optional): worksheet style
validation (:obj:`WorksheetValidation`, optional): worksheet validation
protected (:obj:`bool`, optional): if :obj:`True`, protect the worksheet
"""
data_values = []
for row in data:
row_values = Row(row)
has_formula = False
for i_cell, cell in enumerate(row_values):
if isinstance(cell, Formula):
row_values[i_cell] = cell.value
has_formula = True
if has_formula:
data_values.append(row_values)
else:
data_values.append(row)
pyexcel.save_as(array=data_values, dest_file_name=self.path.replace('*', sheet_name))
def finalize_workbook(self):
""" Finalize workbook """
pass
class SeparatedValuesReader(Reader):
""" Read data from csv/tsv file(s) """
def __init__(self, path):
"""
Args:
path (:obj:`str`): path to file(s)
Raises:
:obj:`ValueError`: if file extension is not '.csv' or '.tsv' or if file name pattern
doesn't contain exactly one glob
"""
_, ext = splitext(path)
if ext not in ('.csv', '.tsv'):
raise ValueError("Extension of path '{}' must be one of '.csv' or '.tsv'".format(
path))
if '*' in dirname(path):
raise ValueError("path '{}' cannot have glob pattern '*' in its directory name".format(
path))
if basename(path).count('*') > 1:
raise ValueError("path '{}' cannot have multiple glob patterns '*' in its base name".format(
path))
super(SeparatedValuesReader, self).__init__(path)
def initialize_workbook(self):
""" Initialize workbook
Returns:
:obj:`Workbook`: data
"""
return Workbook()
def get_sheet_names(self):
""" Get names of files contained within path glob
Returns:
obj:`list` of `str`: list of file names
Raises:
:obj:`ValueError`: if glob does not find any matching files
"""
i_glob = self.path.find('*')
if i_glob == -1:
return ['']
else:
names = []
for filename in natsorted(glob(self.path), alg=ns.IGNORECASE):
names.append(filename[i_glob:i_glob + len(filename) - len(self.path) + 1])
if not names:
raise ValueError("glob of path '{}' does not match any files".format(self.path))
return names
def read_worksheet(self, sheet_name, ignore_empty_final_rows=True, ignore_empty_final_cols=True):
""" Read data from file
Args:
sheet_name (:obj:`str`): sheet name
ignore_empty_final_rows (:obj:`bool`, optional): if :obj:`True`, ignore empty final rows
ignore_empty_final_cols (:obj:`bool`, optional): if :obj:`True`, ignore empty final columns
Returns:
:obj:`Worksheet`: data
"""
worksheet = Worksheet()
# todo: skip_empty_rows=False is the default for pyexcel-io v >= 0.3.2
# when it's available on pypi, set pyexcel>=0.4.0 pyexcel-io>=0.3.2 & remove skip_empty_rows option
sv_worksheet = pyexcel.get_sheet(file_name=self.path.replace('*', sheet_name),
skip_empty_rows=False)
rows = list(sv_worksheet.rows())
max_row = len(rows)
if max_row:
max_col = len(rows[0])
else:
max_col = 0
if ignore_empty_final_rows:
real_max_row = None
for i_row, row in enumerate(reversed(rows)):
for i_col, cell in enumerate(row):
value = self.read_cell(cell)
if value not in (None, ''):
real_max_row = max_row - i_row
break
if real_max_row is not None:
break
if real_max_row is not None:
max_row = real_max_row
if ignore_empty_final_cols:
real_max_col = None
for i_col in range(max_col - 1, 0, -1):
for i_row in range(0, max_row):
cell = rows[i_row][i_col]
value = self.read_cell(cell)
if value not in (None, ''):
real_max_col = i_col + 1
break
if real_max_col is not None:
break
if real_max_col is not None:
max_col = real_max_col
for sv_row in rows[0:max_row]:
row = Row()
worksheet.append(row)
for sv_cell in sv_row[0:max_col]:
row.append(self.read_cell(sv_cell))
return worksheet
def read_cell(self, value):
""" Read the value of a cell
Args:
value (:obj:`object`): value
Returns:
:obj:`object`: value
"""
if value == '':
value = None
elif value == 'True':
value = True
elif value == 'False':
value = False
return value
def get_writer(extension):
""" Get writer
Args:
extension (:obj:`str`): extension
Returns:
:obj:`class`: writer class
Raises:
:obj:`ValueError`: if extension is not one of ".xlsx", ".csv", or ".tsv"
"""
if extension == '.xlsx':
return ExcelWriter
elif extension in ['.csv', '.tsv']:
return SeparatedValuesWriter
else:
raise ValueError('Extension must be one of ".xlsx", ".csv", or ".tsv"')
def get_reader(extension):
""" Get reader
Args:
extension (:obj:`str`): extension
Returns:
:obj:`class`: reader class
Raises:
:obj:`ValueError`: if extension is not one of ".xlsx", ".csv", or ".tsv"
"""
if extension == '.xlsx':
return ExcelReader
elif extension in ['.csv', '.tsv']:
return SeparatedValuesReader
else:
raise ValueError('Extension must be one of ".xlsx", ".csv", or ".tsv"')
def write(path, workbook,
title=None, description=None, keywords=None, version=None, language=None, creator=None,
style=None):
""" Write data to Excel (.xlsx) file or collection of comma separated (.csv) or tab separated (.tsv) file(s)
Args:
path (:obj:`str`): path to file(s)
workbook (:obj:`Workbook`): python representation of data; each element must be a string, boolean, integer, float, or NoneType
title (:obj:`str`, optional): title
description (:obj:`str`, optional): description
keywords (:obj:`str`, optional): keywords
version (:obj:`str`, optional): version
language (:obj:`str`, optional): language
creator (:obj:`str`, optional): creator
style (:obj:`WorkbookStyle`, optional): workbook style
"""
# check extensions are valid
_, ext = splitext(path)
writer_cls = get_writer(ext)
writer = writer_cls(path,
title=title, description=description, keywords=keywords,
version=version, language=language, creator=creator)
writer.run(workbook, style=style)
def read(path):
""" Read data from Excel (.xlsx) file or collection of comma separated (.csv) or tab separated (.tsv) file(s)
Args:
path (:obj:`str`): path to file(s)
Returns:
:obj:`Workbook`: python representation of data
"""
# check extensions are valid
_, ext = splitext(path)
reader_cls = get_reader(ext)
reader = reader_cls(path)
return reader.run()
def convert(source, destination, worksheet_order=None, style=None, ignore_extra_sheets=True):
""" Convert among Excel (.xlsx), comma separated (.csv), and tab separated formats (.tsv)
Args:
source (:obj:`str`): path to source file
destination (:obj:`str`): path to save converted file
worksheet_order (:obj:`list` of :obj:`str`): worksheet order
style (:obj:`WorkbookStyle`, optional): workbook style for Excel
ignore_extra_sheets (:obj:`bool`, optional): true/false should extra sheets in worksheet_order be ignored or should an error be thrown
Raises:
:obj:`ValueError`: if file extensions are not supported or file names are equal
"""
# check source != destination
if source == destination:
raise ValueError('Source and destination names must be different')
# check extensions are valid
_, ext_src = splitext(source)
_, ext_dst = splitext(destination)
if ext_src not in ['.xlsx', '.csv', '.tsv']:
raise ValueError('Source extension must be one of ".xlsx", ".csv", or ".tsv"')
if ext_dst not in ['.xlsx', '.csv', '.tsv']:
raise ValueError('Destination extension must be one of ".xlsx", ".csv", or ".tsv"')
# if extensions are the same, copy file(s)
if ext_src == ext_dst and (worksheet_order is None or ext_src != '.xlsx'):
if ext_src == '.xlsx':
copyfile(source, destination)
else:
i_glob = source.find('*')
if not list(glob(source)):
raise ValueError("glob of path '{}' does not match any files".format(source))
for filename in glob(source):
if i_glob == -1:
sheet_name = ''
else:
sheet_name = filename[i_glob:i_glob + len(filename) - len(source) + 1]
copyfile(filename, destination.replace('*', sheet_name))
return
# read, convert, and write
workbook = read(source)
ordered_workbook = Workbook()
worksheet_order = worksheet_order or []
if not ignore_extra_sheets:
difference = set(worksheet_order) - set(workbook.keys())
if difference:
raise ValueError("source '{}' missing worksheets: '{}'".format(source, difference))
for worksheet in chain(worksheet_order, workbook.keys()):
if worksheet in workbook:
ordered_workbook[worksheet] = workbook[worksheet]
write(destination, ordered_workbook, style=style)
class WorkbookStyle(dict):
""" Workbook style: dictionary of worksheet styles """
pass
class WorksheetStyle(object):
""" Worksheet style
Attributes:
title_rows (:obj:`int`): number of title rows
title_row_font_bold (:obj:`bool`): title row bold
title_row_fill_pattern (:obj:`str`): title row fill pattern
title_row_fill_fgcolor (:obj:`str`): title row background color
head_rows (:obj:`int`): number of head rows
head_columns (:obj:`int`): number of head columns
head_row_font_bold (:obj:`bool`): head row bold
head_row_fill_pattern (:obj:`str`): head row fill pattern
head_row_fill_fgcolor (:obj:`str`): head row background color
blank_head_fill_fgcolor (:obj:`str`): background color of blank header cells
merged_head_fill_fgcolor (:obj:`str`): background color of merged header cells
extra_rows (:obj:`float`): number of additional rows to show
extra_columns (:obj:`float`): number of additional columns to show
font_family (:obj:`str`): font family
font_size (:obj:`float`): font size
row_height (:obj:`float`): row height
col_width (:obj:`float`): column width
auto_filter (:obj:`bool`): whether or not to activate auto filters for row
merge_ranges (:obj:`list` of :obj:`tuple` of :obj:`int`): list of tuples of the start row, start column, end row, and end column (0-based)
of each range to merge
hyperlinks (:obj:`list` of :obj:`Hyperlink`): list of hyperlinks
"""
def __init__(self,
title_rows=0, title_row_font_bold=True,
title_row_fill_pattern='solid', title_row_fill_fgcolor='888888',
head_rows=0, head_columns=0, head_row_font_bold=True,
head_row_fill_pattern='solid', head_row_fill_fgcolor='CCCCCC',
blank_head_fill_fgcolor='', merged_head_fill_fgcolor='AAAAAA',
extra_rows=float('inf'), extra_columns=float('inf'),
font_family='Arial', font_size=11.,
row_height=15.01, col_width=15.,
auto_filter=True, merge_ranges=None,
hyperlinks=None):
"""
Args:
title_rows (:obj:`int`, optional): number of title rows
title_row_font_bold (:obj:`bool`, optional): title row bold
title_row_fill_pattern (:obj:`str`, optional): title row fill pattern
title_row_fill_fgcolor (:obj:`str`, optional): title row background color
head_rows (:obj:`int`, optional): number of head rows
head_columns (:obj:`int`, optional): number of head columns
head_row_font_bold (:obj:`bool`, optional): head row bold
head_row_fill_pattern (:obj:`str`, optional): head row fill pattern
head_row_fill_fgcolor (:obj:`str`, optional): head row background color
blank_head_fill_fgcolor (:obj:`str`, optional): background color of blank header cells
merged_head_fill_fgcolor (:obj:`str`, optional): background color of merged header cells
extra_rows (:obj:`float`, optional): number of additional rows to show
extra_columns (:obj:`float`, optional): number of additional columns to show
font_family (:obj:`str`, optional): font family
font_size (:obj:`float`, optional): font size
row_height (:obj:`float`, optional): row height
col_width (:obj:`float`, optional): column width
auto_filter (:obj:`bool`, optional): whether or not to activate auto filters for row
merge_ranges (:obj:`list` of :obj:`tuple` of :obj:`int`, optional): list of tuples of the start row, start column, end row,
and end column (0-based) of each range to merge
hyperlinks (:obj:`list` of :obj:`Hyperlink`, optional): list of hyperlinks
"""
self.title_rows = title_rows
self.title_row_font_bold = title_row_font_bold
self.title_row_fill_pattern = title_row_fill_pattern
self.title_row_fill_fgcolor = title_row_fill_fgcolor
self.head_rows = head_rows
self.head_columns = head_columns
self.head_row_font_bold = head_row_font_bold
self.head_row_fill_pattern = head_row_fill_pattern
self.head_row_fill_fgcolor = head_row_fill_fgcolor
self.blank_head_fill_fgcolor = blank_head_fill_fgcolor
self.merged_head_fill_fgcolor = merged_head_fill_fgcolor
self.extra_rows = extra_rows
self.extra_columns = extra_columns
self.font_family = font_family
self.font_size = font_size
self.row_height = row_height
self.col_width = col_width
self.auto_filter = auto_filter
self.merge_ranges = merge_ranges or []
self.hyperlinks = hyperlinks or []
class Hyperlink(object):
""" Hyperlink from a cell
Attributes:
i_row (:obj:`int`): row
i_row (:obj:`col`): column
url (:obj:`str`): URL
tip (:obj:`str`): text of tooltip
"""
def __init__(self, i_row, i_col, url, tip=None):
"""
Args:
i_row (:obj:`int`): row
i_row (:obj:`col`): column
url (:obj:`str`): URL
tip (:obj:`str`, optional): text of tooltip
"""
self.i_row = i_row
self.i_col = i_col
self.url = url
self.tip = tip
class WorkbookValidation(dict):
""" Workbook validation: dictionary of worksheet validations """
pass
class WorksheetValidationOrientation(int, enum.Enum):
""" Worksheet validation orientation """
row = 1
column = 2
class WorksheetValidation(object):
""" List of field validations
Attributes:
orientation (:obj:`str`): row or col
fields (:obj:`list` of :obj:`FieldValidation`): field validations
"""
def __init__(self, orientation=WorksheetValidationOrientation.row, fields=None):
"""
Args:
orientation (:obj:`str`, optional): row or col
fields (:obj:`list` of :obj:`FieldValidation`, optional): field validations
"""
self.orientation = orientation
self.fields = fields
def apply(self, ws, first_row, first_col, last_row, last_col, include_help_comments=False):
""" Apply validation to worksheet
Args:
ws (:obj:`xlsxwriter.Worksheet`): worksheet
first_row (:obj:`int`): first row
first_col (:obj:`int`): first column
last_row (:obj:`int`): last row
last_col (:obj:`int`): last column
include_help_comments (:obj:`bool`, optional): if :obj:`True`, include help comments
"""
for i_field, field in enumerate(self.fields):
if field:
if self.orientation == WorksheetValidationOrientation.row:
if include_help_comments:
field.apply_help_comment(ws, first_row - 1, i_field)
field.apply_validation(ws, first_row, i_field, last_row, i_field)
else:
if include_help_comments:
field.apply_help_comment(ws, i_field, first_col - 1)
field.apply_validation(ws, i_field, first_col, i_field, last_col)
class FieldValidationType(int, enum.Enum):
""" Field validation type """
integer = 1
decimal = 2
list = 3
date = 4
time = 5
length = 6
custom = 7
any = 8
FieldValidationCriterion = enum.Enum('FieldValidationCriterion', type=str, names=[
('between', 'between'),
('not between', 'not between'),
('equal to', '=='),
('not equal to', '!='),
('greater than', '>'),
('less than', '<'),
('greater than or equal to', '>='),
('less than or equal to', '<='),
('==', '=='),
('!=', '!='),
('>', '>'),
('<', '<'),
('>=', '>='),
('<=', '<='),
])
class FieldValidationErrorType(int, enum.Enum):
""" Type of error dialog to display """
stop = 1
warning = 2
information = 3
class FieldValidation(object):
""" Validation for row- or column-oriented field
Attributes:
input_title (:obj:`str`): title of input dialog box
input_message (:obj:`str`): message in input dialog box
show_input (:obj:`bool`): if :obj:`True`, show input dialog box
type (:obj:`FieldValidationType`): type of validation
criterion (:obj:`FieldValidationCriterion`): validation criterion
allowed_scalar_value (:obj:`bool`, :obj:`int`, :obj:`float`, or :obj:`str`): allowable scalar value
minimum_scalar_value (:obj:`int` or :obj:`float`): minimum allowable value
maximum_scalar_value (:obj:`int` or :obj:`float`): maximum allowable value
allowed_list_values (:obj:`str` or :obj:`list` of :obj:`str`): allowable list values
show_dropdown (:obj:`bool`): if :obj:`True`, show dropdown menu for list validations
ignore_blank (:obj:`bool`): if :obj:`True`, don't validate blank cells
error_type (:obj:`FieldErrorType`): type of error dialog to display
error_title (:obj:`str`): title of error dialog box
error_message (:obj:`str`): message in error dialog box
show_error (:obj:`bool`): if :obj:`True`, show error dialog box
"""
def __init__(self, input_title='', input_message='', show_input=True,
type=FieldValidationType.any, criterion=None, allowed_scalar_value=None,
minimum_scalar_value=None, maximum_scalar_value=None, allowed_list_values=None,
show_dropdown=True, ignore_blank=True,
error_type=FieldValidationErrorType.warning, error_title='', error_message='', show_error=True):
"""
Args:
input_title (:obj:`str`, optional): title of input dialog box
input_message (:obj:`str`, optional): message in input dialog box
show_input (:obj:`bool`, optional): if :obj:`True`, show input dialog box
type (:obj:`FieldValidationType`, optional: type of validation
criterion (:obj:`FieldValidationCriterion`, optional): validation criterion
allowed_scalar_value (:obj:`bool`, :obj:`int`, :obj:`float`, or :obj:`str`, optional): allowable scalar value
minimum_scalar_value (:obj:`int` or :obj:`float`, optional): minimum allowable value
maximum_scalar_value (:obj:`int` or :obj:`float`, optional): maximum allowable value
allowed_list_values (:obj:`str` or :obj:`list`, optional): allowable list values
show_dropdown (:obj:`bool`, optional): if :obj:`True`, show dropdown menu for list validations
ignore_blank (:obj:`bool`, optional): if :obj:`True`, don't validate blank cells
error_type (:obj:`FieldErrorType`, optional): type of error dialog to display
error_title (:obj:`str`, optional): title of error dialog box
error_message (:obj:`str`, optional): message in error dialog box
show_error (:obj:`bool`, optional): if :obj:`True`, show error dialog box
"""
self.input_title = input_title
self.input_message = input_message
self.show_input = show_input
self.type = type
self.criterion = criterion
self.allowed_scalar_value = allowed_scalar_value
self.minimum_scalar_value = minimum_scalar_value
self.maximum_scalar_value = maximum_scalar_value
self.allowed_list_values = allowed_list_values
self.show_dropdown = show_dropdown
self.ignore_blank = ignore_blank
self.error_type = error_type
self.error_title = error_title
self.error_message = error_message
self.show_error = show_error
def apply_help_comment(self, ws, i_row, i_col):
""" Apply help comment to cell
Args:
ws (:obj:`xlsxwriter.Worksheet`): worksheet
i_row (:obj:`int`): row
i_col (:obj:`int`): column
"""
options = {
'visible': False,
'font_size': 10,
'width': 300, # pixels
'height': 74, # pixels
}
result = ws.write_comment(i_row, i_col, self.input_message, options)
if result == -1:
raise ValueError('Cell is out of bounds')
elif result == -2:
raise ValueError('Comment must be <= 32,000 characters')
assert result in [0, None], "xlsxwriter error: {}".format(result)
def apply_validation(self, ws, first_row, first_col, last_row, last_col):
""" Apply validation to cells
Args:
ws (:obj:`xlsxwriter.Worksheet`): worksheet
first_row (:obj:`int`): first row
first_col (:obj:`int`): first column
last_row (:obj:`int`): last row
last_col (:obj:`int`): last column
"""
result = ws.data_validation(first_row, first_col, last_row, last_col, self.get_options())
if result == -1:
raise ValueError('Range is out of bounds')
elif result == -2:
raise ValueError('Invalid options')
assert result in [0, None], "xlsxwriter error: {}".format(result)
def get_options(self):
""" Get options for :obj:`xlsxwriter.Worksheet.data_validation`
Returns
:obj:`dict`: dictonary of options for :obj:`xlsxwriter.Worksheet.data_validation`
"""
options = {}
# input dialog
if self.input_title:
if len(self.input_title) > 32:
options['input_title'] = self.input_title[0:32-4] + ' ...'
else:
options['input_title'] = self.input_title
if self.input_message:
if len(self.input_message) > 255:
options['input_message'] = self.input_message[0:255-4] + ' ...'
else:
options['input_message'] = self.input_message
options['show_input'] = self.show_input
# validation
options['validate'] = self.type.name
if self.criterion:
options['criteria'] = self.criterion.value
if self.allowed_scalar_value:
options['value'] = self.allowed_scalar_value
if self.minimum_scalar_value:
options['minimum'] = self.minimum_scalar_value
if self.maximum_scalar_value:
options['maximum'] = self.maximum_scalar_value
if self.allowed_list_values:
options['source'] = self.allowed_list_values
options['dropdown'] = self.show_dropdown
options['ignore_blank'] = self.ignore_blank
# error dialog
options['error_type'] = self.error_type.name
if self.error_title:
if len(self.error_title) > 32:
options['error_title'] = self.error_title[0:32-4] + ' ...'
else:
options['error_title'] = self.error_title
if self.error_message:
if len(self.error_message) > 255:
options['error_message'] = self.error_message[0:255-4] + ' ...'
else:
options['error_message'] = self.error_message
options['show_error'] = self.show_error
return options