Source code for pyexcel.core
"""
pyexcel.core
~~~~~~~~~~~~~~~~~~~
A list of pyexcel signature functions
:copyright: (c) 2015-2022 by Onni Software Ltd.
:license: New BSD License
"""
import re
from pyexcel import constants
from pyexcel import docstrings as docs
from pyexcel.book import Book, to_book
from pyexcel.sheet import Sheet
from pyexcel._compact import OrderedDict, append_doc, zip_longest
from pyexcel.internal import core as sources
from pyexcel_io import manager
# Keyword names matching this pattern are destination parameters; the
# captured group is the parameter name with the "dest_" prefix removed.
STARTS_WITH_DEST = r"^dest_(.*)"
# Message raised by isave_as when it is given a Sheet-only parameter.
SAVE_AS_EXCEPTION = (
    "This function does not accept parameters for "
    "pyexcel.Sheet. Please use pyexcel.save_as instead."
)
[docs]
@append_doc(docs.GET_SHEET)
def get_sheet(**keywords):
    """
    Build a :class:`Sheet` from an excel source

    Keywords listed in ``constants.VALID_SHEET_PARAMETERS`` are removed
    from ``keywords`` and passed to the :class:`Sheet` constructor; the
    remaining keywords go to ``sources.get_sheet_stream``.
    """
    # The membership test runs right before each pop, so only keywords
    # that were actually supplied are moved over.
    sheet_params = {
        field: keywords.pop(field)
        for field in constants.VALID_SHEET_PARAMETERS
        if field in keywords
    }
    stream = sources.get_sheet_stream(**keywords)
    return Sheet(stream.payload, stream.name, **sheet_params)
[docs]
@append_doc(docs.GET_BOOK)
def get_book(**keywords):
    """
    Build a :class:`Book` from an excel source

    All keywords are forwarded to ``sources.get_book_stream``; the
    resulting stream's dictionary form, filename and path are used to
    construct the :class:`Book`.
    """
    stream = sources.get_book_stream(**keywords)
    return Book(
        stream.to_dict(),
        filename=stream.filename,
        path=stream.path,
    )
[docs]
@append_doc(docs.IGET_BOOK)
def iget_book(**keywords):
    """
    Get an instance of :class:`BookStream` from an excel source

    The book stream is requested with ``on_demand=True``. The first use
    case is to get all sheet names without extracting the sheets into
    memory.
    """
    book_stream = sources.get_book_stream(on_demand=True, **keywords)
    return book_stream
[docs]
@append_doc(docs.SAVE_AS)
def save_as(**keywords):
    """
    Save a sheet from one data source to another

    Keywords prefixed with ``dest_`` describe the destination; all others
    describe the source. Source keywords listed in
    ``constants.VALID_SHEET_PARAMETERS`` are passed to the intermediate
    :class:`Sheet` rather than to the reader.
    """
    dest_keywords, source_keywords = _split_keywords(**keywords)

    sheet_params = {}
    for field in constants.VALID_SHEET_PARAMETERS:
        if field not in source_keywords:
            continue
        sheet_params[field] = source_keywords.pop(field)

    stream = sources.get_sheet_stream(**source_keywords)
    # A destination sheet name, when given, overrides the source's name.
    # It stays in dest_keywords and is forwarded to save_sheet as well.
    sheet_name = dest_keywords.get("sheet_name", stream.name)
    sheet = Sheet(stream.payload, sheet_name, **sheet_params)
    return sources.save_sheet(sheet, **dest_keywords)
[docs]
@append_doc(docs.ISAVE_AS)
def isave_as(**keywords):
    """
    Save a sheet from one data source to another with less memory

    It is similar to :meth:`pyexcel.save_as` except that it does
    not accept parameters for :class:`pyexcel.Sheet`, and it reads
    while it writes.

    Raises ``Exception`` if any source keyword is listed in
    ``constants.VALID_SHEET_PARAMETERS``.
    """
    dest_keywords, source_keywords = _split_keywords(**keywords)

    # Sheet-level parameters are rejected here: the sheet stream is
    # handed to the writer directly, without building a Sheet.
    if any(
        field in source_keywords
        for field in constants.VALID_SHEET_PARAMETERS
    ):
        raise Exception(SAVE_AS_EXCEPTION)

    stream = sources.get_sheet_stream(on_demand=True, **source_keywords)
    if "sheet_name" in dest_keywords:
        stream.name = dest_keywords["sheet_name"]
    return sources.save_sheet(stream, **dest_keywords)
[docs]
@append_doc(docs.SAVE_BOOK_AS)
def save_book_as(**keywords):
    """
    Save a book from one data source to another

    Keywords prefixed with ``dest_`` describe the destination; all others
    describe the source. The book stream is converted with ``to_book``
    before it is saved.
    """
    dest_keywords, source_keywords = _split_keywords(**keywords)
    book_stream = sources.get_book_stream(**source_keywords)
    return sources.save_book(to_book(book_stream), **dest_keywords)
[docs]
@append_doc(docs.ISAVE_BOOK_AS)
def isave_book_as(**keywords):
    """
    Save a book from one data source to another

    It is similar to :meth:`pyexcel.save_book_as` but it reads
    while it writes. This function provides some speedup but
    the output data is not made uniform.
    """
    dest_keywords, source_keywords = _split_keywords(**keywords)
    # Unlike save_book_as, the on-demand stream is saved as-is, with no
    # to_book conversion in between.
    book_stream = sources.get_book_stream(on_demand=True, **source_keywords)
    return sources.save_book(book_stream, **dest_keywords)
[docs]
@append_doc(docs.GET_ARRAY)
def get_array(**keywords):
    """
    Obtain an array from an excel source

    It accepts the same parameters as :meth:`~pyexcel.get_sheet`
    but returns the sheet's ``to_array()`` result instead.
    """
    return get_sheet(**keywords).to_array()
[docs]
@append_doc(docs.GET_DICT)
def get_dict(name_columns_by_row=0, **keywords):
    """
    Obtain a dictionary from an excel source

    It accepts the same parameters as :meth:`~pyexcel.get_sheet`
    but returns the sheet's ``to_dict()`` result instead.

    Specifically:

    name_columns_by_row : specify a row to be a dictionary key.
    It defaults to 0, the first row.

    If you would use column index 0 instead, you should do::

        get_dict(name_columns_by_row=-1, name_rows_by_column=0)
    """
    named_sheet = get_sheet(
        name_columns_by_row=name_columns_by_row,
        **keywords
    )
    return named_sheet.to_dict()
[docs]
@append_doc(docs.GET_RECORDS)
def get_records(name_columns_by_row=0, **keywords):
    """
    Obtain a list of records from an excel source

    It accepts the same parameters as :meth:`~pyexcel.get_sheet`
    but returns a list of dictionaries (records) instead.

    Specifically:

    name_columns_by_row : specify a row to be a dictionary key.
    It defaults to 0, the first row.

    If you would use column index 0 instead, you should do::

        get_records(name_columns_by_row=-1, name_rows_by_column=0)
    """
    named_sheet = get_sheet(
        name_columns_by_row=name_columns_by_row,
        **keywords
    )
    # to_records() is materialised so callers receive a list.
    records = named_sheet.to_records()
    return list(records)
[docs]
@append_doc(docs.IGET_ARRAY)
def iget_array(**keywords):
    """
    Obtain a generator of a two dimensional array from an excel source

    It is similar to :meth:`pyexcel.get_array` but it has a smaller
    memory footprint: the payload of an on-demand sheet stream is
    returned directly.
    """
    return sources.get_sheet_stream(on_demand=True, **keywords).payload
[docs]
@append_doc(docs.IGET_RECORDS)
def iget_records(custom_headers=None, **keywords):
    """
    Obtain a generator of records from an excel source

    It is similar to :meth:`pyexcel.get_records` but it has a smaller
    memory footprint. It requires the headers to be in the first row, and
    the data matrix should be of equal length. It should consume less
    memory and should work well with large files.

    custom_headers : when given and non-empty, each record contains only
    these names, in this order. A name that is not among the first-row
    headers raises ``KeyError``.
    """
    stream = sources.get_sheet_stream(on_demand=True, **keywords)
    rows = iter(stream.payload)
    try:
        # The first row supplies the record keys and is never yielded.
        headers = next(rows)
    except StopIteration:
        # No rows at all: the generator simply finishes.
        return

    for row in rows:
        # Rows and headers of unequal length are padded with DEFAULT_NA.
        pairs = zip_longest(headers, row, fillvalue=constants.DEFAULT_NA)
        if not custom_headers:
            # default order
            yield OrderedDict(pairs)
            continue
        # custom order
        by_header = dict(pairs)
        yield OrderedDict(
            (name, by_header[name]) for name in custom_headers
        )
[docs]
@append_doc(docs.GET_BOOK_DICT)
def get_book_dict(**keywords):
    """
    Obtain a dictionary of two dimensional arrays

    It accepts the same parameters as :meth:`~pyexcel.get_book`
    but returns the book's ``to_dict()`` result instead.
    """
    return get_book(**keywords).to_dict()
def get_io_type(file_type):
    """
    Return the io stream type, string or bytes

    The lookup is delegated to ``manager.get_io_type``; when that
    returns ``None`` the result is ``"string"``.
    """
    detected = manager.get_io_type(file_type)
    return "string" if detected is None else detected
def _split_keywords(**keywords):
    """
    Partition keywords into destination and source parameters

    A keyword whose name matches ``STARTS_WITH_DEST`` goes into the
    destination dictionary under the pattern's captured group; every
    other keyword goes into the source dictionary unchanged.

    Returns a ``(dest_keywords, source_keywords)`` tuple.
    """
    dest_keywords, source_keywords = {}, {}
    for key, value in keywords.items():
        matched = re.match(STARTS_WITH_DEST, key)
        if matched is None:
            source_keywords[key] = value
        else:
            dest_keywords[matched.group(1)] = value
    return dest_keywords, source_keywords