Skip to content

Instantly share code, notes, and snippets.

@Gribouillis
Last active December 29, 2021 22:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Gribouillis/e4360ea3b0dbd0e37f4c13fa276adc8e to your computer and use it in GitHub Desktop.
Save Gribouillis/e4360ea3b0dbd0e37f4c13fa276adc8e to your computer and use it in GitHub Desktop.
Spreadsheet to dictionary
from collections import namedtuple
import pathlib
import pickle
import subprocess as sp
import tempfile
__version__ = '2019.01.16.2'
__all__ = ['spreadsheet_to_dict',]
WorksheetKey = namedtuple('WorksheetKey', "index name")
def spreadsheet_to_dict(filename, cachename=None):
"""Return a dict: {(index, name): <list of tuples>}
Arguments
filename: a path to a .ods or .xlsx file. It may also work
with .csv and .xls and others
cachename: if not None, a path to a pickle file where the
python dictionary will be stored. If this file exists
and its modification time is more recent than that of
the spreadsheet file, it is read directly instead of
the spreadsheet.
Worksheet keys are namedtuples with members .index and .name
see also: libreoffice, openpyxl
"""
filename = pathlib.Path(filename)
if cachename:
cachename = pathlib.Path(cachename)
if cachename.is_file() and (
cachename.stat().st_mtime > filename.stat().st_mtime):
return load_cache(cachename)
if filename.suffix == '.xlsx':
return _xlsx_to_dict(filename, cachename)
with tempfile.TemporaryDirectory() as tmpd:
tmp = pathlib.Path(tmpd)
userdir = tmp/'.user'
userdir.mkdir()
convert_to_xlsx = [
'soffice',
# avoid possible interaction with running instance of libreoffice
'-env:UserInstallation={}'.format(userdir.as_uri()),
'--headless',
'--convert-to',
# 'xlsx:"Calc MS Excel 2007 XML"', # <- doesn't work
# 'xlsx', # <- works
'xlsx:Calc MS Excel 2007 XML', # <- also works
'--outdir',
str(tmp),
str(filename)]
sp.run(convert_to_xlsx, check=True, stdout=sp.DEVNULL)
return _xlsx_to_dict(
(tmp/filename.name).with_suffix('.xlsx'), cachename)
def _xlsx_to_dict(filename, cachename):
db = {}
from openpyxl.reader.excel import load_workbook
wb=load_workbook(str(filename))
for i, (ws, name) in enumerate(zip(wb.worksheets, wb.sheetnames)):
db[WorksheetKey(i, name)] = list(ws.values)
if cachename:
dump_cache(db, cachename)
return db
def load_cache(cachename):
with cachename.open('rb') as ifh:
return pickle.load(ifh)
def dump_cache(obj, cachename):
try:
exc_occurred = False
try:
with cachename.open('wb') as ofh:
pickle.dump(obj, ofh)
except Exception:
exc_occurred = True
raise
finally:
if exc_occurred:
try:
cachename.unlink()
except OSError:
pass
if __name__ == '__main__':
with tempfile.TemporaryDirectory() as tmp:
fn = pathlib.Path(tmp)/'tmp_example.csv'
with fn.open('w') as ofh:
print("""\
Spam,maps,32
Eggs,sgge,64
Bacon,nocab,128""", file=ofh)
d = spreadsheet_to_dict(str(fn))
print(d)
@Gribouillis
Copy link
Author

@BlacksheepVMF-214 No, not at all. The data contained in the csv file (or ods or xls) does not need to be related to python, nor to this script. Create a simple spreadsheet with Excel or LibreOffice and try to load it in Python with this code. For each sheet in the workbook, a pair (key, value) will be created.

Just run the code and see what it produces.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment