Last active
April 10, 2020 15:20
-
-
Save joefromct/9d1b36ac89dd7ae31c7185355f5e0619 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ipython | |
from openpyxl import load_workbook | |
from toolz import thread_last, curry | |
import toolz.curried as tc | |
from typing import List, Dict, Tuple | |
import os | |
def list_to_dict_list(all_rows: List[List]) -> List[Dict]: | |
"""this just chops the headers off the spreadsheet, and then uses them to make | |
a dictionary out of the rest of the stuff.""" | |
headers, *rest = all_rows | |
return [dict(zip(headers, x)) for x in rest] | |
@curry | |
def fetch_cell_coordinate(ws, coordinate: Tuple[int, int]): | |
"Grab a value out of a cell coordinate." | |
x, y = coordinate | |
return ws.cell(x, y).value | |
def excel_ws_to_list_of_dicts(filename: str, worksheet: str) -> List[Dict]: | |
"Takes a file, and worksheet, and returns a list of dictionaries. Assumes the xls has a header row." | |
assert os.path.isfile(filename), "This doesn't look like a good file to open?" | |
wb = load_workbook(filename=filename, read_only=True) | |
ws = wb[worksheet] | |
# tuple containing all row and cell coordinates. | |
# if 3 rows by 4 columns by 8 rows: | |
# > [[1, 1], | |
# [1, 2], | |
# [1, 3], | |
# [1, 4], | |
# [2, 1], | |
# [2, 2], | |
# [2, 3], | |
# [2, 4], | |
# [3, 1], | |
# [3, 2], | |
# [3, 3], | |
# [3, 4]] | |
cell_coordinates = [[r, c] for r in range(ws.min_row, ws.max_row + 1) | |
for c in range(ws.min_column, ws.max_column + 1)] | |
# map all cell coordinates back into partitions of column width size and | |
# then yield to list. | |
my_fetch_cell_coordinates = fetch_cell_coordinate(ws) | |
return thread_last(cell_coordinates, | |
tc.map(my_fetch_cell_coordinates), # Pick out each value for each cell | |
tc.partition(ws.max_column), # partition to list of lists | |
list_to_dict_list, # convert list of lists to list of dicts | |
list) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment