Skip to content

Instantly share code, notes, and snippets.

@BroHui
Created May 13, 2021 08:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save BroHui/57460d9caae37e6ce74927859295adb7 to your computer and use it in GitHub Desktop.
Save BroHui/57460d9caae37e6ce74927859295adb7 to your computer and use it in GitHub Desktop.
# coding: utf-8
import warnings
import string
from openpyxl import Workbook, load_workbook
BLANK_LINES = 3
class ExcelBook(object):
def __init__(self):
self.parse_mode = 1
self.wb = None
self.sheets = []
self.ws = None
self.cols = []
self.row_range = []
self.headers = []
def load(self, xlsx_file_name=""):
if not xlsx_file_name:
warnings.warn("Missing xlsx file name.")
return
self.wb = load_workbook(xlsx_file_name)
self.sheets = self.wb.sheetnames
self.ws = self.wb.active
# peek the rows and cols at the excel
_, _, self.cols = self.get_cols_range()
row_start, row_end, _ = self.get_rows_range()
self.row_range = (row_start, row_end)
# try to load headers
self.headers = self.get_headers()
def get_cols_range(self):
"""
excel cols range peek
:return:
"""
col_start = string.ascii_uppercase[0]
col_end = ''
col_array = []
blank_col_count = 0
for alphabet in string.ascii_uppercase:
if self.ws['{}1'.format(alphabet)].value:
blank_col_count = 0
col_array.append(alphabet)
col_end = alphabet
continue
# blank col, start counting
blank_col_count += 1
if blank_col_count == 3:
break
return col_start, col_end, col_array
def get_rows_range(self, skip_table_headers=1, the_first_col='A'):
"""
excel rows range peek
default table header rows set as one.
:return:
"""
row_start = 1 + skip_table_headers
row_end = 1 + skip_table_headers
row_array = []
blank_row_count = 0
while 1:
pos_tag = '{}{}'.format(the_first_col, row_end)
if self.ws[pos_tag].value:
blank_row_count = 0
row_array.append(pos_tag)
row_end += 1
continue
# blank row, start counting
blank_row_count += 1
if blank_row_count == 3:
row_end -= 1
break
return row_start, row_end, row_array
def get_headers(self, headers_row=1, headers_blocks=[]):
headers = []
# _, _, col_range = self.get_cols_range()
for col in self.cols:
header_pos = "{}{}".format(col, headers_row)
cell_value = self.ws[header_pos].value
if not cell_value:
continue
headers.append(cell_value)
print(headers)
return headers
def get_data(self):
data = []
row_start, row_end = self.row_range
for i in range(row_start, row_end+1):
single_data = {}
for k in self.cols:
pos = "{}{}".format(k, i)
cell_value = self.ws[pos].value or ""
t_h = self.headers[self.cols.index(k)]
single_data[t_h] = cell_value
data.append(single_data)
return data
class SimpleExcelBook(ExcelBook):
def __init__(self):
super(SimpleExcelBook, self).__init__()
if __name__ == '__main__':
book = SimpleExcelBook()
book.load('EnglishWordSmaple.xlsx')
book.get_data()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment