Skip to content

Instantly share code, notes, and snippets.

@alexminza
Created December 7, 2023 13:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alexminza/162f2dbff8ad91957eca8e47eff885fd to your computer and use it in GitHub Desktop.
Save alexminza/162f2dbff8ad91957eca8e47eff885fd to your computer and use it in GitHub Desktop.
Fix openpyxl ExcelReader to load broken XLSX files exported by 1C
import pandas as pd
import openpyxl
#https://foss.heptapod.net/openpyxl/openpyxl
#from openpyxl.xml.constants import (ARC_SHARED_STRINGS, SHARED_STRINGS, ARC_CONTENT_TYPES, CONTYPES_NS)
#https://foss.heptapod.net/openpyxl/openpyxl/-/blob/branch/3.1/openpyxl/xml/constants.py#L35
#openpyxl.xml.constants.ARC_SHARED_STRINGS = openpyxl.xml.constants.PACKAGE_XL + '/SharedStrings.xml'
#from openpyxl.reader.strings import read_string_table, read_rich_text
#https://foss.heptapod.net/openpyxl/openpyxl/-/blob/branch/3.1/openpyxl/reader/excel.py#L139
from openpyxl.reader.excel import ExcelReader
class ExcelReaderMonkey(ExcelReader):
    """ExcelReader that finds the shared-strings part case-insensitively.

    The stock reader opens the archive member whose name is exactly the part
    name declared in the package manifest.  This subclass instead matches the
    declared name against the archive's member list ignoring letter case, so
    a workbook whose archive stores the part under a differently-cased name
    (presumably e.g. ``xl/SharedStrings.xml`` in 1C exports -- confirm against
    a sample file) can still be read.
    """

    def read_strings(self):
        """Load the shared-strings table into ``self.shared_strings``.

        Does nothing when the package manifest declares no shared-strings
        part.

        Raises:
            KeyError: if the declared part is missing from the archive under
                any letter casing (raised by ``self.archive.open``, assuming
                the archive is a ``zipfile.ZipFile``).
        """
        ct = self.package.find(openpyxl.xml.constants.SHARED_STRINGS)
        if ct is None:
            return

        if self.rich_text:
            reader = openpyxl.reader.strings.read_rich_text
        else:
            reader = openpyxl.reader.strings.read_string_table

        # The first character of the part name is dropped before it is used
        # as an archive member name (same as the upstream implementation).
        declared_path = ct.PartName[1:]
        names_by_lower = {
            name.lower(): name for name in self.archive.namelist()
        }
        # Fall back to the declared name when nothing matches, so the failure
        # names the missing part instead of complaining about ``None``.
        strings_path = names_by_lower.get(declared_path.lower(), declared_path)

        with self.archive.open(strings_path) as src:
            self.shared_strings = reader(src)
# Monkey-patch: rebind the ExcelReader name inside openpyxl.reader.excel to the
# subclass defined in this file, so code that looks the class up through that
# module (presumably openpyxl.load_workbook, and pandas.read_excel via the
# openpyxl engine -- verify against the installed versions) uses the
# case-insensitive shared-strings lookup.
openpyxl.reader.excel.ExcelReader = ExcelReaderMonkey
# excel_reader = ExcelReaderMonkey(fn="Tranzanctii.xlsx", read_only=True)
# #reader.read_strings = read_strings
# excel_reader.read()
# workbook = excel_reader.wb
# print(workbook.sheetnames)
#https://foss.heptapod.net/openpyxl/openpyxl/-/blob/branch/3.1/openpyxl/reader/excel.py#L315
#workbook = openpyxl.load_workbook("Tranzanctii.xlsx", read_only=True)
#print(workbook.sheetnames)
# Load the workbook (after openpyxl's reader has been patched earlier in this
# file) and show a summary of the resulting DataFrame.
data = pd.read_excel("Tranzanctii.xlsx")
# DataFrame.info() prints the summary itself and returns None; wrapping it in
# print() would emit an extra "None" line.
data.info()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment