-
-
Save tjumyk/3b6fe1741d6a402e22fe36e3ab5bd818 to your computer and use it in GitHub Desktop.
A patch for openpyxl that exposes the Rich Text objects of shared strings in loaded worksheets.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from openpyxl.cell.text import Text | |
from openpyxl.reader import excel | |
from openpyxl.xml.constants import SHEET_MAIN_NS | |
from openpyxl.xml.functions import iterparse | |
def read_string_table(xml_source):
    """Parse the shared-strings XML and return its entries as a list.

    Every ``si`` element in the main spreadsheet namespace contributes one
    entry, in document order:

    * when the parsed ``Text`` object reports ``formatted`` as truthy, the
      ``Text`` object itself is stored, so callers can reach the rich-text
      data;
    * otherwise only the plain ``content`` string is stored, with every
      occurrence of ``'x005F_'`` removed.

    :param xml_source: source handed straight to ``iterparse``.
    :return: list whose items are either plain strings or ``Text`` objects.
    """
    si_tag = '{%s}si' % SHEET_MAIN_NS
    shared = []

    for _event, element in iterparse(xml_source):
        if element.tag != si_tag:
            continue

        parsed = Text.from_tree(element)
        if parsed.formatted:
            # Keep the raw Text object for entries with formatted snippets.
            entry = parsed
        else:
            # Plain entry: reduce to its text content (the original processing).
            entry = parsed.content.replace('x005F_', '')

        # The element has been consumed; clear it before moving on.
        element.clear()
        shared.append(entry)

    return shared
def patch_read_string_table():
    """Install this module's ``read_string_table`` on ``openpyxl.reader.excel``.

    Rebinds the ``read_string_table`` attribute of the imported ``excel``
    module to the function defined in this file. Nothing is returned.

    NOTE(review): the original note said to call this "before importing any
    modules of openpyxl". This file already imports openpyxl itself, so the
    intent is presumably "before loading any workbook" -- confirm.
    """
    excel.read_string_table = read_string_table
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment