Last active
March 28, 2020 10:06
-
-
Save pikhovkin/543709a2e2827d9c345d to your computer and use it in GitHub Desktop.
Patch for openpyxl 2.1.2 for basic reading images. drawings.py and excel.py are in the patch_reader package
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
from __future__ import absolute_import | |
import os.path | |
from io import BytesIO | |
from openpyxl.xml.constants import (PACKAGE_WORKSHEET_RELS, | |
REL_NS, | |
PACKAGE_RELS, | |
PACKAGE_IMAGES, | |
PACKAGE_DRAWINGS, | |
DRAWING_NS, | |
SHEET_DRAWING_NS) | |
from openpyxl.xml.functions import fromstring | |
from openpyxl.drawing import Image | |
from openpyxl.cell import get_column_letter | |
IMAGE_NS = REL_NS + '/image' | |
_DRAWING_NS = REL_NS + '/drawing' | |
PACKAGE_DRAWINGS_RELS = PACKAGE_DRAWINGS + '/' + PACKAGE_RELS | |
def read_image_file(root, rid, valid_files): | |
for node in root: | |
if node.attrib['Type'] == IMAGE_NS and node.attrib['Id'] == rid: | |
image_file = os.path.split(node.attrib['Target'])[-1] | |
image_file = PACKAGE_IMAGES + '/' + image_file | |
if image_file in valid_files: | |
return image_file | |
return None | |
def read_drawings(ws, drawings_path, archive, valid_files): | |
""" Given a worksheet and the XML of its drawings file, links drawings to cells | |
""" | |
drawings_codename = os.path.split(drawings_path)[-1] | |
rels_file = PACKAGE_DRAWINGS_RELS + '/' + drawings_codename + '.rels' | |
if rels_file not in valid_files: | |
return None | |
rels_source = archive.read(rels_file) | |
rels_root = fromstring(rels_source) | |
root = fromstring(archive.read(drawings_path)) | |
for node in root: | |
col, row = 0, 0 | |
name = u'' | |
cell_from = node.find('{%s}from' % SHEET_DRAWING_NS) | |
if cell_from is not None: | |
col = cell_from.find('{%s}col' % SHEET_DRAWING_NS) | |
if col is not None: | |
col = int(col.text) | |
row = cell_from.find('{%s}row' % SHEET_DRAWING_NS) | |
if row is not None: | |
row = int(row.text) | |
cell = ws['%s%s' % (get_column_letter(col + 1), row + 1)] | |
pic = node.find('{%s}pic' % SHEET_DRAWING_NS) | |
if pic is not None: | |
nv_pic_pr = pic.find('{%s}nvPicPr' % SHEET_DRAWING_NS) | |
if nv_pic_pr is not None: | |
nv_pic_pr = nv_pic_pr.find('{%s}cNvPr' % SHEET_DRAWING_NS) | |
if nv_pic_pr is not None: | |
name = nv_pic_pr.attrib.get('name', '') | |
blip_fill = pic.find('{%s}blipFill' % SHEET_DRAWING_NS) | |
if blip_fill is not None: | |
blip = blip_fill.find('{%s}blip' % DRAWING_NS) | |
if blip is not None: | |
rid = blip.attrib.get('{%s}embed' % REL_NS) | |
if rid is not None: | |
image_file = read_image_file(rels_root, rid, valid_files) | |
if image_file: | |
img = Image(BytesIO(archive.read(image_file))) | |
img.drawing.name = name | |
img.anchor(cell, anchortype='oneCell') | |
ws.add_image(img) | |
def get_drawings_file(worksheet_path, archive, valid_files): | |
""" Returns the XML filename in the archive which contains the drawings for | |
the spreadsheet with codename sheet_codename. Returns None if there is no | |
such file | |
""" | |
sheet_codename = os.path.split(worksheet_path)[-1] | |
rels_file = PACKAGE_WORKSHEET_RELS + '/' + sheet_codename + '.rels' | |
if rels_file not in valid_files: | |
return None | |
rels_source = archive.read(rels_file) | |
root = fromstring(rels_source) | |
for node in root: | |
if node.attrib['Type'] == _DRAWING_NS: | |
drawings_file = os.path.split(node.attrib['Target'])[-1] | |
drawings_file = PACKAGE_DRAWINGS + '/' + drawings_file | |
if drawings_file in valid_files: | |
return drawings_file | |
return None |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from zipfile import ZipFile | |
from io import BytesIO | |
from openpyxl.xml.constants import ( | |
ARC_CORE, | |
ARC_WORKBOOK, | |
ARC_STYLE, | |
ARC_THEME, | |
SHARED_STRINGS, | |
EXTERNAL_LINK, | |
) | |
from openpyxl.workbook import DocumentProperties | |
from openpyxl.workbook.names.external import detect_external_links | |
from openpyxl.workbook.names.named_range import read_named_ranges | |
from openpyxl.reader.strings import read_string_table | |
from openpyxl.reader.style import read_style_table | |
from openpyxl.reader.workbook import ( | |
read_content_types, | |
read_properties_core, | |
read_excel_base_date, | |
detect_worksheets, | |
read_rels, | |
read_workbook_code_name, | |
) | |
from openpyxl.reader.worksheet import read_worksheet | |
from openpyxl.reader.comments import read_comments, get_comments_file | |
from patch_reader.drawings import read_drawings, get_drawings_file | |
def _load_workbook(wb, archive, filename, read_only, keep_vba): | |
valid_files = archive.namelist() | |
# If are going to preserve the vba then attach a copy of the archive to the | |
# workbook so that is available for the save. | |
if keep_vba: | |
try: | |
f = open(filename, 'rb') | |
s = f.read() | |
f.close() | |
except: | |
pos = filename.tell() | |
filename.seek(0) | |
s = filename.read() | |
filename.seek(pos) | |
wb.vba_archive = ZipFile(BytesIO(s), 'r') | |
if read_only: | |
wb._archive = ZipFile(filename) | |
# get workbook-level information | |
try: | |
wb.properties = read_properties_core(archive.read(ARC_CORE)) | |
except KeyError: | |
wb.properties = DocumentProperties() | |
wb._read_workbook_settings(archive.read(ARC_WORKBOOK)) | |
# what content types do we have? | |
cts = dict(read_content_types(archive)) | |
rels = dict | |
strings_path = cts.get(SHARED_STRINGS) | |
if strings_path is not None: | |
if strings_path.startswith("/"): | |
strings_path = strings_path[1:] | |
shared_strings = read_string_table(archive.read(strings_path)) | |
else: | |
shared_strings = [] | |
try: | |
wb.loaded_theme = archive.read(ARC_THEME) # some writers don't output a theme, live with it (fixes #160) | |
except KeyError: | |
assert wb.loaded_theme == None, "even though the theme information is missing there is a theme object ?" | |
style_table, color_index, cond_styles = read_style_table(archive.read(ARC_STYLE)) | |
wb.shared_styles = style_table | |
wb.style_properties = {'dxf_list':cond_styles} | |
wb.cond_styles = cond_styles | |
wb.properties.excel_base_date = read_excel_base_date(xml_source=archive.read(ARC_WORKBOOK)) | |
# get worksheets | |
wb.worksheets = [] # remove preset worksheet | |
for sheet in detect_worksheets(archive): | |
sheet_name = sheet['title'] | |
worksheet_path = sheet['path'] | |
if not worksheet_path in valid_files: | |
continue | |
if not read_only: | |
new_ws = read_worksheet(archive.read(worksheet_path), wb, | |
sheet_name, shared_strings, style_table, | |
color_index=color_index, | |
keep_vba=keep_vba) | |
else: | |
new_ws = read_worksheet(None, wb, sheet_name, shared_strings, | |
style_table, | |
color_index=color_index, | |
worksheet_path=worksheet_path) | |
new_ws.sheet_state = sheet.get('state') or 'visible' | |
wb._add_sheet(new_ws) | |
if not read_only: | |
# load comments into the worksheet cells | |
comments_file = get_comments_file(worksheet_path, archive, valid_files) | |
if comments_file is not None: | |
read_comments(new_ws, archive.read(comments_file)) | |
drawings_file = get_drawings_file(worksheet_path, archive, valid_files) | |
if drawings_file is not None: | |
read_drawings(new_ws, drawings_file, archive, valid_files) | |
wb._named_ranges = list(read_named_ranges(archive.read(ARC_WORKBOOK), wb)) | |
wb.code_name = read_workbook_code_name(archive.read(ARC_WORKBOOK)) | |
if EXTERNAL_LINK in cts: | |
rels = read_rels(archive) | |
wb._external_links = list(detect_external_links(rels, archive)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from openpyxl.reader import excel | |
from patch_reader.excel import _load_workbook | |
setattr(excel, '_load_workbook', _load_workbook) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment