Skip to content

Instantly share code, notes, and snippets.

@pikhovkin
Last active March 28, 2020 10:06
Show Gist options
  • Star 5 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save pikhovkin/543709a2e2827d9c345d to your computer and use it in GitHub Desktop.
Save pikhovkin/543709a2e2827d9c345d to your computer and use it in GitHub Desktop.
Patch for openpyxl 2.1.2 for basic reading images. drawings.py and excel.py are in the patch_reader package
# coding: utf-8
from __future__ import absolute_import
import os.path
from io import BytesIO
from openpyxl.xml.constants import (PACKAGE_WORKSHEET_RELS,
REL_NS,
PACKAGE_RELS,
PACKAGE_IMAGES,
PACKAGE_DRAWINGS,
DRAWING_NS,
SHEET_DRAWING_NS)
from openpyxl.xml.functions import fromstring
from openpyxl.drawing import Image
from openpyxl.cell import get_column_letter
IMAGE_NS = REL_NS + '/image'
_DRAWING_NS = REL_NS + '/drawing'
PACKAGE_DRAWINGS_RELS = PACKAGE_DRAWINGS + '/' + PACKAGE_RELS
def read_image_file(root, rid, valid_files):
for node in root:
if node.attrib['Type'] == IMAGE_NS and node.attrib['Id'] == rid:
image_file = os.path.split(node.attrib['Target'])[-1]
image_file = PACKAGE_IMAGES + '/' + image_file
if image_file in valid_files:
return image_file
return None
def read_drawings(ws, drawings_path, archive, valid_files):
""" Given a worksheet and the XML of its drawings file, links drawings to cells
"""
drawings_codename = os.path.split(drawings_path)[-1]
rels_file = PACKAGE_DRAWINGS_RELS + '/' + drawings_codename + '.rels'
if rels_file not in valid_files:
return None
rels_source = archive.read(rels_file)
rels_root = fromstring(rels_source)
root = fromstring(archive.read(drawings_path))
for node in root:
col, row = 0, 0
name = u''
cell_from = node.find('{%s}from' % SHEET_DRAWING_NS)
if cell_from is not None:
col = cell_from.find('{%s}col' % SHEET_DRAWING_NS)
if col is not None:
col = int(col.text)
row = cell_from.find('{%s}row' % SHEET_DRAWING_NS)
if row is not None:
row = int(row.text)
cell = ws['%s%s' % (get_column_letter(col + 1), row + 1)]
pic = node.find('{%s}pic' % SHEET_DRAWING_NS)
if pic is not None:
nv_pic_pr = pic.find('{%s}nvPicPr' % SHEET_DRAWING_NS)
if nv_pic_pr is not None:
nv_pic_pr = nv_pic_pr.find('{%s}cNvPr' % SHEET_DRAWING_NS)
if nv_pic_pr is not None:
name = nv_pic_pr.attrib.get('name', '')
blip_fill = pic.find('{%s}blipFill' % SHEET_DRAWING_NS)
if blip_fill is not None:
blip = blip_fill.find('{%s}blip' % DRAWING_NS)
if blip is not None:
rid = blip.attrib.get('{%s}embed' % REL_NS)
if rid is not None:
image_file = read_image_file(rels_root, rid, valid_files)
if image_file:
img = Image(BytesIO(archive.read(image_file)))
img.drawing.name = name
img.anchor(cell, anchortype='oneCell')
ws.add_image(img)
def get_drawings_file(worksheet_path, archive, valid_files):
""" Returns the XML filename in the archive which contains the drawings for
the spreadsheet with codename sheet_codename. Returns None if there is no
such file
"""
sheet_codename = os.path.split(worksheet_path)[-1]
rels_file = PACKAGE_WORKSHEET_RELS + '/' + sheet_codename + '.rels'
if rels_file not in valid_files:
return None
rels_source = archive.read(rels_file)
root = fromstring(rels_source)
for node in root:
if node.attrib['Type'] == _DRAWING_NS:
drawings_file = os.path.split(node.attrib['Target'])[-1]
drawings_file = PACKAGE_DRAWINGS + '/' + drawings_file
if drawings_file in valid_files:
return drawings_file
return None
from zipfile import ZipFile
from io import BytesIO
from openpyxl.xml.constants import (
ARC_CORE,
ARC_WORKBOOK,
ARC_STYLE,
ARC_THEME,
SHARED_STRINGS,
EXTERNAL_LINK,
)
from openpyxl.workbook import DocumentProperties
from openpyxl.workbook.names.external import detect_external_links
from openpyxl.workbook.names.named_range import read_named_ranges
from openpyxl.reader.strings import read_string_table
from openpyxl.reader.style import read_style_table
from openpyxl.reader.workbook import (
read_content_types,
read_properties_core,
read_excel_base_date,
detect_worksheets,
read_rels,
read_workbook_code_name,
)
from openpyxl.reader.worksheet import read_worksheet
from openpyxl.reader.comments import read_comments, get_comments_file
from patch_reader.drawings import read_drawings, get_drawings_file
def _load_workbook(wb, archive, filename, read_only, keep_vba):
valid_files = archive.namelist()
# If are going to preserve the vba then attach a copy of the archive to the
# workbook so that is available for the save.
if keep_vba:
try:
f = open(filename, 'rb')
s = f.read()
f.close()
except:
pos = filename.tell()
filename.seek(0)
s = filename.read()
filename.seek(pos)
wb.vba_archive = ZipFile(BytesIO(s), 'r')
if read_only:
wb._archive = ZipFile(filename)
# get workbook-level information
try:
wb.properties = read_properties_core(archive.read(ARC_CORE))
except KeyError:
wb.properties = DocumentProperties()
wb._read_workbook_settings(archive.read(ARC_WORKBOOK))
# what content types do we have?
cts = dict(read_content_types(archive))
rels = dict
strings_path = cts.get(SHARED_STRINGS)
if strings_path is not None:
if strings_path.startswith("/"):
strings_path = strings_path[1:]
shared_strings = read_string_table(archive.read(strings_path))
else:
shared_strings = []
try:
wb.loaded_theme = archive.read(ARC_THEME) # some writers don't output a theme, live with it (fixes #160)
except KeyError:
assert wb.loaded_theme == None, "even though the theme information is missing there is a theme object ?"
style_table, color_index, cond_styles = read_style_table(archive.read(ARC_STYLE))
wb.shared_styles = style_table
wb.style_properties = {'dxf_list':cond_styles}
wb.cond_styles = cond_styles
wb.properties.excel_base_date = read_excel_base_date(xml_source=archive.read(ARC_WORKBOOK))
# get worksheets
wb.worksheets = [] # remove preset worksheet
for sheet in detect_worksheets(archive):
sheet_name = sheet['title']
worksheet_path = sheet['path']
if not worksheet_path in valid_files:
continue
if not read_only:
new_ws = read_worksheet(archive.read(worksheet_path), wb,
sheet_name, shared_strings, style_table,
color_index=color_index,
keep_vba=keep_vba)
else:
new_ws = read_worksheet(None, wb, sheet_name, shared_strings,
style_table,
color_index=color_index,
worksheet_path=worksheet_path)
new_ws.sheet_state = sheet.get('state') or 'visible'
wb._add_sheet(new_ws)
if not read_only:
# load comments into the worksheet cells
comments_file = get_comments_file(worksheet_path, archive, valid_files)
if comments_file is not None:
read_comments(new_ws, archive.read(comments_file))
drawings_file = get_drawings_file(worksheet_path, archive, valid_files)
if drawings_file is not None:
read_drawings(new_ws, drawings_file, archive, valid_files)
wb._named_ranges = list(read_named_ranges(archive.read(ARC_WORKBOOK), wb))
wb.code_name = read_workbook_code_name(archive.read(ARC_WORKBOOK))
if EXTERNAL_LINK in cts:
rels = read_rels(archive)
wb._external_links = list(detect_external_links(rels, archive))
from openpyxl.reader import excel
from patch_reader.excel import _load_workbook
setattr(excel, '_load_workbook', _load_workbook)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment