pikhovkin/drawings.py

## drawings.py
# coding: utf-8
from __future__ import absolute_import

import os.path
from io import BytesIO

from openpyxl.xml.constants import (PACKAGE_WORKSHEET_RELS,
                                    REL_NS,
                                    PACKAGE_RELS,
                                    PACKAGE_IMAGES,
                                    PACKAGE_DRAWINGS,
                                    DRAWING_NS,
                                    SHEET_DRAWING_NS)
from openpyxl.xml.functions import fromstring
from openpyxl.drawing import Image
from openpyxl.cell import get_column_letter


IMAGE_NS = REL_NS + '/image'
_DRAWING_NS = REL_NS + '/drawing'
PACKAGE_DRAWINGS_RELS = PACKAGE_DRAWINGS + '/' + PACKAGE_RELS


def read_image_file(root, rid, valid_files):
    for node in root:
        if node.attrib['Type'] == IMAGE_NS and node.attrib['Id'] == rid:
            image_file = os.path.split(node.attrib['Target'])[-1]
            image_file = PACKAGE_IMAGES + '/' + image_file

            if image_file in valid_files:
                return image_file

    return None


def read_drawings(ws, drawings_path, archive, valid_files):
    """ Given a worksheet and the XML of its drawings file, links drawings to cells
    """
    drawings_codename = os.path.split(drawings_path)[-1]
    rels_file = PACKAGE_DRAWINGS_RELS + '/' + drawings_codename + '.rels'

    if rels_file not in valid_files:
        return None

    rels_source = archive.read(rels_file)
    rels_root = fromstring(rels_source)

    root = fromstring(archive.read(drawings_path))
    for node in root:
        col, row = 0, 0
        name = u''

        cell_from = node.find('{%s}from' % SHEET_DRAWING_NS)

        if cell_from is not None:
            col = cell_from.find('{%s}col' % SHEET_DRAWING_NS)
            if col is not None:
                col = int(col.text)

            row = cell_from.find('{%s}row' % SHEET_DRAWING_NS)
            if row is not None:
                row = int(row.text)

        cell = ws['%s%s' % (get_column_letter(col + 1), row + 1)]

        pic = node.find('{%s}pic' % SHEET_DRAWING_NS)
        if pic is not None:
            nv_pic_pr = pic.find('{%s}nvPicPr' % SHEET_DRAWING_NS)
            if nv_pic_pr is not None:
                nv_pic_pr = nv_pic_pr.find('{%s}cNvPr' % SHEET_DRAWING_NS)
                if nv_pic_pr is not None:
                    name = nv_pic_pr.attrib.get('name', '')

            blip_fill = pic.find('{%s}blipFill' % SHEET_DRAWING_NS)
            if blip_fill is not None:
                blip = blip_fill.find('{%s}blip' % DRAWING_NS)
                if blip is not None:
                    rid = blip.attrib.get('{%s}embed' % REL_NS)
                    if rid is not None:
                        image_file = read_image_file(rels_root, rid, valid_files)
                        if image_file:
                            img = Image(BytesIO(archive.read(image_file)))
                            img.drawing.name = name
                            img.anchor(cell, anchortype='oneCell')

                            ws.add_image(img)


def get_drawings_file(worksheet_path, archive, valid_files):
    """ Returns the XML filename in the archive which contains the drawings for
        the spreadsheet with codename sheet_codename. Returns None if there is no
        such file
    """
    sheet_codename = os.path.split(worksheet_path)[-1]
    rels_file = PACKAGE_WORKSHEET_RELS + '/' + sheet_codename + '.rels'

    if rels_file not in valid_files:
        return None

    rels_source = archive.read(rels_file)
    root = fromstring(rels_source)
    for node in root:
        if node.attrib['Type'] == _DRAWING_NS:
            drawings_file = os.path.split(node.attrib['Target'])[-1]
            drawings_file = PACKAGE_DRAWINGS + '/' + drawings_file

            if drawings_file in valid_files:
                return drawings_file

    return None

## excel.py
from zipfile import ZipFile
from io import BytesIO

from openpyxl.xml.constants import (
    ARC_CORE,
    ARC_WORKBOOK,
    ARC_STYLE,
    ARC_THEME,
    SHARED_STRINGS,
    EXTERNAL_LINK,
)

from openpyxl.workbook import DocumentProperties
from openpyxl.workbook.names.external import detect_external_links
from openpyxl.workbook.names.named_range import read_named_ranges
from openpyxl.reader.strings import read_string_table
from openpyxl.reader.style import read_style_table
from openpyxl.reader.workbook import (
    read_content_types,
    read_properties_core,
    read_excel_base_date,
    detect_worksheets,
    read_rels,
    read_workbook_code_name,
)
from openpyxl.reader.worksheet import read_worksheet
from openpyxl.reader.comments import read_comments, get_comments_file

from patch_reader.drawings import read_drawings, get_drawings_file


def _load_workbook(wb, archive, filename, read_only, keep_vba):
    valid_files = archive.namelist()

    # If are going to preserve the vba then attach a copy of the archive to the
    # workbook so that is available for the save.
    if keep_vba:
        try:
            f = open(filename, 'rb')
            s = f.read()
            f.close()
        except:
            pos = filename.tell()
            filename.seek(0)
            s = filename.read()
            filename.seek(pos)
        wb.vba_archive = ZipFile(BytesIO(s), 'r')

    if read_only:
        wb._archive = ZipFile(filename)

    # get workbook-level information
    try:
        wb.properties = read_properties_core(archive.read(ARC_CORE))
    except KeyError:
        wb.properties = DocumentProperties()
    wb._read_workbook_settings(archive.read(ARC_WORKBOOK))

    # what content types do we have?
    cts = dict(read_content_types(archive))
    rels = dict

    strings_path = cts.get(SHARED_STRINGS)
    if strings_path is not None:
        if strings_path.startswith("/"):
            strings_path = strings_path[1:]
        shared_strings = read_string_table(archive.read(strings_path))
    else:
        shared_strings = []

    try:
        wb.loaded_theme = archive.read(ARC_THEME)  # some writers don't output a theme, live with it (fixes #160)
    except KeyError:
        assert wb.loaded_theme == None, "even though the theme information is missing there is a theme object ?"

    style_table, color_index, cond_styles = read_style_table(archive.read(ARC_STYLE))
    wb.shared_styles = style_table
    wb.style_properties = {'dxf_list':cond_styles}
    wb.cond_styles = cond_styles

    wb.properties.excel_base_date = read_excel_base_date(xml_source=archive.read(ARC_WORKBOOK))

    # get worksheets
    wb.worksheets = []  # remove preset worksheet
    for sheet in detect_worksheets(archive):
        sheet_name = sheet['title']
        worksheet_path = sheet['path']
        if not worksheet_path in valid_files:
            continue

        if not read_only:
            new_ws = read_worksheet(archive.read(worksheet_path), wb,
                                    sheet_name, shared_strings, style_table,
                                    color_index=color_index,
                                    keep_vba=keep_vba)
        else:
            new_ws = read_worksheet(None, wb, sheet_name, shared_strings,
                                    style_table,
                                    color_index=color_index,
                                    worksheet_path=worksheet_path)

        new_ws.sheet_state = sheet.get('state') or 'visible'
        wb._add_sheet(new_ws)

        if not read_only:
        # load comments into the worksheet cells
            comments_file = get_comments_file(worksheet_path, archive, valid_files)
            if comments_file is not None:
                read_comments(new_ws, archive.read(comments_file))

            drawings_file = get_drawings_file(worksheet_path, archive, valid_files)
            if drawings_file is not None:
                read_drawings(new_ws, drawings_file, archive, valid_files)

    wb._named_ranges = list(read_named_ranges(archive.read(ARC_WORKBOOK), wb))

    wb.code_name = read_workbook_code_name(archive.read(ARC_WORKBOOK))

    if EXTERNAL_LINK in cts:
        rels = read_rels(archive)
        wb._external_links = list(detect_external_links(rels, archive))

## patch.py
from openpyxl.reader import excel

from patch_reader.excel import _load_workbook


setattr(excel, '_load_workbook', _load_workbook)
	# coding: utf-8
	from __future__ import absolute_import

	import os.path
	from io import BytesIO

	from openpyxl.xml.constants import (PACKAGE_WORKSHEET_RELS,
	REL_NS,
	PACKAGE_RELS,
	PACKAGE_IMAGES,
	PACKAGE_DRAWINGS,
	DRAWING_NS,
	SHEET_DRAWING_NS)
	from openpyxl.xml.functions import fromstring
	from openpyxl.drawing import Image
	from openpyxl.cell import get_column_letter


	IMAGE_NS = REL_NS + '/image'
	_DRAWING_NS = REL_NS + '/drawing'
	PACKAGE_DRAWINGS_RELS = PACKAGE_DRAWINGS + '/' + PACKAGE_RELS


	def read_image_file(root, rid, valid_files):
	for node in root:
	if node.attrib['Type'] == IMAGE_NS and node.attrib['Id'] == rid:
	image_file = os.path.split(node.attrib['Target'])[-1]
	image_file = PACKAGE_IMAGES + '/' + image_file

	if image_file in valid_files:
	return image_file

	return None


	def read_drawings(ws, drawings_path, archive, valid_files):
	""" Given a worksheet and the XML of its drawings file, links drawings to cells
	"""
	drawings_codename = os.path.split(drawings_path)[-1]
	rels_file = PACKAGE_DRAWINGS_RELS + '/' + drawings_codename + '.rels'

	if rels_file not in valid_files:
	return None

	rels_source = archive.read(rels_file)
	rels_root = fromstring(rels_source)

	root = fromstring(archive.read(drawings_path))
	for node in root:
	col, row = 0, 0
	name = u''

	cell_from = node.find('{%s}from' % SHEET_DRAWING_NS)

	if cell_from is not None:
	col = cell_from.find('{%s}col' % SHEET_DRAWING_NS)
	if col is not None:
	col = int(col.text)

	row = cell_from.find('{%s}row' % SHEET_DRAWING_NS)
	if row is not None:
	row = int(row.text)

	cell = ws['%s%s' % (get_column_letter(col + 1), row + 1)]

	pic = node.find('{%s}pic' % SHEET_DRAWING_NS)
	if pic is not None:
	nv_pic_pr = pic.find('{%s}nvPicPr' % SHEET_DRAWING_NS)
	if nv_pic_pr is not None:
	nv_pic_pr = nv_pic_pr.find('{%s}cNvPr' % SHEET_DRAWING_NS)
	if nv_pic_pr is not None:
	name = nv_pic_pr.attrib.get('name', '')

	blip_fill = pic.find('{%s}blipFill' % SHEET_DRAWING_NS)
	if blip_fill is not None:
	blip = blip_fill.find('{%s}blip' % DRAWING_NS)
	if blip is not None:
	rid = blip.attrib.get('{%s}embed' % REL_NS)
	if rid is not None:
	image_file = read_image_file(rels_root, rid, valid_files)
	if image_file:
	img = Image(BytesIO(archive.read(image_file)))
	img.drawing.name = name
	img.anchor(cell, anchortype='oneCell')

	ws.add_image(img)


	def get_drawings_file(worksheet_path, archive, valid_files):
	""" Returns the XML filename in the archive which contains the drawings for
	the spreadsheet with codename sheet_codename. Returns None if there is no
	such file
	"""
	sheet_codename = os.path.split(worksheet_path)[-1]
	rels_file = PACKAGE_WORKSHEET_RELS + '/' + sheet_codename + '.rels'

	if rels_file not in valid_files:
	return None

	rels_source = archive.read(rels_file)
	root = fromstring(rels_source)
	for node in root:
	if node.attrib['Type'] == _DRAWING_NS:
	drawings_file = os.path.split(node.attrib['Target'])[-1]
	drawings_file = PACKAGE_DRAWINGS + '/' + drawings_file

	if drawings_file in valid_files:
	return drawings_file

	return None
	from zipfile import ZipFile
	from io import BytesIO

	from openpyxl.xml.constants import (
	ARC_CORE,
	ARC_WORKBOOK,
	ARC_STYLE,
	ARC_THEME,
	SHARED_STRINGS,
	EXTERNAL_LINK,
	)

	from openpyxl.workbook import DocumentProperties
	from openpyxl.workbook.names.external import detect_external_links
	from openpyxl.workbook.names.named_range import read_named_ranges
	from openpyxl.reader.strings import read_string_table
	from openpyxl.reader.style import read_style_table
	from openpyxl.reader.workbook import (
	read_content_types,
	read_properties_core,
	read_excel_base_date,
	detect_worksheets,
	read_rels,
	read_workbook_code_name,
	)
	from openpyxl.reader.worksheet import read_worksheet
	from openpyxl.reader.comments import read_comments, get_comments_file

	from patch_reader.drawings import read_drawings, get_drawings_file


	def _load_workbook(wb, archive, filename, read_only, keep_vba):
	valid_files = archive.namelist()

	# If are going to preserve the vba then attach a copy of the archive to the
	# workbook so that is available for the save.
	if keep_vba:
	try:
	f = open(filename, 'rb')
	s = f.read()
	f.close()
	except:
	pos = filename.tell()
	filename.seek(0)
	s = filename.read()
	filename.seek(pos)
	wb.vba_archive = ZipFile(BytesIO(s), 'r')

	if read_only:
	wb._archive = ZipFile(filename)

	# get workbook-level information
	try:
	wb.properties = read_properties_core(archive.read(ARC_CORE))
	except KeyError:
	wb.properties = DocumentProperties()
	wb._read_workbook_settings(archive.read(ARC_WORKBOOK))

	# what content types do we have?
	cts = dict(read_content_types(archive))
	rels = dict

	strings_path = cts.get(SHARED_STRINGS)
	if strings_path is not None:
	if strings_path.startswith("/"):
	strings_path = strings_path[1:]
	shared_strings = read_string_table(archive.read(strings_path))
	else:
	shared_strings = []

	try:
	wb.loaded_theme = archive.read(ARC_THEME) # some writers don't output a theme, live with it (fixes #160)
	except KeyError:
	assert wb.loaded_theme == None, "even though the theme information is missing there is a theme object ?"

	style_table, color_index, cond_styles = read_style_table(archive.read(ARC_STYLE))
	wb.shared_styles = style_table
	wb.style_properties = {'dxf_list':cond_styles}
	wb.cond_styles = cond_styles

	wb.properties.excel_base_date = read_excel_base_date(xml_source=archive.read(ARC_WORKBOOK))

	# get worksheets
	wb.worksheets = [] # remove preset worksheet
	for sheet in detect_worksheets(archive):
	sheet_name = sheet['title']
	worksheet_path = sheet['path']
	if not worksheet_path in valid_files:
	continue

	if not read_only:
	new_ws = read_worksheet(archive.read(worksheet_path), wb,
	sheet_name, shared_strings, style_table,
	color_index=color_index,
	keep_vba=keep_vba)
	else:
	new_ws = read_worksheet(None, wb, sheet_name, shared_strings,
	style_table,
	color_index=color_index,
	worksheet_path=worksheet_path)

	new_ws.sheet_state = sheet.get('state') or 'visible'
	wb._add_sheet(new_ws)

	if not read_only:
	# load comments into the worksheet cells
	comments_file = get_comments_file(worksheet_path, archive, valid_files)
	if comments_file is not None:
	read_comments(new_ws, archive.read(comments_file))

	drawings_file = get_drawings_file(worksheet_path, archive, valid_files)
	if drawings_file is not None:
	read_drawings(new_ws, drawings_file, archive, valid_files)

	wb._named_ranges = list(read_named_ranges(archive.read(ARC_WORKBOOK), wb))

	wb.code_name = read_workbook_code_name(archive.read(ARC_WORKBOOK))

	if EXTERNAL_LINK in cts:
	rels = read_rels(archive)
	wb._external_links = list(detect_external_links(rels, archive))
	from openpyxl.reader import excel

	from patch_reader.excel import _load_workbook


	setattr(excel, '_load_workbook', _load_workbook)