kkAyataka/get_images_from_xlsx.py

## get_images_from_xlsx.py
import os
import zipfile
import xml.etree.ElementTree as xml

'''
book.xlsx/
 |- _rels/
 |- xl/
 |  |- drawings/
 |  |  |  |- _rels/
 |  |  |  |- drawing1.xml.rels # Assign an ID to an image
 |  |  |- drawing1.xml         # Image position
 |  |-media/
 |    |- image1.png            # Image file
 |    |- image2.jpg
'''

# Load book
# An .xlsx file is a ZIP archive; the workbook is looked up relative to this script.
xlsx_path = os.path.realpath(os.path.join(os.path.dirname(__file__), '../data/Book1.xlsx'))

# Namespace of the <Relationship> elements in *.rels parts. An explicit prefix
# is used because the empty-prefix form of the mapping needs Python 3.8+.
rels_ns = {'rel': 'http://schemas.openxmlformats.org/package/2006/relationships'}

# Namespaces used inside the drawing part.
ns = {
    'xdr': 'http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing',
    'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
    'r': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships',
}

# The context manager closes the archive even when a part is missing or malformed.
with zipfile.ZipFile(xlsx_path) as xlsx:
    # List items
    print('\n# NameList:\n')
    for name in xlsx.namelist():
        print(name)

    # Image files
    # ZipFile.read() opens, reads and closes the member in one call.
    img_bytes = xlsx.read('xl/media/image1.png')
    # Write the extracted bytes and close the handle; previously the file was
    # opened (and truncated) but never written to or closed.
    with open('image.png', 'wb') as fs:
        fs.write(img_bytes)

    # Get image IDs
    print('\n# drawing1.xml.rels\n')
    # Parse the raw bytes so the parser honours the XML encoding declaration.
    drawing1_xml_rels = xml.fromstring(xlsx.read('xl/drawings/_rels/drawing1.xml.rels'))
    relationships = drawing1_xml_rels.findall('.//rel:Relationship', rels_ns)
    print(relationships)
    for r in relationships:
        print(r.get('Id'))      # relationship ID referenced from the drawing part
        print(r.get('Target'))  # path of the related part (the image file)

    # Get image information from drawing.xml
    print('\n# Drawing:\n')
    drawing1_xml = xml.fromstring(xlsx.read('xl/drawings/drawing1.xml'))
    # The r:embed attribute in ElementTree's "{namespace}name" notation.
    embed_attr = '{%s}embed' % ns['r']
    two_cell_anchors = drawing1_xml.findall('.//xdr:twoCellAnchor', ns)
    for tca in two_cell_anchors:
        name_elem = tca.find('.//xdr:pic/xdr:nvPicPr/xdr:cNvPr', ns)
        blip = tca.find('.//*/xdr:blipFill/a:blip', ns)
        if name_elem is None or blip is None:
            # The anchor does not contain a picture (e.g. a shape or a chart).
            continue
        title = name_elem.get('name')
        from_cell = {
            'col': int(tca.find('.//xdr:from/xdr:col', ns).text),
            'row': int(tca.find('.//xdr:from/xdr:row', ns).text),
        }
        to_cell = {
            'col': int(tca.find('.//xdr:to/xdr:col', ns).text),
            'row': int(tca.find('.//xdr:to/xdr:row', ns).text),
        }
        # Relationship ID; matches an 'Id' printed from drawing1.xml.rels above.
        embed = blip.get(embed_attr)
        print(title)
        print(from_cell)
        print(to_cell)
        print(embed)
	import os
	import zipfile
	import xml.etree.ElementTree as xml

	'''
	book.xlsx/
	 |- _rels/
	 |- xl/
	 |  |- drawings/
	 |  |  |  |- _rels/
	 |  |  |  |- drawing1.xml.rels # Assign an ID to an image
	 |  |  |- drawing1.xml         # Image position
	 |  |-media/
	 |    |- image1.png            # Image file
	 |    |- image2.jpg
	'''

	# Load book
	# An .xlsx file is a ZIP archive; the workbook is looked up relative to this script.
	xlsx_path = os.path.realpath(os.path.join(os.path.dirname(__file__), '../data/Book1.xlsx'))

	# Namespace of the <Relationship> elements in *.rels parts. An explicit prefix
	# is used because the empty-prefix form of the mapping needs Python 3.8+.
	rels_ns = {'rel': 'http://schemas.openxmlformats.org/package/2006/relationships'}

	# Namespaces used inside the drawing part.
	ns = {
		'xdr': 'http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing',
		'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
		'r': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships',
	}

	# The context manager closes the archive even when a part is missing or malformed.
	with zipfile.ZipFile(xlsx_path) as xlsx:
		# List items
		print('\n# NameList:\n')
		for name in xlsx.namelist():
			print(name)

		# Image files
		# ZipFile.read() opens, reads and closes the member in one call.
		img_bytes = xlsx.read('xl/media/image1.png')
		# Write the extracted bytes and close the handle; previously the file was
		# opened (and truncated) but never written to or closed.
		with open('image.png', 'wb') as fs:
			fs.write(img_bytes)

		# Get image IDs
		print('\n# drawing1.xml.rels\n')
		# Parse the raw bytes so the parser honours the XML encoding declaration.
		drawing1_xml_rels = xml.fromstring(xlsx.read('xl/drawings/_rels/drawing1.xml.rels'))
		relationships = drawing1_xml_rels.findall('.//rel:Relationship', rels_ns)
		print(relationships)
		for r in relationships:
			print(r.get('Id'))      # relationship ID referenced from the drawing part
			print(r.get('Target'))  # path of the related part (the image file)

		# Get image information from drawing.xml
		print('\n# Drawing:\n')
		drawing1_xml = xml.fromstring(xlsx.read('xl/drawings/drawing1.xml'))
		# The r:embed attribute in ElementTree's "{namespace}name" notation.
		embed_attr = '{%s}embed' % ns['r']
		two_cell_anchors = drawing1_xml.findall('.//xdr:twoCellAnchor', ns)
		for tca in two_cell_anchors:
			name_elem = tca.find('.//xdr:pic/xdr:nvPicPr/xdr:cNvPr', ns)
			blip = tca.find('.//*/xdr:blipFill/a:blip', ns)
			if name_elem is None or blip is None:
				# The anchor does not contain a picture (e.g. a shape or a chart).
				continue
			title = name_elem.get('name')
			from_cell = {
				'col': int(tca.find('.//xdr:from/xdr:col', ns).text),
				'row': int(tca.find('.//xdr:from/xdr:row', ns).text),
			}
			to_cell = {
				'col': int(tca.find('.//xdr:to/xdr:col', ns).text),
				'row': int(tca.find('.//xdr:to/xdr:row', ns).text),
			}
			# Relationship ID; matches an 'Id' printed from drawing1.xml.rels above.
			embed = blip.get(embed_attr)
			print(title)
			print(from_cell)
			print(to_cell)
			print(embed)