Skip to content

Instantly share code, notes, and snippets.

@kkAyataka
Created April 7, 2023 21:46
Show Gist options
  • Save kkAyataka/20c689b302ee94e8034077937e9ed4c6 to your computer and use it in GitHub Desktop.
Save kkAyataka/20c689b302ee94e8034077937e9ed4c6 to your computer and use it in GitHub Desktop.
Get images from an xlsx file.
import os
import zipfile
import xml.etree.ElementTree as xml
'''
Layout of the parts of an .xlsx archive that this script reads:

book.xlsx/
|- _rels/
|- xl/
   |- drawings/
   |  |- _rels/
   |  |  |- drawing1.xml.rels  # Assigns an ID to each image
   |  |- drawing1.xml          # Image position
   |- media/
      |- image1.png            # Image file
      |- image2.jpg
'''
# Load book
# The workbook path is resolved relative to this script's own location, not
# the current working directory.
xlsx_path = os.path.realpath(os.path.join(os.path.dirname(__file__), '../data/Book1.xlsx'))
# The workbook is opened as a ZIP archive (read mode is the ZipFile default).
xlsx = zipfile.ZipFile(xlsx_path)

# List items
# Print the name of every member stored in the archive, one per line.
print('\n# NameList:\n')
for name in xlsx.namelist():
    print(name)
# Image files
# Read the raw bytes of the archive member xl/media/image1.png.
# ZipFile.read() opens and closes the member itself, so no handle is leaked.
img_bytes = xlsx.read('xl/media/image1.png')

# Save the bytes as image.png (path relative to the current working
# directory). The context manager guarantees the file is flushed and closed;
# previously the file was opened for writing, never written to and never
# closed, which left a truncated, empty image.png behind.
with open('image.png', 'wb') as fs:
    fs.write(img_bytes)
# Get image IDs
# drawing1.xml.rels lists <Relationship> elements; this section prints the
# 'Id' and 'Target' attributes of each one.
print('\n# drawing1.xml.rels\n')
# ZipFile.read() returns the member's bytes and closes the member itself.
drawing1_xml_rels_str = xlsx.read('xl/drawings/_rels/drawing1.xml.rels').decode('utf-8')
drawing1_xml_rels = xml.fromstring(drawing1_xml_rels_str)
# The '' key places the unprefixed tag name 'Relationship' into the package
# relationships namespace, so no prefix is needed in the path expression.
relationships = drawing1_xml_rels.findall('.//Relationship', {'': 'http://schemas.openxmlformats.org/package/2006/relationships'})
print(relationships)
# Iterate the namespace-qualified query result rather than every child of the
# root element, so only <Relationship> elements are reported.
for r in relationships:
    print(r.get('Id'))
    print(r.get('Target'))
# Get image information from drawing.xml
# For every <xdr:twoCellAnchor> that contains a picture, print the picture's
# name, the from/to cell coordinates of the anchor, and the relationship ID
# stored in the blip's r:embed attribute.
print('\n# Drawing:\n')
# ZipFile.read() returns the member's bytes and closes the member itself.
drawing1_xml_str = xlsx.read('xl/drawings/drawing1.xml').decode('utf-8')
drawing1_xml = xml.fromstring(drawing1_xml_str)
# Prefix -> namespace URI mapping used by the path expressions below.
ns = {
    'xdr': 'http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing',
    'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
    'r': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships',
}
# Attribute lookups do not accept a prefix map, so the qualified name of
# r:embed is built once in '{uri}local' form from the mapping above.
embed_attr = '{' + ns['r'] + '}embed'

two_cell_anchors = drawing1_xml.findall('.//xdr:twoCellAnchor', ns)
for tca in two_cell_anchors:
    pic_props = tca.find('.//xdr:pic/xdr:nvPicPr/xdr:cNvPr', ns)
    blip = tca.find('.//*/xdr:blipFill/a:blip', ns)
    # find() returns None when the anchor holds no picture element; skip such
    # anchors instead of failing with AttributeError. Compare with `is None`
    # because an Element without children is falsy.
    if pic_props is None or blip is None:
        continue

    title = pic_props.get('name')
    # Column/row values exactly as stored in the anchor's <xdr:from> element.
    fromCell = {
        'col': int(tca.find('.//xdr:from/xdr:col', ns).text),
        'row': int(tca.find('.//xdr:from/xdr:row', ns).text),
    }
    # Column/row values exactly as stored in the anchor's <xdr:to> element.
    toCell = {
        'col': int(tca.find('.//xdr:to/xdr:col', ns).text),
        'row': int(tca.find('.//xdr:to/xdr:row', ns).text),
    }
    embed = blip.get(embed_attr)

    print(title)
    print(fromCell)
    print(toCell)
    print(embed)

# Last use of the archive in this script: release the underlying file handle.
xlsx.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment