Skip to content

Instantly share code, notes, and snippets.

@aont
Created January 17, 2024 15:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aont/c2d90fbaa5af3739f55fcd73d05a3b73 to your computer and use it in GitHub Desktop.
Save aont/c2d90fbaa5af3739f55fcd73d05a3b73 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import os
import glob
import sys
import json
import subprocess
import zipfile
import io
import shutil
import lxml.etree
import wand
import wand.image
def wand_convert(img_bytes:bytes, input_format=None, output_format:str="jpeg", quality:int=80):
    """Re-encode an in-memory image with Wand and return the encoded bytes.

    ``img_bytes`` is handed to ``wand.image.Image`` as a blob, with
    ``input_format`` passed through as its ``format`` argument.  A clone of
    the decoded image gets ``output_format`` as its format and ``quality`` as
    its compression quality, and is saved into an in-memory buffer whose
    contents are returned.
    """
    encoded = io.BytesIO()
    source = wand.image.Image(blob=img_bytes, format=input_format)
    with source as decoded, decoded.clone() as target:
        # Optional downscale to half size, currently disabled:
        # width, height = target.size
        # target.resize(width // 2, height // 2)
        target.format = output_format
        target.compression_quality = quality
        target.save(file=encoded)
    return encoded.getvalue()
# Re-encode every image embedded in the slides of ``pptx_path_src`` as JPEG
# and write the resulting presentation to ``pptx_path_dst``.
#
# Steps:
#   1. Make sure [Content_Types].xml declares the "jpg" extension.
#   2. For each slide relationship file, convert the images it references
#      with wand_convert() and point the relationship at the new ".jpg" part.
#   3. Copy every remaining archive member unchanged.
pptx_path_src = "src.pptx"
pptx_path_dst = "dst.pptx"

_CONTENT_TYPES_PATH = "[Content_Types].xml"
_CONTENT_TYPES_DEFAULT_TAG = "{http://schemas.openxmlformats.org/package/2006/content-types}Default"
_RELATIONSHIP_TAG = "{http://schemas.openxmlformats.org/package/2006/relationships}Relationship"
_IMAGE_REL_TYPE = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/image'
_SLIDE_DIR = "ppt/slides"
_SLIDE_RELS_DIR = "ppt/slides/_rels"


def _resolve_target(base_dir, target):
    """Return the archive member name that a relationship Target refers to.

    Targets are relative to ``base_dir`` (the directory of the part owning the
    relationship file) unless they start with "/", in which case they are
    relative to the archive root.
    """
    if target.startswith("/"):
        joined = target.lstrip("/")
    else:
        joined = os.path.join(base_dir, target)
    return os.path.normpath(joined).replace(os.sep, "/")


def _internal_targets(rels_doc, base_dir):
    """Yield ``(relationship, member_name)`` for each non-external relationship."""
    for relationship in rels_doc.iterfind(_RELATIONSHIP_TAG):
        # External targets are URLs/paths outside the archive, not members.
        if relationship.get("TargetMode") == "External":
            continue
        target = relationship.get("Target")
        if target:
            yield relationship, _resolve_target(base_dir, target)


def _unique_member_name(stem, ext, taken):
    """Return ``stem + ext``, suffixed with ``_N`` if needed to avoid ``taken``."""
    candidate = stem + ext
    counter = 1
    while candidate in taken:
        candidate = f"{stem}_{counter}{ext}"
        counter += 1
    return candidate


def _parse_xml_member(archive, member):
    """Parse the XML archive member ``member`` and return its root element."""
    with archive.open(member, "r") as f_xml:
        return lxml.etree.fromstring(f_xml.read())


def _write_xml_member(archive, member, doc):
    """Serialize ``doc`` with an XML declaration into archive member ``member``."""
    with archive.open(member, "w") as f_xml:
        f_xml.write(lxml.etree.tostring(doc, xml_declaration=True, encoding='UTF-8', standalone=True))


# Open the source first so that a missing or unreadable source does not leave
# behind (or truncate) the destination file.
with zipfile.ZipFile(pptx_path_src, "r") as zip_pptx_src:
    with zipfile.ZipFile(pptx_path_dst, "w", compression=zipfile.ZIP_DEFLATED) as zip_pptx_dst:
        filepath_inzip_list = zip_pptx_src.namelist()
        archive_names = frozenset(filepath_inzip_list)

        # Classify members: slide relationship files are rewritten, the
        # content-types file is regenerated, everything else is a copy
        # candidate.  Non-slide relationship files are additionally scanned
        # (read-only) to learn which parts they still reference.
        xml_rels_list = []
        other_rels_list = []
        otherfile_inzip_set = set()
        for filepath_inzip in filepath_inzip_list:
            if filepath_inzip == _CONTENT_TYPES_PATH:
                continue
            parent_dir = os.path.dirname(filepath_inzip)
            if parent_dir == _SLIDE_RELS_DIR and filepath_inzip.endswith(".xml.rels"):
                xml_rels_list.append(filepath_inzip)
                continue
            otherfile_inzip_set.add(filepath_inzip)
            if filepath_inzip.endswith(".rels") and os.path.basename(parent_dir) == "_rels":
                other_rels_list.append(filepath_inzip)

        # Parts referenced from relationship files that are not rewritten
        # (layouts, masters, notes, ...) must survive under their original
        # name, otherwise those references would dangle in the output.
        referenced_elsewhere = set()
        for rels_path_inzip in other_rels_list:
            owner_dir = os.path.dirname(os.path.dirname(rels_path_inzip))
            other_doc = _parse_xml_member(zip_pptx_src, rels_path_inzip)
            for _, member_name in _internal_targets(other_doc, owner_dir):
                referenced_elsewhere.add(member_name)

        # --- [Content_Types].xml: register the "jpg" extension if missing ---
        sys.stderr.write(f"info: {_CONTENT_TYPES_PATH}\n")
        doc = _parse_xml_member(zip_pptx_src, _CONTENT_TYPES_PATH)
        # Extension matching is case-insensitive, so "JPG" already counts.
        has_jpg = any(
            default.get("Extension", "").lower() == "jpg"
            for default in doc.iterfind(_CONTENT_TYPES_DEFAULT_TAG)
        )
        if not has_jpg:
            # Create the element in the content-types namespace explicitly.
            default_jpg = lxml.etree.SubElement(doc, _CONTENT_TYPES_DEFAULT_TAG)
            default_jpg.set("Extension", "jpg")
            default_jpg.set("ContentType", "image/jpeg")
        _write_xml_member(zip_pptx_dst, _CONTENT_TYPES_PATH, doc)

        # --- slide relationships: convert images and retarget them ---
        taken_names = set(filepath_inzip_list)  # names unavailable for new parts
        converted_image_map = {}  # source member name -> converted member name
        for xml_rels_path_inzip in xml_rels_list:
            sys.stderr.write(f"info: {xml_rels_path_inzip}\n")
            doc = _parse_xml_member(zip_pptx_src, xml_rels_path_inzip)
            for relationship, image_path_inzip in _internal_targets(doc, _SLIDE_DIR):
                if relationship.get("Type") != _IMAGE_REL_TYPE:
                    continue
                sys.stderr.write(f"info: {image_path_inzip}\n")

                image_enc_path_inzip = converted_image_map.get(image_path_inzip)
                if image_enc_path_inzip is None:
                    if image_path_inzip not in archive_names:
                        # Dangling reference in the source: nothing to convert.
                        sys.stderr.write(
                            f"warning: {image_path_inzip} is not in the archive; relationship left unchanged\n"
                        )
                        continue
                    if image_path_inzip not in referenced_elsewhere:
                        # Nobody else needs the original: drop it and free
                        # its name (lets "x.jpg" be replaced in place).
                        otherfile_inzip_set.discard(image_path_inzip)
                        taken_names.discard(image_path_inzip)
                    image_name, _ = os.path.splitext(image_path_inzip)
                    # Avoid clobbering an unrelated part that already owns
                    # "<name>.jpg" (e.g. "a.png" next to "a.jpg"/"a.jpeg").
                    image_enc_path_inzip = _unique_member_name(image_name, ".jpg", taken_names)
                    taken_names.add(image_enc_path_inzip)
                    converted_image_map[image_path_inzip] = image_enc_path_inzip

                    sys.stderr.write(f"info: {image_enc_path_inzip}\n")
                    with zip_pptx_src.open(image_path_inzip, "r") as f_img:
                        img_bytes = f_img.read()
                    img_enc_bytes = wand_convert(img_bytes)
                    with zip_pptx_dst.open(image_enc_path_inzip, "w") as f_img:
                        f_img.write(img_enc_bytes)

                relationship.set(
                    "Target",
                    os.path.relpath(image_enc_path_inzip, _SLIDE_DIR).replace(os.sep, "/"),
                )

            sys.stderr.write(f"info: {xml_rels_path_inzip}\n")
            _write_xml_member(zip_pptx_dst, xml_rels_path_inzip, doc)

        # --- copy everything else, in source order for reproducible output ---
        for filepath_inzip in filepath_inzip_list:
            if filepath_inzip not in otherfile_inzip_set:
                continue
            otherfile_inzip_set.remove(filepath_inzip)  # copy each name once
            sys.stderr.write(f"info: {filepath_inzip}\n")
            with zip_pptx_src.open(filepath_inzip, "r") as f_src:
                with zip_pptx_dst.open(filepath_inzip, "w") as f_dst:
                    shutil.copyfileobj(f_src, f_dst)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment