Created
January 17, 2024 15:29
-
-
Save aont/c2d90fbaa5af3739f55fcd73d05a3b73 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import os | |
import glob | |
import sys | |
import json | |
import subprocess | |
import zipfile | |
import io | |
import shutil | |
import lxml.etree | |
import wand | |
import wand.image | |
def wand_convert(img_bytes:bytes, input_format=None, output_format:str="jpeg", quality:int=80):
    """Re-encode an in-memory image through ``wand.image.Image``.

    Args:
        img_bytes: Raw bytes of the image to convert.
        input_format: Passed to ``wand.image.Image`` as ``format``; the
            default ``None`` is forwarded unchanged.
        output_format: Value assigned to the image's ``format`` before saving.
        quality: Value assigned to the image's ``compression_quality``.

    Returns:
        The bytes of the re-encoded image.
    """
    encoded = io.BytesIO()
    # The conversion is applied to a clone; both images are released when the
    # ``with`` statement exits.
    with wand.image.Image(blob=img_bytes, format=input_format) as source, source.clone() as converted:
        # NOTE: a downscale step (e.g. halving ``converted.size``) could be
        # added here; it is intentionally not performed.
        converted.format = output_format
        converted.compression_quality = quality
        converted.save(file=encoded)
    return encoded.getvalue()
# ---------------------------------------------------------------------------
# Script body: copy ``pptx_path_src`` to ``pptx_path_dst`` while re-encoding
# every picture referenced by a slide as JPEG (via ``wand_convert``).
#
# Steps:
#   1. Classify the archive members (slide .rels parts, [Content_Types].xml,
#      everything else).
#   2. Parse every .rels part once to learn which members are referenced by
#      something other than a slide picture relationship.
#   3. Write [Content_Types].xml, making sure the "jpg" extension is declared.
#   4. For each slide .rels part, convert the referenced pictures, re-point
#      the relationships at the JPEG copies and write the part.
#   5. Copy all remaining members unchanged.
# ---------------------------------------------------------------------------

# Source presentation to read and the recompressed copy to create.
pptx_path_src = "src.pptx"
pptx_path_dst = "dst.pptx"
# Original relationship targets (as written in the slide .rels parts) that
# were re-pointed at a JPEG copy.
image_path_xml_rels_set = set()

# XML namespaces of the two package-level part kinds that are parsed here.
_CONTENT_TYPES_NS = "http://schemas.openxmlformats.org/package/2006/content-types"
_RELATIONSHIPS_NS = "http://schemas.openxmlformats.org/package/2006/relationships"
# Relationship type that marks a relationship as pointing at a picture.
_IMAGE_REL_TYPE = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/image'


def _resolve_rel_target(rels_path_inzip, target):
    """Return the archive member name that a relationship ``Target`` names."""
    if target.startswith("/"):
        # A leading slash means "relative to the package root".
        resolved = target.lstrip("/")
    else:
        # Otherwise the target is relative to the folder of the part owning
        # the .rels file, i.e. the parent of its "_rels" folder.
        base_dir = os.path.dirname(os.path.dirname(rels_path_inzip))
        resolved = os.path.join(base_dir, target)
    return os.path.normpath(resolved).replace(os.sep, "/")


def _iter_internal_relationships(rels_doc):
    """Yield the Relationship elements that point at a member of the archive."""
    for relationship in rels_doc.iterfind(f"{{{_RELATIONSHIPS_NS}}}Relationship"):
        # External relationships carry a URL/file link, not a member name.
        if relationship.get("TargetMode") == "External":
            continue
        if relationship.get("Target"):
            yield relationship


def _pick_jpeg_member_name(source_name, taken_names):
    """Choose the member name for the JPEG copy of ``source_name``.

    ``taken_names`` holds lower-cased names that are already in use; a numeric
    suffix is appended until the candidate does not clash with any of them.
    """
    stem, ext = os.path.splitext(source_name)
    if ext.lower() == ".jpg":
        # Already a .jpg member: re-encode it in place under the same name.
        return source_name
    candidate = stem + ".jpg"
    suffix = 0
    while candidate.lower() in taken_names:
        suffix += 1
        candidate = f"{stem}_{suffix}.jpg"
    return candidate


# The source is opened first so that a missing/unreadable input does not leave
# an empty destination archive behind.
with zipfile.ZipFile(pptx_path_src, "r") as zip_pptx_src:
    with zipfile.ZipFile(pptx_path_dst, "w", compression=zipfile.ZIP_DEFLATED) as zip_pptx_dst:
        # De-duplicated member names in archive order.
        filepath_inzip_list = list(dict.fromkeys(zip_pptx_src.namelist()))
        source_names = set(filepath_inzip_list)
        content_types_xml_path = "[Content_Types].xml"

        # --- 1. classify members ------------------------------------------
        xml_rels_list = []           # slide relationship parts (rewritten)
        otherfile_inzip_set = set()  # members copied through unchanged
        for filepath_inzip in filepath_inzip_list:
            if os.path.dirname(filepath_inzip) == "ppt/slides/_rels" and filepath_inzip.endswith(".xml.rels"):
                xml_rels_list.append(filepath_inzip)
            elif filepath_inzip == content_types_xml_path:
                continue
            else:
                otherfile_inzip_set.add(filepath_inzip)

        # --- 2. find members that must survive under their original name ---
        # A picture used by a slide may also be used by a layout, master or
        # any other part whose relationships are not rewritten here; dropping
        # the original would leave those relationships dangling.
        slide_rels_names = set(xml_rels_list)
        xml_rels_docs = {}            # slide .rels path -> parsed document
        referenced_elsewhere = set()
        for rels_path_inzip in filepath_inzip_list:
            if not rels_path_inzip.endswith(".rels"):
                continue
            if os.path.basename(os.path.dirname(rels_path_inzip)) != "_rels":
                continue
            rels_doc = lxml.etree.fromstring(zip_pptx_src.read(rels_path_inzip))
            is_slide_rels = rels_path_inzip in slide_rels_names
            if is_slide_rels:
                xml_rels_docs[rels_path_inzip] = rels_doc
            for relationship in _iter_internal_relationships(rels_doc):
                if is_slide_rels and relationship.get("Type") == _IMAGE_REL_TYPE:
                    # These are exactly the relationships rewritten in step 4.
                    continue
                referenced_elsewhere.add(
                    _resolve_rel_target(rels_path_inzip, relationship.get("Target"))
                )

        # --- 3. [Content_Types].xml ----------------------------------------
        sys.stderr.write(f"info: {content_types_xml_path}\n")
        doc = lxml.etree.fromstring(zip_pptx_src.read(content_types_xml_path))
        has_jpg = any(
            default.get("Extension", "").lower() == "jpg"
            for default in doc.iterfind(f"{{{_CONTENT_TYPES_NS}}}Default")
        )
        if not has_jpg:
            # Create the element in the content-types namespace so that it is
            # a proper sibling of the existing <Default> entries.
            lxml.etree.SubElement(
                doc,
                f"{{{_CONTENT_TYPES_NS}}}Default",
                attrib={"Extension": "jpg", "ContentType": "image/jpeg"},
            )
        with zip_pptx_dst.open(content_types_xml_path, "w") as f_xml:
            f_xml.write(lxml.etree.tostring(doc, xml_declaration=True, encoding='UTF-8', standalone=True))

        # --- 4. slide relationship parts and their pictures -----------------
        converted_path_by_source = {}  # source member -> JPEG member name
        # Lower-cased names that a newly created member must not reuse.
        taken_names = {name.lower() for name in filepath_inzip_list}
        for xml_rels_path_inzip in xml_rels_list:
            sys.stderr.write(f"info: {xml_rels_path_inzip}\n")
            doc = xml_rels_docs[xml_rels_path_inzip]
            base_dir = os.path.dirname(os.path.dirname(xml_rels_path_inzip))
            for relationship in _iter_internal_relationships(doc):
                if relationship.get("Type") != _IMAGE_REL_TYPE:
                    continue
                image_path_xml_rels = relationship.get("Target")
                image_path_inzip = _resolve_rel_target(xml_rels_path_inzip, image_path_xml_rels)
                sys.stderr.write(f"info: {image_path_inzip}\n")

                image_enc_path_inzip = converted_path_by_source.get(image_path_inzip)
                if image_enc_path_inzip is None:
                    if image_path_inzip not in source_names:
                        # Dangling relationship in the source: leave it as is
                        # instead of failing on the missing member.
                        sys.stderr.write(f"warning: {image_path_inzip} not found in archive; relationship left unchanged\n")
                        continue
                    image_enc_path_inzip = _pick_jpeg_member_name(image_path_inzip, taken_names)
                    taken_names.add(image_enc_path_inzip.lower())
                    sys.stderr.write(f"info: {image_enc_path_inzip}\n")
                    img_enc_bytes = wand_convert(zip_pptx_src.read(image_path_inzip))
                    with zip_pptx_dst.open(image_enc_path_inzip, "w") as f_img:
                        f_img.write(img_enc_bytes)
                    converted_path_by_source[image_path_inzip] = image_enc_path_inzip
                    # Drop the original unless another part still refers to it
                    # under a name different from the JPEG copy.
                    if image_enc_path_inzip == image_path_inzip or image_path_inzip not in referenced_elsewhere:
                        otherfile_inzip_set.discard(image_path_inzip)

                image_path_xml_rels_set.add(image_path_xml_rels)
                relationship.set(
                    "Target",
                    os.path.relpath(image_enc_path_inzip, base_dir).replace(os.sep, "/"),
                )

            sys.stderr.write(f"info: {xml_rels_path_inzip}\n")
            with zip_pptx_dst.open(xml_rels_path_inzip, "w") as f_xml:
                f_xml.write(lxml.etree.tostring(doc, xml_declaration=True, encoding='UTF-8', standalone=True))

        # --- 5. copy everything else unchanged, in source order -------------
        for filepath_inzip in filepath_inzip_list:
            if filepath_inzip not in otherfile_inzip_set:
                continue
            sys.stderr.write(f"info: {filepath_inzip}\n")
            with zip_pptx_src.open(filepath_inzip, "r") as f_src:
                with zip_pptx_dst.open(filepath_inzip, "w") as f_dst:
                    shutil.copyfileobj(f_src, f_dst)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment