Last active
October 11, 2023 08:45
-
-
Save shipengtaov/da9a3a66d365f39ff2384bd6739d28ca to your computer and use it in GitHub Desktop.
Modify any XML property in a .docx file. 设置 word 中 xml 的属性
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
Originally adapted from this Stack Overflow answer: https://stackoverflow.com/a/37956562/4923020
"""
import re | |
from collections import defaultdict | |
from io import BytesIO | |
from typing import Union | |
from zipfile import ZipFile | |
def modify_xml(src_file: Union[str, BytesIO], inputs: list[tuple[str, str, str]]) -> BytesIO:
    """
    Return a copy of a zip-based document (e.g. a .docx) with XML element text replaced.

    For each ``(xml_file_name, property, value)`` input, every element of the form
    ``<property ...>text</property>`` inside the named archive member is rewritten
    as ``<property>value</property>``. Attributes on the opening tag are dropped,
    and ``value`` is inserted verbatim, so the caller must XML-escape it if needed.
    Inputs naming a member that is not in the archive are ignored.

    :param src_file: path to, or in-memory buffer of, the source archive
    :param inputs: list of tuples (xml_file_name, property, value)
    :return: a ``BytesIO`` holding the new archive, positioned at offset 0
    :raises UnicodeDecodeError: if a targeted member is not valid UTF-8
    """
    # Read every member up front, keeping its ZipInfo so that compression type
    # and other metadata survive the round trip. Keyed by file name, in archive
    # order.
    entries: dict[str, tuple] = {}
    with ZipFile(src_file, 'r') as document_as_zip:
        for internal_file in document_as_zip.infolist():
            entries[internal_file.filename] = (
                internal_file,
                document_as_zip.read(internal_file.filename),
            )

    # Group the requested changes by archive member so each file is rewritten once.
    grouped_inputs: dict[str, list[tuple[str, str]]] = defaultdict(list)
    for xml_file, property_name, new_value in inputs:
        grouped_inputs[xml_file].append((property_name, new_value))

    for xml_file, property_values in grouped_inputs.items():
        if xml_file not in entries:
            continue
        info, data = entries[xml_file]
        # Work on the whole document at once: every substitution is applied to
        # the same text (no duplicated lines), and elements whose text spans
        # several lines are matched too.
        text = data.decode('utf-8')
        for property_name, value in property_values:
            tag = re.escape(property_name)
            replacement = f'<{property_name}>{value}</{property_name}>'
            # A callable replacement inserts the text literally, so backslashes
            # in the value are not interpreted as regex group references.
            text = re.sub(
                rf'<{tag}[^>]*?>[^<]*?</{tag}>',
                lambda _match, literal=replacement: literal,
                text,
            )
        entries[xml_file] = (info, text.encode('utf-8'))

    ret = BytesIO()
    with ZipFile(ret, 'w') as document_as_zip:
        for info, data in entries.values():
            # Passing the original ZipInfo keeps each member's compression
            # method, timestamp and attributes.
            document_as_zip.writestr(info, data)
    ret.seek(0)
    return ret
# Example usage: change the "last modified by" core property of a document.
docx_file_name = '/path/to/your/document.docx'
output = modify_xml(
    docx_file_name,
    [('docProps/core.xml', 'cp:lastModifiedBy', 'Example User')],
)

# The rewritten archive already exists in memory, so the source path can be
# reopened for writing and overwritten in place.
with open(docx_file_name, 'wb') as f:
    f.write(output.read())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment