Skip to content

Instantly share code, notes, and snippets.

@shipengtaov
Last active October 11, 2023 08:45
Show Gist options
  • Save shipengtaov/da9a3a66d365f39ff2384bd6739d28ca to your computer and use it in GitHub Desktop.
Save shipengtaov/da9a3a66d365f39ff2384bd6739d28ca to your computer and use it in GitHub Desktop.
Modify any XML property in a .docx file. 设置 word 中 xml 的属性
"""
Adapted from this Stack Overflow answer: https://stackoverflow.com/a/37956562/4923020
"""
import re
from collections import defaultdict
from io import BytesIO
from typing import Union
from zipfile import ZipFile
def modify_xml(src_file: Union[str, BytesIO], inputs: list[tuple[str, str, str]]) -> BytesIO:
    """
    Return a copy of a zip-based document with selected XML elements rewritten.

    Every entry of ``src_file`` is copied into a new in-memory archive. For
    each ``(xml_file_name, property, value)`` in ``inputs``, every
    ``<property ...>text</property>`` element found in that entry is replaced
    by ``<property>value</property>``. All requested properties for the same
    entry are applied to the same content, so the entry is written exactly
    once with every substitution in place.

    Limitations (inherent to the regex-based approach):

    * Attributes on the matched opening tag are dropped.
    * Only elements whose content contains no ``<`` (i.e. no child elements)
      and that have an explicit closing tag are matched.
    * ``value`` is inserted verbatim; it is not XML-escaped.
    * Targeted entries are decoded and re-encoded as UTF-8.
    * Entries named in ``inputs`` that do not exist in the archive are
      silently ignored.

    :param src_file: path to the archive, or a binary file-like object
    :param inputs: list of tuples (xml_file_name, property, value)
    :return: a ``BytesIO`` holding the new archive, positioned at offset 0
    """
    # Group the requested changes by the archive entry they apply to.
    grouped_inputs: dict[str, list[tuple[str, str]]] = defaultdict(list)
    for xml_file, tag, new_value in inputs:
        grouped_inputs[xml_file].append((tag, new_value))

    # Read every entry, remembering its ZipInfo so that the compression
    # method and metadata can be carried over to the output archive.
    infos = {}
    contents: dict[str, bytes] = {}
    with ZipFile(src_file, 'r') as document_as_zip:
        for internal_file in document_as_zip.infolist():
            name = internal_file.filename
            infos[name] = internal_file
            contents[name] = document_as_zip.read(name)

    for xml_file, tag_values in grouped_inputs.items():
        if xml_file not in contents:
            continue
        text = contents[xml_file].decode('utf-8')
        for tag, new_value in tag_values:
            # Escape the tag so characters such as "." are matched literally.
            escaped = re.escape(tag)
            pattern = rf'<{escaped}[^>]*?>[^<]*?</{escaped}>'
            replacement = f'<{tag}>{new_value}</{tag}>'
            # A callable replacement keeps backslashes in the value literal
            # instead of letting re.sub interpret them as escapes.
            text = re.sub(pattern, lambda _match, _repl=replacement: _repl, text)
        contents[xml_file] = text.encode('utf-8')

    ret = BytesIO()
    with ZipFile(ret, 'w') as document_as_zip:
        for name, data in contents.items():
            # Passing the original ZipInfo preserves each entry's compression
            # type, timestamp and attributes.
            document_as_zip.writestr(infos[name], data)
    ret.seek(0)
    return ret
# Example usage: set the "last modified by" core property, then write the
# rebuilt archive back over the source document.
docx_file_name = '/path/to/your/document.docx'
output = modify_xml(
    docx_file_name,
    [('docProps/core.xml', 'cp:lastModifiedBy', 'Example User')],
)
with open(docx_file_name, mode='wb') as f:
    f.write(output.read())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment