Skip to content

Instantly share code, notes, and snippets.

@Wzixiao
Last active June 15, 2023 05:18
Show Gist options
  • Save Wzixiao/53f6b3948dba4fcf657008058041cf97 to your computer and use it in GitHub Desktop.
Save Wzixiao/53f6b3948dba4fcf657008058041cf97 to your computer and use it in GitHub Desktop.
from glob import glob
import re
import os
import argparse
import win32com.client as win32
from win32com.client import constants
from pathlib import Path
from tqdm import tqdm
import time
import shutil
def save_as_docx(file_location, output_path):
    """Place a ``.docx`` version of *file_location* inside *output_path*.

    The target name is the source file name with its final extension
    replaced by ``.docx``. Three outcomes are possible:

    * the target already exists -> nothing is done;
    * the source already has a ``.docx`` extension -> it is moved (not
      copied) to the target;
    * otherwise the source is opened through the ``Word.Application`` COM
      object and saved in the XML document (``.docx``) format.

    Args:
        file_location: Path of the source document; may be relative.
        output_path: Directory that receives the ``.docx`` file. It must
            already exist.

    Returns:
        None.
    """
    # The Word client must be given absolute paths when driven from Windows;
    # with a relative path the document cannot be opened.
    file_abs = os.path.abspath(file_location)

    # Swap the final extension for ".docx". os.path.basename is used instead
    # of splitting on a backslash so the name is extracted correctly whatever
    # separator the platform uses.
    new_file_name = re.sub(r'\.\w+$', '.docx', os.path.basename(file_abs))
    # Word resolves relative paths against its own working directory, so the
    # save target is made absolute as well.
    new_file_abs = os.path.join(os.path.abspath(output_path), new_file_name)

    if os.path.exists(new_file_abs):
        return

    # Inspect only the file's own extension: a substring test on the whole
    # path would also fire for a ".doc" file stored in a directory whose name
    # happens to contain ".docx".
    if os.path.splitext(file_abs)[1].lower() == '.docx':
        # shutil.copy would work here too if the source should be kept.
        shutil.move(file_abs, new_file_abs)
        return

    # COM client used to open the document. Per the original author's note,
    # this needs WPS or Microsoft Office (Word) installed.
    word = win32.gencache.EnsureDispatch('Word.Application')
    doc = word.Documents.Open(file_abs)
    try:
        doc.Activate()
        # Save the now-active document in .docx format.
        word.ActiveDocument.SaveAs(new_file_abs, FileFormat=constants.wdFormatXMLDocument)
    finally:
        # Always release the opened document, even when saving fails.
        # The argument False is the original's choice (presumably
        # "do not save changes" - TODO confirm against the Word API).
        doc.Close(False)
if __name__ == '__main__':
    # Command-line entry point: convert every "*.doc*" file found directly
    # inside --doc_file_dir and place the results in --docx_file_save_dir.
    parser = argparse.ArgumentParser()
    parser.add_argument('--doc_file_dir', type=str, help='doc文件目录')
    parser.add_argument('--docx_file_save_dir', type=str, help='docx文件保存目录')
    args = parser.parse_args()

    doc_file_dir = args.doc_file_dir
    if not doc_file_dir:
        raise ValueError("doc_file_dir is need input")

    # The output directory defaults to the input directory. It is always
    # normalised to an absolute path: os.path.abspath leaves an absolute path
    # absolute, so no separate "does it contain a colon" test is needed.
    docx_file_save_dir = os.path.abspath(args.docx_file_save_dir or doc_file_dir)

    # Create the output directory, including any missing parent directories.
    Path(docx_file_save_dir).mkdir(parents=True, exist_ok=True)

    # Collect the doc/docx files located directly in the input directory.
    # The pattern contains no "**", so only the top level is scanned.
    doc_file_locations = glob(f"{doc_file_dir}/*.doc*")

    for path in tqdm(doc_file_locations):
        save_as_docx(path, docx_file_save_dir)
        # Short pause between files, kept from the original; presumably it
        # gives the Word client time to settle - TODO confirm it is needed.
        time.sleep(0.1)
@Wzixiao
Copy link
Author

Wzixiao commented Jun 15, 2023

以下是一个例子:
python .\convert_doc_into_docx.py --doc_file_dir='./doc' --docx_file_save_dir="./docx"

  • doc_file_dir:doc文件目录(输入目录)
  • docx_file_save_dir:docx文件保存的目录(输出目录)

以上两个参数都可以使用绝对路径
docx_file_save_dir在未输入的情况下等同于doc_file_dir
电脑必须存在可以打开Word文档的软件,比如WPS或者Office(Microsoft Word)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment