-
-
Save me-suzy/229072c144982840167cece05cd0427e to your computer and use it in GitHub Desktop.
Python: Convert doc to docx
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Convert the Word documents of one folder to PDF.

Legacy ``.doc`` files found in ``input_path`` are first saved as ``.docx``
inside ``output_path`` through Word COM automation. Every ``.docx`` file --
the freshly generated ones as well as those already present in
``input_path`` -- is then rendered to PDF inside ``output_path`` with
``docx2pdf``.
"""
import os
import sys
from pathlib import Path

# The location where the files are located
input_path = r'c:\Folder7\input'
# The location where we will write the PDF files
output_path = r'c:\Folder7\output'

# Value passed as ``FileFormat`` to ``Document.SaveAs`` to request .docx output.
DOCX_FILE_FORMAT = 16
# Name prefix of the temporary owner/lock files Word drops next to a document
# while it is open; they match ``*.doc`` / ``*.docx`` but are not documents.
WORD_LOCK_PREFIX = "~$"


def _list_documents(folder, pattern):
    """Return the regular files in *folder* matching *pattern*, sorted.

    Word lock files (names starting with ``~$``) are left out.
    """
    return sorted(
        path
        for path in folder.glob(pattern)
        if path.is_file() and not path.name.startswith(WORD_LOCK_PREFIX)
    )


def _doc_to_docx(doc_files, target_dir):
    """Save every file of *doc_files* as ``.docx`` in *target_dir* using Word.

    Returns the list of ``Path`` objects of the ``.docx`` files written.
    Word is only started when there is at least one file to convert, and it
    is always shut down again, even when a conversion raises.
    """
    if not doc_files:
        return []

    # Imported lazily: pywin32 is only needed (and only available) when
    # there is real conversion work to do on a Windows machine.
    import win32com.client

    written = []
    word = win32com.client.Dispatch("Word.Application")
    try:
        for doc_file in doc_files:
            docx_file = target_dir / (doc_file.stem + ".docx")
            # Hand Word absolute paths: it resolves relative ones against
            # its own working directory, not the one of this script.
            doc = word.Documents.Open(str(doc_file.resolve()))
            try:
                doc.SaveAs(str(docx_file.resolve()), FileFormat=DOCX_FILE_FORMAT)
            finally:
                doc.Close()
            written.append(docx_file)
    finally:
        # Without this a failed conversion leaves a Word process behind.
        word.Quit()
    return written


def _docx_to_pdf(docx_files, target_dir):
    """Render every file of *docx_files* to a PDF of the same stem in *target_dir*."""
    if not docx_files:
        return

    # Imported lazily for the same reason as pywin32 above.
    from docx2pdf import convert

    for docx_file in docx_files:
        print("Converting file:", docx_file)
        pdf_file = os.path.join(target_dir, docx_file.stem + ".pdf")
        convert(str(docx_file), pdf_file)
        print("Converted file:", docx_file, "to", pdf_file)


def main(source_dir=None, target_dir=None):
    """Run the whole .doc -> .docx -> .pdf conversion.

    Args:
        source_dir: Folder holding the Word documents; defaults to the
            module-level ``input_path``.
        target_dir: Folder receiving the ``.docx`` and ``.pdf`` files;
            defaults to the module-level ``output_path``. It is created
            when missing.

    Raises:
        SystemExit: With exit code 1 when *source_dir* is not an existing
            directory.
    """
    source = Path(input_path if source_dir is None else source_dir)
    target = Path(output_path if target_dir is None else target_dir)

    # Create the output directory if it doesn't exist
    os.makedirs(target, exist_ok=True)

    # Check if the input directory exists
    if not source.is_dir():
        print(source, "is invalid")
        sys.exit(1)

    # Convert .doc files to .docx
    generated = _doc_to_docx(_list_documents(source, "*.doc"), target)

    # Convert each .docx file to .pdf. The generated files live in the output
    # folder, so they have to be listed explicitly next to the native ones.
    # Native .docx files go last: if a .doc and a .docx share a stem, the PDF
    # of the native .docx is the one that remains.
    _docx_to_pdf(generated + _list_documents(source, "*.docx"), target)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment