Skip to content

Instantly share code, notes, and snippets.

@me-suzy
Created April 24, 2024 07:01
Show Gist options
  • Save me-suzy/229072c144982840167cece05cd0427e to your computer and use it in GitHub Desktop.
Save me-suzy/229072c144982840167cece05cd0427e to your computer and use it in GitHub Desktop.
Python: Convert .doc to .docx, then convert .docx to PDF
import os
import sys
from pathlib import Path
import win32com.client
from docx2pdf import convert
# Batch conversion script (Windows only; needs Microsoft Word installed):
#   1. every .doc file in `input_path` is saved as .docx into `output_path`
#      through Word COM automation;
#   2. every .docx file -- both the ones produced in step 1 and the ones that
#      were already present in `input_path` -- is converted to PDF in
#      `output_path` with docx2pdf.

# The location where the source .doc / .docx files are located
input_path = r'c:\Folder7\input'
# The location where we will write the generated .docx and .pdf files
output_path = r'c:\Folder7\output'
# Create the output directory if it doesn't exist
os.makedirs(output_path, exist_ok=True)
# Abort when the input directory is missing or is not a directory.
# Path.is_dir() already returns False for a path that does not exist.
directory_path = Path(input_path)
if not directory_path.is_dir():
    print(directory_path, "is invalid")
    sys.exit(1)

# Step 1: convert .doc files to .docx.
# The paths of the produced .docx files are remembered so that step 2 can
# turn them into PDFs as well (they live in output_path, not in input_path).
generated_docx = []
word = win32com.client.Dispatch("Word.Application")
try:
    for file_path in directory_path.glob("*.doc"):
        # Word keeps "~$name.doc" owner/lock files next to open documents;
        # they are not real documents and cannot be opened.
        if file_path.name.startswith("~$"):
            continue
        docx_file_path = os.path.join(output_path, file_path.stem + ".docx")
        doc = word.Documents.Open(str(file_path))
        try:
            # FileFormat 16 is wdFormatDocumentDefault (.docx in Word 2007+)
            doc.SaveAs(docx_file_path, FileFormat=16)
        finally:
            # Always release the document, even when saving failed.
            doc.Close()
        generated_docx.append(Path(docx_file_path))
finally:
    # Always shut Word down so no hidden WINWORD.EXE process is left behind.
    word.Quit()

# Step 2: convert each .docx file to .pdf.
# Files that were already .docx in the input directory are processed last, so
# if "name.doc" and "name.docx" both exist, the PDF comes from "name.docx".
existing_docx = [
    path for path in directory_path.glob("*.docx")
    if not path.name.startswith("~$")
]
for file_path in generated_docx + existing_docx:
    print("Converting file:", file_path)
    output_file_path = os.path.join(output_path, file_path.stem + ".pdf")
    convert(file_path, output_file_path)
    print("Converted file:", file_path, "to", output_file_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment