Skip to content

Instantly share code, notes, and snippets.

@mthh
Created May 25, 2023 09:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mthh/7f9a3d2e3f58986fb2e3b5332097352e to your computer and use it in GitHub Desktop.
Save mthh/7f9a3d2e3f58986fb2e3b5332097352e to your computer and use it in GitHub Desktop.
from pdf2docx import parse
import os
def list_pdf_files(folder):
    """Return the sorted names of the PDF files located directly in *folder*.

    A name is kept only when it ends with the ``.pdf`` extension
    (case-insensitive) and refers to a regular file. A plain substring test
    would also select entries such as ``pdf_notes.txt``, ``report.pdf.bak``
    or a sub-directory whose name happens to contain "pdf".

    Args:
        folder: Path of the directory to scan (not recursive).

    Returns:
        A sorted list of file names (not full paths).

    Raises:
        OSError: If *folder* cannot be listed.
    """
    return sorted(
        name
        for name in os.listdir(folder)
        if name.lower().endswith('.pdf')
        and os.path.isfile(os.path.join(folder, name))
    )


def docx_name(pdf_name):
    """Return *pdf_name* with its extension replaced by ``.docx``."""
    # splitext removes only the real extension, whatever its length or case.
    return os.path.splitext(pdf_name)[0] + '.docx'


if __name__ == '__main__':
    # Folder that contains the PDF files to convert.
    path_input = '/home/mthh/Téléchargements/'
    # Output folder that will receive the generated docx files.
    path_output = '/home/mthh/Téléchargements/output/'

    # Create the output folder if needed; exist_ok avoids the race between
    # an existence check and the creation itself.
    os.makedirs(path_output, exist_ok=True)

    # Convert each PDF found in the input folder.
    for file_name in list_pdf_files(path_input):
        parse(
            pdf_file=os.path.join(path_input, file_name),
            docx_file=os.path.join(path_output, docx_name(file_name)),
        )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment