Skip to content

Instantly share code, notes, and snippets.

@Basemera
Last active October 12, 2022 14:58
Show Gist options
  • Save Basemera/0ac584819156355a6b724871d06a0daa to your computer and use it in GitHub Desktop.
Save Basemera/0ac584819156355a6b724871d06a0daa to your computer and use it in GitHub Desktop.
import pathlib
from tkinter import filedialog as fd
from PyPDF2 import PdfFileReader
from docx import Document
def _print_pdf(filename):
    """Print the extracted text of every page of the PDF at *filename*."""
    reader = PdfFileReader(filename)
    # Iterate the page collection directly instead of indexing with a
    # manually maintained counter bounded by the page count.
    for page in reader.pages:
        print(page.extract_text())


def _print_text(filename):
    """Print the full contents of the plain-text file at *filename*."""
    with open(filename, 'r') as f:
        print(f.read())


def _print_word(filename):
    """Print the text of every paragraph of the Word document at *filename*."""
    # NOTE(review): python-docx is documented for .docx packages only; a
    # legacy binary .doc file will presumably raise here -- confirm.
    for para in Document(filename).paragraphs:
        print(para.text)


def open_file_selection() -> None:
    """Ask the user to pick files and print the text content of each one.

    A file-selection dialog is shown; every chosen file is dispatched on its
    extension (compared case-insensitively, so ``.PDF`` is handled like
    ``.pdf``):

    * ``.pdf``           -- text of each page, in page order
    * ``.txt``           -- the whole file
    * ``.doc``/``.docx`` -- text of each paragraph

    Any other extension produces a "Can't read files ..." message on stdout
    and processing continues with the next file. Nothing is returned.
    """
    filenames = fd.askopenfilenames()
    for filename in filenames:
        extension = pathlib.Path(filename).suffix
        # Dispatch on the lower-cased suffix, but keep the original spelling
        # for the message shown to the user.
        kind = extension.lower()
        if kind == '.pdf':
            _print_pdf(filename)
        elif kind == '.txt':
            _print_text(filename)
        elif kind in ('.doc', '.docx'):
            _print_word(filename)
        else:
            print(f"Can't read files with extension {extension} for file {filename}")
# Module-level entry point: the file dialog is opened as soon as this file is
# executed (and also if it is imported, since the call is not guarded).
open_file_selection()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment