Skip to content

Instantly share code, notes, and snippets.

@programandaana
Created February 7, 2024 13:36
Show Gist options
  • Save programandaana/4c9f3b51c431ca20dd44f45db6c8c343 to your computer and use it in GitHub Desktop.
Save programandaana/4c9f3b51c431ca20dd44f45db6c8c343 to your computer and use it in GitHub Desktop.
# Paste into a Jupyter Notebook cell
import fitz # PyMuPDF
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Location of the PDF file; passed unchanged to ``fitz.open``.

    Returns:
        The concatenation of ``page.get_text()`` for each page. If an error
        occurs, it is printed and the text gathered up to that point
        (possibly an empty string) is returned instead of raising.
    """
    page_texts = []
    try:
        # The context manager closes the document even when a page fails to
        # extract; a close() call placed after the loop is skipped on errors.
        with fitz.open(pdf_path) as pdf_document:
            for page in pdf_document:
                page_texts.append(page.get_text())
    except Exception as e:
        # Deliberately best-effort: report the problem and fall through so
        # the caller still receives whatever text was already extracted.
        print(f"Ocorreu um erro: {e}")
    # Join once instead of growing a string with += for every page.
    return "".join(page_texts)
# Example usage: set pdf_path to the PDF whose text should be extracted.
pdf_path = "nome_do_arquivo.pdf"
extracted_text = extract_text_from_pdf(pdf_path=pdf_path)

# Show the result; replace this with any further processing of the text.
print(extracted_text)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment