kenzo0107/read_file.py Secret

## read_file.py
import os
import sys
from docx import Document
from pypdf import PdfReader

# .docx
def read_docx(filepath):
    """Return the paragraph text of a .docx file as one string.

    The file is opened with ``Document`` and the ``text`` of each entry in
    ``paragraphs`` is joined with a newline between consecutive paragraphs.
    Only ``paragraphs`` is iterated here; nothing else in the document is
    read by this function.
    """
    document = Document(filepath)
    return "\n".join(paragraph.text for paragraph in document.paragraphs)

# .pdf
def read_pdf(filepath):
    """Return the extracted text of every page of a PDF as one string.

    Each page of ``PdfReader(filepath).pages`` is passed through
    ``extract_text()`` in order, and the results are concatenated with no
    separator between pages.
    """
    pdf = PdfReader(filepath)
    return "".join(page.extract_text() for page in pdf.pages)

# .txt, .md etc...
def read_txt(filepath, encoding="utf-8"):
    """Return the entire contents of a text file as a string.

    Args:
        filepath: Path of the file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding.

    Returns:
        The decoded contents of the file.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file's bytes are not valid in ``encoding``.
    """
    # An explicit encoding keeps the output identical across operating systems.
    with open(filepath, "r", encoding=encoding) as file:
        return file.read()

def main(filepath):
    """Print the text content of the file at *filepath*.

    The reader is chosen from the file extension: ``.docx`` uses
    ``read_docx``, ``.pdf`` uses ``read_pdf``, and every other extension
    (including none) is read as plain text with ``read_txt``. The extension
    is matched case-insensitively.

    Args:
        filepath: Path of the file to read and print.
    """
    _, ext = os.path.splitext(filepath)
    # Normalise case so "REPORT.PDF" or "Notes.DOCX" is not misread as text.
    ext = ext.lower()

    if ext == '.docx':
        t = read_docx(filepath)
    elif ext == '.pdf':
        t = read_pdf(filepath)
    else:
        t = read_txt(filepath)

    print(t)


if __name__ == "__main__":
    # Script entry point: the first command-line argument is the file to read.
    # Exit with a usage hint instead of an IndexError traceback when it is
    # missing.
    if len(sys.argv) < 2:
        sys.exit("usage: python read_file.py FILEPATH")
    filepath = sys.argv[1]
    main(filepath)
	import os
	import sys
	from docx import Document
	from pypdf import PdfReader

	# .docx
	def read_docx(filepath):
	    """Return the paragraph text of a .docx file as one string.

	    The file is opened with ``Document`` and the ``text`` of each entry in
	    ``paragraphs`` is joined with a newline between consecutive paragraphs.
	    """
	    doc = Document(filepath)
	    return "\n".join(para.text for para in doc.paragraphs)

	# .pdf
	def read_pdf(filepath):
	    """Return the extracted text of every page of a PDF as one string.

	    Each page of ``PdfReader(filepath).pages`` is passed through
	    ``extract_text()`` in order, and the results are concatenated with no
	    separator between pages.
	    """
	    reader = PdfReader(filepath)
	    return "".join(p.extract_text() for p in reader.pages)

	# .txt, .md etc...
	def read_txt(filepath, encoding="utf-8"):
	    """Return the entire contents of a text file as a string.

	    Args:
	        filepath: Path of the file to read.
	        encoding: Text encoding used to decode the file. Defaults to UTF-8
	            so the result does not depend on the platform's locale encoding.

	    Returns:
	        The decoded contents of the file.

	    Raises:
	        OSError: If the file cannot be opened.
	        UnicodeDecodeError: If the file's bytes are not valid in ``encoding``.
	    """
	    with open(filepath, "r", encoding=encoding) as file:
	        return file.read()

	def main(filepath):
	    """Print the text content of the file at *filepath*.

	    The reader is chosen from the file extension: ``.docx`` uses
	    ``read_docx``, ``.pdf`` uses ``read_pdf``, and every other extension
	    is read as plain text with ``read_txt``. The extension is matched
	    case-insensitively.
	    """
	    _, ext = os.path.splitext(filepath)
	    # Normalise case so "REPORT.PDF" or "Notes.DOCX" is not misread as text.
	    ext = ext.lower()

	    if ext == '.docx':
	        t = read_docx(filepath)
	    elif ext == '.pdf':
	        t = read_pdf(filepath)
	    else:
	        t = read_txt(filepath)

	    print(t)


	if __name__ == "__main__":
	    # Script entry point: the first command-line argument is the file to
	    # read. Exit with a usage hint instead of an IndexError when it is
	    # missing.
	    if len(sys.argv) < 2:
	        sys.exit("usage: python read_file.py FILEPATH")
	    filepath = sys.argv[1]
	    main(filepath)