mudssrali/pdf-to-csv.py

## pdf-to-csv.py
import pandas as pd
from tabula import read_pdf

# Path of the PDF whose tables are converted to CSV.
FILE_NAME = "sample.pdf"
# Number of pages to read from the PDF (pages are numbered from 1).
TOTAL_PAGES = 2

# Read the first page. Its columns become the header used for every later
# page. `[0]` keeps only the first item returned for the page (presumably the
# first table detected by tabula -- confirm for the installed tabula-py).
final_frame = read_pdf(FILE_NAME, pages="1")[0]

# Page 1 is already loaded above, so continue from page 2 and include the
# last page. The previous `range(1, TOTAL_PAGES)` re-read page 1 (duplicating
# its rows) and never reached page TOTAL_PAGES.
for page in range(2, TOTAL_PAGES + 1):
    data = read_pdf(FILE_NAME, pages=page)[0]
    # Force the first page's column names onto this page so that concat
    # stacks the rows into the same columns.
    data.columns = final_frame.columns
    final_frame = pd.concat([final_frame, data], ignore_index=True)

    # Progress log: cumulative row count after appending this page.
    print("Page", page, "Size", len(final_frame))

# Write final frame (records) to CSV
final_frame.to_csv("output.csv")

# See the output
print(final_frame)

# Log the records length
print("Total Size (in Rows): ", len(final_frame))
	import pandas as pd
	from tabula import read_pdf

	# Path of the PDF whose tables are converted to CSV.
	FILE_NAME = "sample.pdf"
	# Number of pages to read from the PDF (pages are numbered from 1).
	TOTAL_PAGES = 2

	# Read the first page. Its columns become the header used for every later
	# page. `[0]` keeps only the first item returned for the page (presumably
	# the first table detected by tabula -- confirm for the installed tabula-py).
	final_frame = read_pdf(FILE_NAME, pages="1")[0]

	# Page 1 is already loaded above, so continue from page 2 and include the
	# last page. The previous `range(1, TOTAL_PAGES)` re-read page 1 and never
	# reached page TOTAL_PAGES; the loop body had also lost its indentation.
	for page in range(2, TOTAL_PAGES + 1):
		data = read_pdf(FILE_NAME, pages=page)[0]
		# Force the first page's column names onto this page so that concat
		# stacks the rows into the same columns.
		data.columns = final_frame.columns
		final_frame = pd.concat([final_frame, data], ignore_index=True)

		# Progress log: cumulative row count after appending this page.
		print("Page", page, "Size", len(final_frame))

	# Write final frame (records) to CSV
	final_frame.to_csv("output.csv")

	# See the output
	print(final_frame)

	# Log the records length
	print("Total Size (in Rows): ", len(final_frame))