Skip to content

Instantly share code, notes, and snippets.

@emcniece
Created December 31, 2020 21:35
Show Gist options
  • Save emcniece/cf2589ff9d6480b5023fdb641eab3355 to your computer and use it in GitHub Desktop.
Save emcniece/cf2589ff9d6480b5023fdb641eab3355 to your computer and use it in GitHub Desktop.
Parse CRD utility water bill PDF
from pdfreader import SimplePDFViewer
def to_float(string):
    """Convert a numeric string taken from the bill into a float.

    Surrounding whitespace is ignored. Currency decorations that may appear
    in billing amounts -- a dollar sign and comma thousands separators -- are
    removed before conversion, so both ``" 12.50 "`` and ``"$1,234.56"``
    parse.

    Args:
        string: Text holding a single decimal number.

    Returns:
        The parsed value as a float.

    Raises:
        ValueError: If the remaining text is not a valid number.
    """
    # Drop the decorations first; float() itself rejects '$' and ','.
    cleaned = string.replace('$', '').replace(',', '').strip()
    return float(cleaned)
def extract_usage_from_pdf(file_name):
    """Extract the balance, water consumption and billing rate from a bill PDF.

    The PDF is rendered with ``SimplePDFViewer`` and the resulting canvas
    strings are scanned for two labels:

    * ``'Balance Due'`` -- the string right after it is parsed as the balance.
    * ``'Water Consumption Charge'`` -- the next string is parsed as the
      consumption, and the one after that (with ``'x'`` and ``'$'`` removed)
      as the billing rate.

    If a label occurs more than once, the last occurrence wins.

    NOTE(review): ``render()`` is called once, so presumably only a single
    page is inspected -- confirm against the pdfreader documentation.

    Args:
        file_name: Path of the PDF file to read.

    Returns:
        A ``(balance, consumption, billing_rate)`` tuple of floats. An entry
        is ``None`` when its label was not found or when no string follows
        the label.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a string following a label is not a valid number.
    """
    # The context manager guarantees the file is closed even if rendering
    # fails; the strings are copied out so they remain usable afterwards.
    with open(file_name, "rb") as fd:
        viewer = SimplePDFViewer(fd)
        viewer.render()
        strings = list(viewer.canvas.strings)

    balance = None
    consumption = None
    billing_rate = None
    total = len(strings)

    for i, string in enumerate(strings):
        label = string.strip()

        # Bounds checks prevent an IndexError when a label is the last
        # (or second to last) string on the canvas.
        if label == 'Balance Due' and i + 1 < total:
            balance = to_float(strings[i + 1])

        if label == 'Water Consumption Charge':
            if i + 1 < total:
                consumption = to_float(strings[i + 1])
            if i + 2 < total:
                # The rate is decorated with 'x' and '$' characters; remove
                # them before converting.
                billing_str = strings[i + 2].replace('x', '').replace('$', '').strip()
                billing_rate = to_float(billing_str)

    return balance, consumption, billing_rate
if __name__ == '__main__':
    import sys

    # An optional first command-line argument selects the bill to parse;
    # without it the script falls back to the original sample invoice.
    file_name = sys.argv[1] if len(sys.argv) > 1 else 'crd-invoice-oct2020.pdf'

    balance, consumption, billing_rate = extract_usage_from_pdf(file_name)

    # Report each extracted value on its own line.
    print('Balance:', balance)
    print('consumption:', consumption)
    print('billing_rate:', billing_rate)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment