Skip to content

Instantly share code, notes, and snippets.

@neuroid
Created March 17, 2019 23:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save neuroid/da16cff0b3849fcb82f7e60be52af4fa to your computer and use it in GitHub Desktop.
Save neuroid/da16cff0b3849fcb82f7e60be52af4fa to your computer and use it in GitHub Desktop.
Parser for OVO Energy (ovoenergy.com) statement PDFs
#!/usr/bin/env python3
"""Parse OVO Energy (ovoenergy.com) statement PDFs.
Requires the pdftotext command (part of poppler-utils).
"""
import csv
from datetime import datetime
import subprocess
import sys
def pdftotext(path):
    """Return the text of the PDF at *path*, extracted with ``pdftotext``.

    The external ``pdftotext`` command is run with ``-layout`` and writes
    its result to standard output (the trailing ``-`` argument), which is
    captured and returned as a string.

    Raises:
        subprocess.CalledProcessError: if ``pdftotext`` exits with a
            non-zero status (``check=True``).
    """
    # Ask for UTF-8 output and decode it as UTF-8 explicitly.  Relying on
    # the locale's preferred encoding would mis-decode the pound sign that
    # the parser looks for whenever the locale is not UTF-8.
    process = subprocess.run(
        ['pdftotext', '-layout', '-enc', 'UTF-8', path, '-'],
        capture_output=True, check=True, text=True, encoding='utf-8')
    return process.stdout
def parse(text):
    """Yield one usage record per fuel section found in statement *text*.

    A section opens at a line starting with ``Electricity Used`` or
    ``Gas Used`` and closes at the matching ``Cost of ... supplied`` line,
    at which point the record is yielded.  Each record is a dict with:

    * ``fuel`` -- ``'electricity'`` or ``'gas'``
    * ``units`` -- list of dicts, one per ``Price £/kWh`` line, holding
      ``unit_price``, ``units`` and (once the ``Cost of ... used`` line
      has been seen) ``cost``
    * ``period_start`` / ``period_end`` -- ``datetime.date`` objects
    * ``standing_charge`` and ``total_cost``

    Monetary amounts are kept as strings with the pound sign removed.
    Detail lines that appear while no section is open are ignored rather
    than raising, so unrelated text before, between or after the fuel
    sections does not abort parsing.
    """
    def nopound(amount):
        return amount.replace('£', '')

    def last_amount(line):
        # The amount is the final whitespace-separated token on the line.
        return nopound(line.rsplit(None, 1)[-1])

    usage = None  # the section currently being filled in, if any
    for line in text.splitlines():
        line = line.strip()
        if line.startswith('Electricity Used'):
            usage = {'fuel': 'electricity', 'units': []}
        elif line.startswith('Gas Used'):
            usage = {'fuel': 'gas', 'units': []}
        elif usage is None:
            # No open section: nothing to attach this line to.
            continue
        elif line.startswith('Charge period from'):
            # Skip the label plus the separator character that follows it.
            start, end = line[len('Charge period from') + 1:].split(' to ')
            usage['period_start'] = datetime.strptime(
                start, '%d %B %Y').date()
            usage['period_end'] = datetime.strptime(end, '%d %B %Y').date()
        elif line.startswith('Price £/kWh'):
            # First token is the unit price, last token the units used;
            # anything in between is discarded.
            unit_price, *_, units = line[len('Price £/kWh') + 1:].split()
            usage['units'].append({'unit_price': nopound(unit_price),
                                   'units': units})
        elif line.startswith(('Cost of electricity used',
                              'Cost of gas used')):
            # The cost belongs to the most recently seen price line.
            usage['units'][-1]['cost'] = last_amount(line)
        elif line.startswith('Standing charge for'):
            usage['standing_charge'] = last_amount(line)
        elif line.startswith(('Cost of electricity supplied',
                              'Cost of gas supplied')):
            usage['total_cost'] = last_amount(line)
            yield usage
            usage = None  # section closed; wait for the next header
if __name__ == '__main__':
    # Script entry point: convert every statement PDF named on the command
    # line into CSV rows on standard output.
    if len(sys.argv) < 2:
        sys.exit('Usage: {} statement.pdf [statement2.pdf]...'.format(
            sys.argv[0]))

    columns = [
        'period_start', 'period_end', 'fuel',
        'unit_price', 'units', 'cost',
        'standing_charge', 'total_cost',
    ]
    out = csv.DictWriter(sys.stdout, columns)
    out.writeheader()

    for pdf_path in sys.argv[1:]:
        for record in parse(pdftotext(pdf_path)):
            bands = record['units']
            # Every price band except the last gets its own row with the
            # section-wide totals blanked, so they appear only once.
            for band in bands[:-1]:
                row = dict(record, **band,
                           standing_charge=None,
                           total_cost=None)
                out.writerow(row)
            # The final band's row carries the standing charge and total.
            out.writerow(dict(record, **bands[-1]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment