Created
March 17, 2019 23:29
-
-
Save neuroid/da16cff0b3849fcb82f7e60be52af4fa to your computer and use it in GitHub Desktop.
Parser for OVO Energy (ovoenergy.com) statement PDFs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
"""Parse OVO Energy (ovoenergy.com) statement PDFs. | |
Requires the pdftotext command (part of poppler-utils). | |
""" | |
import csv | |
from datetime import datetime | |
import subprocess | |
import sys | |
def pdftotext(path):
    """Return the text of the PDF at *path* as extracted by ``pdftotext``.

    Runs ``pdftotext -layout`` and captures what it writes to standard
    output (the trailing ``-`` argument selects stdout as the output file).

    Raises:
        FileNotFoundError: if the ``pdftotext`` executable is not installed.
        subprocess.CalledProcessError: if ``pdftotext`` exits with a
            non-zero status (``check=True``).
    """
    # Ask for UTF-8 explicitly and decode as UTF-8 rather than relying on the
    # locale's preferred encoding: the parser matches on the '£' character,
    # which would be mangled (or fail to decode) under a non-UTF-8 locale.
    process = subprocess.run(
        ['pdftotext', '-layout', '-enc', 'UTF-8', path, '-'],
        capture_output=True, check=True, text=True, encoding='utf-8')
    return process.stdout
def parse(text):
    """Yield one usage dict per fuel section found in statement *text*.

    A section opens at a line starting with 'Electricity Used' or 'Gas Used'
    and is yielded when its 'Cost of electricity supplied' / 'Cost of gas
    supplied' line is reached.  Each yielded dict may contain:

    * ``fuel``: ``'electricity'`` or ``'gas'``
    * ``period_start`` / ``period_end``: ``datetime.date`` objects
    * ``units``: list of dicts with ``unit_price``, ``units`` and (once a
      'Cost of ... used' line has been seen) ``cost``
    * ``standing_charge`` and ``total_cost``

    All amounts are returned as strings with any '£' removed.  Recognised
    field lines that appear outside an open section are ignored.

    Raises:
        ValueError: if a 'Charge period from' line does not hold two
            ``'%d %B %Y'`` dates separated by ' to ', or a 'Price £/kWh'
            line has fewer than two fields.
        IndexError: if a 'Cost of ... used' line precedes any
            'Price £/kWh' line within its section.
    """
    period_prefix = 'Charge period from'
    price_prefix = 'Price £/kWh'

    def nopound(amount):
        return amount.replace('£', '')

    def last_amount(line):
        # The amount is the final whitespace-separated field on the line.
        return nopound(line.rsplit(None, 1)[-1])

    def to_date(value):
        # Strip padding that `pdftotext -layout` may leave around the dates.
        return datetime.strptime(value.strip(), '%d %B %Y').date()

    usage = None  # the section currently being collected, if any
    for line in text.splitlines():
        line = line.strip()
        if line.startswith('Electricity Used'):
            usage = {'fuel': 'electricity', 'units': []}
        elif line.startswith('Gas Used'):
            usage = {'fuel': 'gas', 'units': []}
        elif usage is None:
            # Not inside a fuel section: a matching field line here has
            # nothing to attach to, so skip it instead of crashing.
            continue
        elif line.startswith(period_prefix):
            usage['period_start'], usage['period_end'] = map(
                to_date, line[len(period_prefix):].split(' to '))
        elif line.startswith(price_prefix):
            # First field is the unit price, last field the units used.
            unit_price, *_, units = line[len(price_prefix):].split()
            usage['units'].append({'unit_price': nopound(unit_price),
                                   'units': units})
        elif line.startswith(('Cost of electricity used',
                              'Cost of gas used')):
            # The cost belongs to the most recent price line.
            usage['units'][-1]['cost'] = last_amount(line)
        elif line.startswith('Standing charge for'):
            usage['standing_charge'] = last_amount(line)
        elif line.startswith(('Cost of electricity supplied',
                              'Cost of gas supplied')):
            usage['total_cost'] = last_amount(line)
            yield usage
            usage = None
if __name__ == '__main__':
    # Command-line entry point: convert each statement PDF given as an
    # argument and print every unit-rate line as a CSV row on stdout.
    if len(sys.argv) < 2:
        sys.exit('Usage: {} statement.pdf [statement2.pdf]...'.format(
            sys.argv[0]))
    writer = csv.DictWriter(sys.stdout, ['period_start', 'period_end', 'fuel',
                                         'unit_price', 'units', 'cost',
                                         'standing_charge', 'total_cost'])
    writer.writeheader()
    for path in sys.argv[1:]:
        for usage in parse(pdftotext(path)):
            # A section without any unit-rate entry would make the [-1]
            # lookup fail; fall back to one entry that blanks the 'units'
            # column so the section's charges are still written.
            *earlier, last = usage['units'] or [{'units': None}]
            # Section-wide charges appear only on the section's final row,
            # so they are blanked on the earlier rows.
            for item in earlier:
                writer.writerow(dict(usage, **item,
                                     standing_charge=None,
                                     total_cost=None))
            writer.writerow(dict(usage, **last))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment