Skip to content

Instantly share code, notes, and snippets.

@neuroid
Last active October 11, 2021 13:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save neuroid/a5e7b8a54a90ae2809cbf35716c127cd to your computer and use it in GitHub Desktop.
Save neuroid/a5e7b8a54a90ae2809cbf35716c127cd to your computer and use it in GitHub Desktop.
Parser for Bulb Energy (bulb.co.uk) statement PDFs
#!/usr/bin/env python3
"""Parse Bulb Energy (bulb.co.uk) statement PDFs.
Requires the pdftotext command (part of poppler-utils).
"""
from collections import defaultdict
import csv
from datetime import datetime
from decimal import Decimal
import subprocess
import sys
def pdftotext(path):
    """Return the text of the PDF at *path*, extracted with ``pdftotext``.

    The external ``pdftotext`` command is run with ``-layout`` and its
    standard output is captured and returned as a string.

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero
            status (``check=True``).
    """
    command = ['pdftotext', '-layout', path, '-']
    completed = subprocess.run(
        command,
        check=True,
        capture_output=True,
        text=True,
    )
    return completed.stdout
def parse(text):
    """Yield one usage record per fuel section found in statement *text*.

    *text* is expected to be the output of ``pdftotext -layout`` for a
    statement. The parser scans it line by line:

    * ``For the period <start> to <end>`` sets the statement period. When
      the next line begins with four digits they are taken to be the year
      of the end date that wrapped onto the following line.
    * ``Electricity use in detail`` / ``Gas use in detail`` open a new
      record for that fuel.
    * ``Meter units used:``, ``Energy...`` and ``Standing charge...`` lines
      fill in or accumulate fields of the open record.
    * ``Total electricity costs for this bill`` / ``Total gas costs for
      this bill`` close the record and yield it.

    Each record is a ``defaultdict(Decimal)`` with the keys
    ``period_start``/``period_end`` (``datetime.date``), ``fuel`` (str),
    ``meter_units`` (str), ``kwh``/``cost``/``standing_charge`` (``Decimal``
    sums), ``kwh_price`` (``Decimal``, the p/kWh figure divided by 100) and
    ``total_cost`` (str).

    Detail lines that appear outside a fuel section are ignored.

    Raises:
        ValueError: if a fuel section starts before any statement period
            has been seen, or if a date or detail line cannot be unpacked.
        decimal.InvalidOperation: if a numeric column is not a valid number.
    """
    energy_prefixes = ('Energy ', 'Energy(1) ', 'Energy*', 'Energy*(1)')
    standing_prefixes = ('Standing charge ', 'Standing charge(1) ')
    total_prefixes = ('Total electricity costs for this bill',
                      'Total gas costs for this bill')
    section_fuels = (('Electricity use in detail', 'electricity'),
                     ('Gas use in detail', 'gas'))

    lines = text.splitlines()
    period_start = period_end = None
    usage = None

    for i, line in enumerate(lines):
        line = line.strip()

        if line.startswith('For the period'):
            # The year of the end date may wrap onto the next line; there is
            # no next line when this is the last line of the text.
            next_line = lines[i + 1].strip() if i + 1 < len(lines) else ''
            year = next_line[:4]
            year = ' ' + year if year.isdigit() else ''
            # In -layout output, columns are separated by runs of spaces;
            # keep only the first column (the period itself).
            period = line[15:].split('  ')[0] + year
            period_start, period_end = (
                datetime.strptime(part, '%d %B %Y').date()
                for part in period.split(' to '))
            continue

        fuel = next((name for title, name in section_fuels
                     if line.startswith(title)), None)
        if fuel is not None:
            if period_start is None:
                raise ValueError(
                    'statement period not found before {} section'.format(
                        fuel))
            usage = defaultdict(Decimal,
                                period_start=period_start,
                                period_end=period_end,
                                fuel=fuel)
            continue

        if usage is None:
            # Not inside a fuel section: nothing to attach this line to.
            continue

        if line.startswith('Meter units used:'):
            usage['meter_units'] = line[18:].split()[0]
        elif line.startswith(energy_prefixes):
            # The cost column sits one token further right when the unit
            # price is printed with a separate "p/kWh" token.
            if 'p/kWh' in line:
                _, kwh, _, _, kwh_price, _, _, cost, *_ = line.split()
            else:
                _, kwh, _, _, kwh_price, _, cost, *_ = line.split()
            usage['kwh'] += Decimal(kwh)
            usage['kwh_price'] = Decimal(kwh_price) / 100
            usage['cost'] += Decimal(cost)
        elif line.startswith(standing_prefixes):
            # The charge is the first token after the eighth column.
            usage['standing_charge'] += Decimal(
                line.split(None, 8)[-1].split()[0])
        elif line.startswith(total_prefixes):
            usage['total_cost'] = line.split('£')[1].split()[0]
            yield usage
            usage = None
if __name__ == '__main__':
    # Command-line entry point: convert every statement PDF named on the
    # command line and print all usage records as CSV on standard output,
    # ordered by statement period.
    if not sys.argv[1:]:
        sys.exit('Usage: {} statement.pdf [statement2.pdf]...'.format(
            sys.argv[0]))

    columns = [
        'period_start',
        'period_end',
        'fuel',
        'meter_units',
        'kwh_price',
        'kwh',
        'cost',
        'standing_charge',
        'total_cost',
    ]
    writer = csv.DictWriter(sys.stdout, columns)
    # The header is emitted before any PDF is read.
    writer.writeheader()

    records = []
    for path in sys.argv[1:]:
        records.extend(parse(pdftotext(path)))
    records.sort(key=lambda usage: (usage['period_start'],
                                    usage['period_end']))

    for usage in records:
        writer.writerow(usage)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment