Skip to content

Instantly share code, notes, and snippets.

@Peetz0r
Created January 13, 2015 14:55
Show Gist options
  • Save Peetz0r/91f484a97cc1128d7250 to your computer and use it in GitHub Desktop.
Save Peetz0r/91f484a97cc1128d7250 to your computer and use it in GitHub Desktop.
Ben facturen (pdf) to html+svg data usage graphs
#!/usr/bin/env python2
from __future__ import print_function
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfpage import PDFPage
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from calendar import monthrange
import StringIO, re, glob, os, pygal, locale, datetime, sys
# Adopt the user's default locale for everything locale-dependent.
locale.setlocale(locale.LC_ALL, '')

# Accumulators filled while the invoices are processed.
all_months_data = []
all_months_rendered = []

# Invoice PDFs in the current directory, in lexicographic filename order.
filenames = sorted(glob.glob("FACT*_*.pdf"))

# Static head of the HTML report: doctype, title, inline stylesheet and the
# opening <body> tag. Charts and the footer are appended to `out` later.
out = ''.join((
    '<!doctype html>',
    '<html>',
    '<head>',
    '<meta charset="utf-8">',
    '<title>Mobile data usage by day and by month</title>',
    '<style>',
    '* {',
    'background-color: black;',
    'padding: 0;',
    'margin: 0;',
    'text-align: center;',
    'line-height: 200%;',
    'color: white;',
    'font-family: sans-serif;',
    '}',
    'svg {',
    'margin: 50px;',
    '}',
    '</style>',
    '</head>',
    '<body>',
))
# Process every invoice PDF: extract its text, collect the "Internet" usage
# records, and render one per-day bar chart per invoice.
for file_nr, filename in enumerate(filenames):
    # Text progress bar; the carriage return makes the next print overwrite it.
    print('[%s>%s]' % ('='*file_nr*2, ' '*(len(filenames)*2-file_nr*2)), end='\r')
    sys.stdout.flush()

    # Extract the PDF text into an in-memory buffer. No LAParams are passed
    # to the converter on purpose: the regex below expects the date to follow
    # the word "Internet" directly, as produced without layout analysis.
    rsrcmgr = PDFResourceManager()
    retstr = StringIO.StringIO()
    device = TextConverter(rsrcmgr, retstr, codec='utf-8')
    try:
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        with open(filename, 'rb') as fp:
            for page in PDFPage.get_pages(fp):
                interpreter.process_page(page)
    finally:
        # Release the converter even if a page fails to parse.
        device.close()
    text = retstr.getvalue()
    retstr.close()

    # Each match is a (dd-mm-yyyy date, amount in kb) pair.
    m = re.findall(r'Internet(\d{2}-\d{2}-\d{4})\d{2}:\d{2}:\d{2}\s*(\d+) kb', text)
    if not m:
        # Nothing to chart; skip instead of crashing on m[0] below.
        print('\nNo data usage records found in %s, skipping' % filename,
              file=sys.stderr)
        continue

    # The first record decides which month this invoice is charted as.
    now = datetime.datetime.strptime(m[0][0], '%d-%m-%Y')
    days_in_month = monthrange(now.year, now.month)[1]

    # Sum in kb and convert once at the end. Dividing every record by 1024
    # with Python 2 integer division would floor sub-MB records to zero.
    # NOTE(review): a record dated on a day number beyond this month's length
    # (e.g. a different month's 31st) would raise IndexError -- confirm that
    # invoices never span months.
    kb_per_day = [0] * days_in_month
    for day in m:
        day_nr = datetime.datetime.strptime(day[0], '%d-%m-%Y').day - 1
        kb_per_day[day_nr] += int(day[1])

    total_gb = sum(kb_per_day) / 1024.0 / 1024.0
    data_list = [round(kb / 1024.0, 2) for kb in kb_per_day]

    # Stored as a list [label, total] because it is consumed with pop() later.
    all_months_data.append([now.strftime('%b%y'), round(total_gb, 2)])

    bar_chart = pygal.Bar(width=1000, height=500, explicit_size=True, label_font_size=12)
    bar_chart.title = (now.strftime('Mobile data usage by day in %B %Y')
                       + ' (total: %.2f GB)' % total_gb)
    bar_chart.x_labels = [str(day_of_month) for day_of_month in range(1, days_in_month + 1)]
    bar_chart.add('MB', data_list)
    all_months_rendered.append(bar_chart.render())
# Emit the per-invoice charts in the reverse of the order they were processed.
all_months_rendered.reverse()

# Overview chart with one bar per processed invoice.
monthly_chart = pygal.Bar(
    width=1000,
    height=500,
    explicit_size=True,
    x_label_rotation=30,
    label_font_size=14,
)
monthly_chart.title = 'Mobile data usage by month'
# Every entry in all_months_data is consumed with pop(): the first pass takes
# the last element of each entry (the series values), the second pass takes
# the element before it (the x-axis labels).
monthly_totals = [entry.pop() for entry in all_months_data]
monthly_chart.add('GB', monthly_totals)
monthly_labels = [entry.pop() for entry in all_months_data]
monthly_chart.x_labels = monthly_labels

# Overview first, then the individual charts, then the footer.
out += monthly_chart.render()
out += ''.join(all_months_rendered)
generated_at = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
out += '<hr>Generated at ' + generated_at + '</body>' + '</html>'

with open('mobile-data-usage.html', 'w') as html_file:
    html_file.write(out)

# Final, completely filled progress bar.
print('[%s]' % ('='*(len(filenames)*2+1)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment