Created
October 28, 2012 12:46
-
-
Save Gnonthgol/3968512 to your computer and use it in GitHub Desktop.
Scraperwiki som hentar veglengder frå veglistene til vegvesenet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ScraperWiki scraper that sums road lengths from the road lists (veglister)
# published by the Norwegian Public Roads Administration (Statens vegvesen).
# By Gnonthgol
import scraperwiki
import urllib2
import lxml.etree
import re

# Maps a road reference (text starting with "FV " or "KV ") to the sum of the
# lengths recorded for it.
res = {}

# A length cell starts with optional digits, a decimal comma and exactly
# three decimals, e.g. "12,345". Compiled once instead of on every element.
LENGTH_RE = re.compile(r"(\d*,\d{3})")

# Source PDF, taken from
# http://www.vegvesen.no/Kjoretoy/Yrkestransport/Veglister+og+dispensasjoner/Veglister+2012
url = "http://www.vegvesen.no/_attachment/314828/binary/553942"
pdfdata = urllib2.urlopen(url).read()
xmldata = scraperwiki.pdftoxml(pdfdata)
root = lxml.etree.fromstring(xmldata)
pages = list(root)

for page in pages:
    # A road reference never carries over from one page to the next.
    ref = None
    for el in page:
        # Only non-empty <text> elements can hold a reference or a length.
        if el.tag != "text" or not el.text:
            continue
        if el.text.startswith(("FV ", "KV ")):
            # Drop surrounding spaces and asterisks from the reference.
            ref = el.text.strip(" *")
        if not ref:
            continue
        match = LENGTH_RE.match(el.text)
        if match:
            # Lengths use a decimal comma; convert it before parsing.
            val = float(match.group(1).replace(",", "."))
            res[ref] = res.get(ref, 0) + val
            # Only the first length following a reference is counted.
            # NOTE(review): nesting reconstructed from a copy that had lost
            # its indentation -- confirm this reset belongs in this branch.
            ref = None

for ref in res:
    # Written so it parses, and prints the same, on Python 2 and Python 3.
    print("%s %s" % (ref, res[ref]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment