Created
May 27, 2012 16:11
-
-
Save mdornseif/2814922 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
# Hackish Script to leech Karst/Cave Data from | |
# http://environnement.wallonie.be/cartosig/index.asp | |
# See http://cavehackers.de/77347676 for some background | |
import codecs | |
import urllib2 | |
import re | |
import subprocess | |
# To get decent "GPS" Coordinates: | |
# gdaltransform -s_srs EPSG:31370 -t_srs EPSG:4326 | |
# Records collected by extract(), grouped by their normalised interest label.
out_s = {}  # records whose interests mention "spéléologique"
out_o = {}  # every other record

# Patterns for the table cells of a fiche page, compiled once at import time.
# Accented letters in the labels are either left out of the pattern or matched
# with a wildcard, so matching does not depend on how a non-ASCII literal in
# this file compares with the decoded page text.
_NAME_RE = re.compile(
    r'Nom :</td>.+?<td align=left wrap class=text >.+?<b>(.*?)</b>.+?</td>',
    re.DOTALL)
_LAMBERT_RE = re.compile(
    r'Coordonn.{1,2}es Lambert 72:.+?</td>'
    r'.+?<td align=left wrap class=text>(.+?)</td>',
    re.DOTALL)
_DESCRIPTION_RE = re.compile(
    r'Description : </td>.+?<td align=left class=text>(.*?)</td>',
    re.DOTALL)
_LAENGE_RE = re.compile(
    r'veloppement.*?:.*?</td>.+?<td align=left class=text>(.*?)</td>',
    re.DOTALL)
_INTERESSE_RE = re.compile(
    r'ts du site.*?:.*?</td>.+?<td align=left class=text>(.*?)</td>',
    re.DOTALL)
# Characters stripped from the coordinate cell so that only the numbers remain.
_LAMBERT_NOISE_RE = re.compile(r'[XY:m,site a\(\)]')


def _fetch_page(url):
    """Download *url* and return the raw response body, closing the connection."""
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def _lambert_to_wgs84(lambert):
    """Convert a Lambert 72 'X Y' string to (lon, lat, height) with gdaltransform."""
    # The coordinates go to gdaltransform's stdin directly; no shell is
    # involved, so scraped text can never be interpreted as a command.
    proc = subprocess.Popen(
        ['gdaltransform', '-s_srs', 'EPSG:31370', '-t_srs', 'EPSG:4326'],
        stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    stdout, _ = proc.communicate((lambert + u'\n').encode('utf-8'))
    if proc.returncode:
        raise subprocess.CalledProcessError(proc.returncode, 'gdaltransform')
    fields = stdout.decode('utf-8', 'replace').split()
    if len(fields) != 3:
        raise ValueError('unexpected gdaltransform output for %r: %r'
                         % (lambert, stdout))
    return tuple(fields)


def _first_group(pattern, text, default=u''):
    """Return the stripped first group of *pattern* in *text*, or *default*."""
    found = pattern.search(text)
    return found.group(1).strip() if found else default


def _normalise_interest(raw):
    """Turn the free-text interest cell into a sorted, comma-separated label."""
    text = raw.strip().lower()
    for old, new in ((u'<i>', u''), (u'</i>', u''), (u'&amp;', u' '),
                     (u'&', u' '), (u'/', u' '), (u',', u' ')):
        text = text.replace(old, new)
    words = set(word.strip(u'.()? ') for word in text.split())
    words.discard(u'')
    words.discard(u'et')  # French "and" is a separator, not an interest
    return u', '.join(sorted(words, reverse=True)) or u'?'


def extract(url, fetch=None, transform=None):
    """Scrape one karst fiche page and file the result in out_s / out_o.

    Args:
        url: address of the fiche page.
        fetch: optional callable ``fetch(url)`` returning the page as bytes
            (decoded as latin-1) or as text. Defaults to an HTTP download.
        transform: optional callable taking the cleaned Lambert 72 string
            ("X Y") and returning ``(lon, lat, height)``. Defaults to running
            the external ``gdaltransform`` tool.

    Returns:
        False when the page has no "interest" cell, which is treated as
        "no such site". True otherwise, including when the page exists but
        has no usable coordinates; in that case a note is printed and no
        record is stored.

    Raises:
        subprocess.CalledProcessError, ValueError: when the default
            coordinate conversion fails or returns unexpected output.
    """
    raw = (fetch or _fetch_page)(url)
    if isinstance(raw, bytes):
        raw = raw.decode('latin-1')
    # The label patterns above contain plain spaces where the page uses
    # non-breaking-space entities.
    data = raw.replace(u'&nbsp;', u' ')

    interest_match = _INTERESSE_RE.search(data)
    if not interest_match:
        # nothing found
        return False

    lambert = _first_group(_LAMBERT_RE, data)
    lambert = u' '.join(_LAMBERT_NOISE_RE.sub(u' ', lambert).split())
    if not lambert:
        # The page exists but cannot be placed on the map. Skip it without
        # telling the caller to stop walking this series.
        print('no Lambert 72 coordinates found, skipping %s' % url)
        return True
    lon, lat, _height = (transform or _lambert_to_wgs84)(lambert)

    interest = _normalise_interest(interest_match.group(1))
    name = u' '.join(_first_group(_NAME_RE, data).split())
    name = name.replace(u'&amp;', u'&')
    beschreibung = _first_group(_DESCRIPTION_RE, data).replace(u'&amp;', u'&')

    out = out_s if u'spéléologique' in interest else out_o
    record = dict(name=name, lambert=lambert, gps='%s, %s' % (lon, lat),
                  beschreibung=beschreibung,
                  laenge=_first_group(_LAENGE_RE, data),
                  interest=interest, url=url)
    out.setdefault(interest, []).append(record)
    print(record)
    return True
## Um Raeren | |
# for count in range(1, 110): | |
# url = 'http://carto1.wallonie.be/documents/Karst/fiche_karst.idc?AKWANUM=431-%03d' % count | |
# print "---------" | |
# print url | |
# extract(url) | |
# for count in range(1, 74): | |
# url = 'http://carto1.wallonie.be/documents/Karst/fiche_karst.idc?AKWANUM=432-%03d' % count | |
# print "---------" | |
# print url | |
# extract(url) | |
# Everything else: for each a/b prefix, request the numbered fiches in
# ascending order and move on to the next prefix as soon as extract()
# reports a page without site data.
def _crawl_series(a, b):
    """Request fiches <a><b>-001 to <a><b>-499, stopping at the first empty one."""
    count = 1
    while count < 500:
        url = ('http://carto1.wallonie.be/documents/Karst/fiche_karst.idc?AKWANUM=%d%d-%03d'
               % (a, b, count))
        print('---------')
        print(url)
        if not extract(url):
            return
        count += 1


for a in range(40, 61):
    for b in range(1, 10):
        _crawl_series(a, b)
# http://carto1.wallonie.be/documents/karst/site/42650z.html | |
# http://carto1.wallonie.be/documents/karst/site/42659z.html | |
# http://carto1.wallonie.be/documents/karst/site/47744z.html | |
# http://carto1.wallonie.be/documents/karst/site/47810z.html | |
# http://carto1.wallonie.be/documents/karst/site/47825z.html | |
# http://carto1.wallonie.be/documents/karst/site/48217z.html | |
# http://carto1.wallonie.be/documents/karst/site/48521z.html | |
# http://carto1.wallonie.be/documents/karst/site/492139z.html | |
# http://carto1.wallonie.be/documents/karst/site/492151z.html | |
# http://carto1.wallonie.be/documents/karst/site/492153z.html | |
# http://carto1.wallonie.be/documents/karst/site/492155z.html | |
# http://carto1.wallonie.be/documents/karst/site/4926z.html | |
# http://carto1.wallonie.be/documents/karst/site/49367z.html | |
# http://carto1.wallonie.be/documents/karst/site/49377z.html | |
# http://carto1.wallonie.be/documents/karst/site/49382z.html | |
# http://carto1.wallonie.be/documents/karst/site/496101z.html | |
# http://carto1.wallonie.be/documents/karst/site/5065z.html | |
# http://carto1.wallonie.be/documents/karst/site/53442z.html | |
# http://carto1.wallonie.be/documents/karst/site/538245b.html | |
# http://carto1.wallonie.be/documents/karst/site/55113Z.html | |
# http://carto1.wallonie.be/documents/karst/site/5522z.html | |
# http://carto1.wallonie.be/documents/karst/site/5522z.html | |
# http://carto1.wallonie.be/documents/karst/site/5555Z.html | |
# http://carto1.wallonie.be/documents/karst/site/59235z.html | |
# generate kml | |
_PLACEMARK_TEMPLATE = u'''      <Placemark><name>%(name)s</name>
        <description>%(beschreibung)s
%(laenge)s
%(interest)s
%(url)s
        </description>
        <Point><coordinates>%(gps)s,0</coordinates>
        </Point>
      </Placemark>
'''


def _write_kml(path, groups):
    """Write the collected records to *path* as a KML document.

    Args:
        path: name of the KML file to create (overwritten if present).
        groups: iterable of ``(records_by_interest, folder_name)`` pairs.
            Each pair becomes a top-level folder with one sub-folder per
            interest label, sorted by label.
    """
    from xml.sax.saxutils import escape

    # The with-block guarantees the file is flushed and closed even if a
    # record fails to format.
    with codecs.open(path, mode='w', encoding='utf-8') as fd:
        fd.write(u'<?xml version="1.0" encoding="UTF-8"?>\n'
                 u'<kml xmlns="http://www.opengis.net/kml/2.2">\n'
                 u'<Document>\n')
        for records_by_interest, folder_name in groups:
            fd.write(u'  <Folder><name>%s</name>\n' % escape(folder_name))
            for kat in sorted(records_by_interest):
                fd.write(u'    <Folder><name>%s</name>\n' % escape(kat))
                for loc in records_by_interest[kat]:
                    # Scraped text may contain '&', '<' or HTML tags; escape
                    # it so the document stays well-formed XML.
                    fields = dict((key, escape(value))
                                  for key, value in loc.items())
                    # KML separates coordinate tuples by whitespace, so
                    # "lon, lat" must be written without the space.
                    fields['gps'] = loc['gps'].replace(' ', '')
                    fd.write(_PLACEMARK_TEMPLATE % fields)
                fd.write(u'    </Folder>\n')
            fd.write(u'  </Folder>\n')
        fd.write(u'</Document>\n</kml>')


_write_kml('carto1.wallonie.be.kml', [(out_s, 'Speleo'), (out_o, 'Other')])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment