Skip to content

Instantly share code, notes, and snippets.

@mlbright
Created August 13, 2009 15:30
Show Gist options
  • Save mlbright/167238 to your computer and use it in GitHub Desktop.
Save mlbright/167238 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
# Where to eat in Paris ("Ou bouffer a Paris")
import urllib2
import re
import time
import random
import sys
# The script needs the standard-library json module to emit the marker list.
# Only a genuinely missing module is treated as "interpreter too old"; any
# other failure during import is left to surface with its own traceback.
try:
    import json
except ImportError:
    sys.stderr.write("no json module: use python 2.6\n")
    # Exit with a non-zero status so callers can tell the run failed.
    sys.exit(1)
# Regex source for an anchor whose href points at Google Maps; group 1 is the
# URL.  The URL part stops at the first double quote or whitespace so that it
# cannot run past the end of the href attribute (e.g. href="..."target="..."),
# and the dot in the host name is matched literally.
mapanchor = r'<a href="(http://maps\.google[^"\s]+)"'
# Base address of the paginated blog; the page number is appended to it.
url = "http://www.francoissimon.typepad.fr/simonsays/page/"
# Output page template.  The single %s is replaced with a JavaScript array of
# [first, second] coordinate pairs, which the script turns into one marker
# each.  Any literal percent sign added to this template must be written "%%".
# NOTE(review): GMap2/GLatLng/GMarker belong to version 2 of the Google Maps
# JavaScript API (see "v=2" in the script URL), which is believed to have been
# retired -- confirm before relying on the generated page.
html = """
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:v="urn:schemas-microsoft-com:vml">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<title>F. Simon food map</title>
<script src="http://maps.google.com/maps?file=api&amp;v=2&amp;key=ABQIAAAARvs3FSKHhSiQdG-C9Z_BlxTwrmJHW8spEO7GXWIUI9I6BwQtYhSb_REtz-ic7NME5Rc7dhhSkul-aQ"
type="text/javascript"></script>
<script type="text/javascript">
function initialize() {
if (GBrowserIsCompatible()) {
var map = new GMap2(document.getElementById("map_canvas"));
map.addControl(new GLargeMapControl());
map.setCenter(new GLatLng(48.856667,2.350987), 12);
var a = %s;
for (var i = 0; i < a.length; i++) {
var point = new GLatLng(parseFloat(a[i][0]),parseFloat(a[i][1]));
map.addOverlay(new GMarker(point));
}
}
}
</script>
</head>
<body onload="initialize()" onunload="GUnload()">
<div id="map_canvas" style="width: 1000px; height: 700px"></div>
</body>
</html>
"""
# Compiled form of the Google Maps anchor pattern, used when scanning pages.
link = re.compile(mapanchor)

# Download blog pages 1, 2, 3, ... until a request fails.  The first failed
# request (presumably the HTTP error for a page number past the end of the
# blog) is the only thing that ends the loop.
# NOTE(review): if the site ever answers every page number successfully this
# loop will not terminate -- consider an upper bound on the page count.
count = 0
pages = []
while True:
    # Random 1-5 second pause before every request to go easy on the server.
    time.sleep(random.randint(1, 5))
    count += 1
    page = url + str(count)
    try:
        response = urllib2.urlopen(page)
        try:
            fspage = response.read()
        finally:
            # Release the connection even if reading the body fails.
            response.close()
    except Exception as error:
        # Exception (not a bare except) so that Ctrl-C and sys.exit() still
        # propagate instead of being mistaken for the end of the blog.
        # Report on stderr; stdout is reserved for the generated HTML.
        sys.stderr.write("stopping at %s: %s\n" % (page, error))
        break
    pages.append(fspage)
# Distinct Google Maps URLs found across all downloaded pages.  A set collapses
# links that appear more than once; later code only iterates over the result.
# The match variable is deliberately not called "map", which would shadow the
# builtin for the rest of the script.
maps = set(
    anchor.group(1)
    for fspage in pages
    for anchor in link.finditer(fspage)
)
# Pull the "ll=<first>,<second>" position out of every collected map URL.
#
# The strict pattern accepts "ll" only as a parameter of its own (at the start
# of the string or preceded by "?", "&", or the ";" that ends an HTML-escaped
# "&amp;"), so the tail of a longer parameter name such as "sll=" is not
# mistaken for it.  The loose pattern is the original unanchored search, kept
# as a fallback so that a URL which only matches loosely still yields a marker.
# Both accept a leading "-" so negative coordinates are not dropped.
# NOTE(review): "ll" is presumably "latitude,longitude" and "sll" the origin
# of a search rather than the place itself -- confirm against real links.
coordinates_pattern = re.compile(r"(?:^|[?&;])ll=(-?[\d.]+),(-?[\d.]+)")
loose_coordinates_pattern = re.compile(r"ll=(-?[\d.]+),(-?[\d.]+)")
coordinates = []
for u in maps:
    m = coordinates_pattern.search(u) or loose_coordinates_pattern.search(u)
    if m:
        # The pair keeps the order found in the URL because the page template
        # passes element 0 as the first GLatLng argument and element 1 as the
        # second.  Values stay strings; the template applies parseFloat.
        latitude, longitude = m.groups()
        coordinates.append((latitude, longitude))
# Serialise the coordinate pairs as a JSON array, which the template embeds
# directly as the JavaScript array literal assigned to "var a".
a = json.dumps(coordinates)
# Write the finished page to stdout.  The explicit one-element tuple makes the
# single substitution unambiguous; the parenthesised form prints the same text
# under the Python 2 print statement.
print(html % (a,))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment