Skip to content

Instantly share code, notes, and snippets.

@miklobit
Created December 9, 2013 19:48
Show Gist options
  • Save miklobit/7879551 to your computer and use it in GitHub Desktop.
Save miklobit/7879551 to your computer and use it in GitHub Desktop.
import urllib
import urllib2
import os.path
from lxml import etree
import string
def output_node(node, message):
    """Print a report entry for an OSM node whose wikipedia tag needs attention.

    Output, one item per line: an empty line, ``message``, the node's
    openstreetmap.org URL, the Polish Wikipedia URL built from the node's
    ``wikipedia`` tag (skipped when the node carries no such tag), the
    node's latitude and the node's longitude.

    node    -- element whose ``attrib`` holds 'id', 'lat' and 'lon' and whose
               children are tag elements with 'k'/'v' attributes.
    message -- text describing the problem that was found.
    """
    # Read the tag from the node that was passed in, rather than from
    # whatever element a module-level loop variable currently points at.
    link = None
    for tag in node:
        if tag.attrib.get('k') == "wikipedia":
            link = tag.attrib['v'].encode('utf-8')
            break

    print("")
    print(message)
    print(("http://www.openstreetmap.org/node/" + node.attrib['id']).encode('utf-8'))
    if link is not None:
        # Drop only a leading language prefix; a "pl:" occurring inside the
        # article title belongs to the title and must survive.
        if link.startswith("pl:"):
            link = link[len("pl:"):]
        print("https://pl.wikipedia.org/wiki/" + urllib2.quote(link))
    print(node.attrib['lat'].encode('utf-8'))
    print(node.attrib['lon'].encode('utf-8'))
# Audit the "wikipedia" tag of every node in the OSM extract:
#   * the value must start with the "pl:" language prefix,
#   * the referenced Polish Wikipedia page must exist,
#   * the page must contain coordinate markup.
# Downloaded pages are kept in the "cache" directory so later runs read them
# from disk instead of fetching them again.
# NOTE(review): this block contains non-ASCII text; under Python 2 the file
# needs a coding declaration or a UTF-8 BOM -- confirm one is present.
data = etree.parse(r"..\cyclemap\interpreter.osm")

# Create the cache directory up front instead of failing on the first open().
if not os.path.isdir("cache"):
    os.makedirs("cache")

for node in data.iter("node"):
    # NOTE: the tag element stays bound to the module-level name `e`; code
    # elsewhere in this file may look it up as a global.
    for e in node:
        if e.attrib.get('k') != "wikipedia":
            continue
        link = e.attrib['v'].encode('utf-8')
        if not link.startswith("pl:"):
            output_node(node, "missing pl:")
            continue
        link = link[len("pl:"):]

        # Article titles may contain characters that are not allowed in file
        # names (e.g. "/" or ":"); substitute them so open() cannot fail or
        # escape the cache directory.
        safe_name = "".join("_" if ch in '\\/:*?"<>|' else ch for ch in link)
        filename = "cache\\" + safe_name + ".txt"

        if os.path.isfile(filename):
            with open(filename, 'rb') as cache_file:
                text = cache_file.read()
        else:
            print("fetching " + link)
            # Download first and only then create the cache file: a failed
            # request must not leave an empty file behind that later runs
            # would mistake for a fetched page.
            response = urllib.urlopen("https://pl.wikipedia.org/wiki/" + urllib2.quote(link))
            try:
                text = response.read()
            finally:
                response.close()
            with open(filename, 'wb') as cache_file:
                cache_file.write(text)

        # Pages that carry coordinates contain markup such as:
        # <span class="latitude">50°04'02”N</span>&#160;<span class="longitude">19°55'03”E</span>
        if "<span class=\"latitude\">" in text:
            continue
        # This sentence is searched for to recognise the "no such article" page.
        if "W Wikipedii nie ma jeszcze artykułu pod taką nazwą. Możesz wykonać jedną z poniższych czynności:" in text:
            output_node(node, "missing article at wiki:")
        else:
            output_node(node, "missing coordinates at wiki:")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment