Skip to content

Instantly share code, notes, and snippets.

@iandees
Created June 3, 2011 22:05
Show Gist options
  • Save iandees/1007256 to your computer and use it in GitHub Desktop.
Save iandees/1007256 to your computer and use it in GitHub Desktop.
An OSM "weirdness" detector: Reads minutely/hourly diffs and looks for oddly-shaped ways or ways that don't quite make sense.
#!/bin/python
import sys
import urllib2
import xml.etree.cElementTree as ElementTree
import simplejson as json
from datetime import datetime
import StringIO
import gzip
import time
import math
# Parse the diff and write out a simplified version
class OscHandler():
def __init__(self):
self.changes = {}
self.nodes = {}
self.ways = {}
self.relations = {}
self.action = ""
self.primitive = {}
self.missingNds = set()
def startElement(self, name, attributes):
if name in ('modify', 'delete', 'create'):
self.action = name
if name in ('node', 'way', 'relation'):
self.primitive['id'] = int(attributes['id'])
self.primitive['version'] = int(attributes['version'])
self.primitive['changeset'] = int(attributes['changeset'])
self.primitive['user'] = attributes.get('user')
self.primitive['timestamp'] = isoToTimestamp(attributes['timestamp'])
self.primitive['tags'] = {}
self.primitive['action'] = self.action
if name == 'node':
self.primitive['lat'] = float(attributes['lat'])
self.primitive['lon'] = float(attributes['lon'])
elif name == 'tag':
key = attributes['k']
val = attributes['v']
self.primitive['tags'][key] = val
elif name == 'way':
self.primitive['nodes'] = []
elif name == 'relation':
self.primitive['members'] = []
elif name == 'nd':
ref = int(attributes['ref'])
self.primitive['nodes'].append(ref)
if ref not in self.nodes:
self.missingNds.add(ref)
elif name == 'member':
self.primitive['members'].append(
{
'type': attributes['type'],
'role': attributes['role'],
'ref': attributes['ref']
})
def endElement(self, name):
if name == 'node':
self.nodes[self.primitive['id']] = self.primitive
elif name == 'way':
self.ways[self.primitive['id']] = self.primitive
elif name == 'relation':
self.relations[self.primitive['id']] = self.primitive
if name in ('node', 'way', 'relation'):
self.primitive = {}
def isoToTimestamp(isotime):
t = datetime.strptime(isotime, "%Y-%m-%dT%H:%M:%SZ")
return time.mktime(t.timetuple())
def distanceBetweenNodes(node1, node2):
dlat = math.fabs(node1['lat'] - node2['lat'])
dlon = math.fabs(node1['lon'] - node2['lon'])
return math.hypot(dlat, dlon)
def angleBetweenNodes(a, b, c):
d = ((a * a) + (b * b) - (c * c)) / (2.0 * a * b)
if 1.0 - d < 0.00001:
d = 1.0
elif 1.0 + d < 0.00001:
d = -1.0
return math.degrees(math.acos(d))
def requestNodes(ndlist):
url = "http://jxapi.openstreetmap.org/xapi/api/0.6/node/%s" % (",".join(map(str, ndlist)))
content = urllib2.urlopen(url)
return content
def parseOsm(source, handler):
for event, elem in ElementTree.iterparse(source, events=('start', 'end')):
if event == 'start':
handler.startElement(elem.tag, elem.attrib)
elif event == 'end':
handler.endElement(elem.tag)
elem.clear()
# from http://stackoverflow.com/questions/2348317/how-to-write-a-pager-for-python-iterators
def grouper( page_size, iterable ):
page= []
for item in iterable:
page.append( item )
if len(page) == page_size:
yield page
page= []
yield page
# from http://stackoverflow.com/questions/1335392/iteration-over-list-slices/1335466#1335466
def sliceIterator(lst, sliceLen):
for i in range(len(lst) - sliceLen + 1):
yield lst[i:i + sliceLen]
def minutelyUpdateRun():
# Read the state.txt
sf = open('state.txt', 'r')
state = {}
for line in sf:
if line[0] == '#':
continue
(k, v) = line.split('=')
state[k] = v.strip().replace("\\:", ":")
minuteNumber = int(isoToTimestamp(state['timestamp'])) / 60
print "Minute Number: %s" % (minuteNumber)
# Grab the sequence number and build a URL out of it
sqnStr = state['sequenceNumber'].zfill(9)
url = "http://planet.openstreetmap.org/minute-replicate/%s/%s/%s.osc.gz" % (sqnStr[0:3], sqnStr[3:6], sqnStr[6:9])
print "Downloading change file (%s)." % (url)
content = urllib2.urlopen(url)
content = StringIO.StringIO(content.read())
gzipper = gzip.GzipFile(fileobj=content)
print "Parsing change file."
handler = OscHandler()
parseOsm(gzipper, handler)
# Fetch from jxapi the nodes that weren't in the changeset
print "Filling in %d missing nodes." % (len(handler.missingNds))
ndchunk = []
for group in grouper(350, handler.missingNds):
parseOsm(requestNodes(group), handler)
# Now that we have the data in memory, start looking for suspicious-looking changes
print "%d nodes, %d ways, %d relations." % (len(handler.nodes), len(handler.ways), len(handler.relations))
print "Detecting problems."
## Ways should not have bends > 95deg in them
for way in handler.ways.itervalues():
if way['action'] == 'delete':
# Skip deleted ways for now
continue
prevNode = None
nds = way['nodes']
ndCount = len(nds)
if ndCount <= 1:
print "Action was %s" % (way['action'])
print "!!! Way %d consists of one or fewer nodes." % (way['id'])
elif ndCount == 2:
try:
node1 = handler.nodes[nds[0]]
node2 = handler.nodes[nds[1]]
except KeyError, e:
print "Node %s was not found in way %d. Skipping." % (e, way['id'])
continue
if distanceBetweenNodes(node1, node2) > 0.3:
print "!!! Way %d consists of two nodes that are far apart." % (way['id'])
else:
totalAngle = 0.0
for ndSlice in sliceIterator(nds, 3):
nd1 = ndSlice[0]
nd2 = ndSlice[1]
nd3 = ndSlice[2]
try:
A = handler.nodes[nd1]
B = handler.nodes[nd3]
C = handler.nodes[nd2]
except KeyError, e:
print "Node %s was not found in way %d. Skipping." % (e, way['id'])
continue
a = distanceBetweenNodes(C, B)
b = distanceBetweenNodes(C, A)
c = distanceBetweenNodes(A, B)
if 0.0 in (a, b, c):
if a == 0.0:
print "!!! Way %d has nodes %d and %d that overlap." % (way['id'], nd2, nd3)
if b == 0.0:
print "!!! Way %d has nodes %d and %d that overlap." % (way['id'], nd1, nd3)
if c == 0.0:
print "!!! Way %d has nodes %d and %d that overlap." % (way['id'], nd1, nd2)
continue
try:
totalAngle = totalAngle + angleBetweenNodes(a, b, c)
except ValueError, e:
print "a=%f b=%f c=%f" % (a, b, c)
raise e
averageAngle = totalAngle / (len(nds) - 2)
#print "Way %d has average angle %f." % (way['id'], averageAngle)
if averageAngle < 75.0 or averageAngle > 195.0:
print "!!! Way %d is abnormally shaped with an average angle of %f." % (way['id'], averageAngle)
#sys.exit(0)
# Download the next state file
nextSqn = int(state['sequenceNumber']) + 1
sqnStr = str(nextSqn).zfill(9)
url = "http://planet.openstreetmap.org/minute-replicate/%s/%s/%s.state.txt" % (sqnStr[0:3], sqnStr[3:6], sqnStr[6:9])
try:
u = urllib2.urlopen(url)
statefile = open('state.txt', 'w')
statefile.write(u.read())
statefile.close()
except Exception, e:
print e
return False
return True
if __name__ == "__main__":
result = minutelyUpdateRun()
while result is True:
result = minutelyUpdateRun()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment