Skip to content

Instantly share code, notes, and snippets.

@maxerickson
Last active February 5, 2020 01:05
Show Gist options
  • Save maxerickson/eafdf72f34dc908bd4c373521e958794 to your computer and use it in GitHub Desktop.
Save maxerickson/eafdf72f34dc908bd4c373521e958794 to your computer and use it in GitHub Desktop.
Split an OSM XML file into pieces of a given size, outputting reasonably compact, referentially complete chunks.
#!python3
import os.path
import argparse
import collections
import operator
import xml.etree.ElementTree as ElementTree
import geohash_hilbert
def geohash(node):
    """Return the geohash_hilbert encoding of a node item's position.

    ``node`` must expose ``element.attrib`` with ``'lat'`` and ``'lon'``
    entries that ``float()`` can parse. The result is used as the sort key
    for the node work list in ``split``.
    """
    attrs = node.element.attrib
    # Read 'lat' before 'lon' so a missing attribute raises for the same key
    # as before; the encoder itself takes longitude first.
    latitude, longitude = float(attrs['lat']), float(attrs['lon'])
    return geohash_hilbert.encode(longitude, latitude)
class Item:
    """Wrapper around one OSM XML element (a node, way or relation).

    Attributes set by the constructor:
      element  -- the wrapped ElementTree element; tag edits are applied to it.
      osmid    -- the element's "id" attribute (kept as a string).
      osmtype  -- the element's tag name.
      uid      -- "<osmtype>/<osmid>", used as the lookup key for the item.
      modified -- True when the element carries action="modify" or has been
                  edited through add_tag / remove_tag.
      refs     -- uids referenced by child <nd> and <member> elements, in
                  document order.
      tags     -- dict of k -> v built from the child <tag> elements.
    """

    def __init__(self, osmelement):
        self.element = osmelement
        self.osmid = osmelement.attrib["id"]
        self.osmtype = osmelement.tag
        self.uid = self.osmtype + "/" + self.osmid
        self.modified = osmelement.attrib.get('action') == 'modify'
        self.refs = []
        self.tags = {}
        for child in osmelement:
            if child.tag == "tag":
                self.tags[child.attrib['k']] = child.attrib['v']
            elif child.tag == "nd":
                # <nd> children can only point at nodes.
                self.refs.append("node/" + child.attrib["ref"])
            elif child.tag == "member":
                self.refs.append(child.attrib["type"] + "/" + child.attrib["ref"])

    def __str__(self):
        return self.uid

    def _tag_elements(self, key):
        """Return the child <tag> elements whose k attribute equals key."""
        # Compared in Python rather than through an XPath predicate so that
        # keys containing quote characters are handled correctly.
        return [e for e in self.element.findall("tag") if e.get("k") == key]

    def add_tag(self, key, value, overwrite=False):
        """Set tag ``key`` to ``value`` on both the element and ``self.tags``.

        Raises ValueError when the key is already present and ``overwrite``
        is false. When overwriting, the existing <tag> element is updated in
        place instead of appending a second element with the same key.
        """
        if not overwrite and key in self.tags:
            raise ValueError("Key already exists.")
        existing = self._tag_elements(key)
        if existing:
            existing[0].set("v", value)
            # Drop any further elements with the same key so the XML holds
            # exactly one value for it.
            for duplicate in existing[1:]:
                self.element.remove(duplicate)
        else:
            self.element.append(
                ElementTree.Element("tag", attrib={"k": key, "v": value}))
        self.tags[key] = value
        self.element.set('action', 'modify')
        self.modified = True

    def remove_tag(self, key):
        """Remove tag ``key`` from the element and from ``self.tags``.

        Does nothing (and does not mark the item modified) when the element
        has no such tag.
        """
        matches = self._tag_elements(key)
        for match in matches:
            self.element.remove(match)
        # Keep the dict in step with the XML so a later add_tag of the same
        # key is not rejected as a duplicate.
        self.tags.pop(key, None)
        if matches:
            self.element.set('action', 'modify')
            self.modified = True
class OSMTree:
    """Collection of items indexed by uid, with a reverse reference index.

    Attributes:
      items      -- dict mapping uid -> item.
      parent_map -- maps a referenced uid to the list of uids of the items
                    that reference it (one entry per reference).
    """

    def __init__(self, tree=None):
        """Create an empty tree, or index the node/way/relation children of
        ``tree`` (any iterable of XML elements) when one is given."""
        self.items = {}
        self.parent_map = collections.defaultdict(list)
        if tree is not None:
            for child in tree:
                if child.tag in ('node', 'way', 'relation'):
                    self.add(Item(child))

    def add(self, item):
        """Store ``item`` under its uid and record it as a parent of every
        uid listed in ``item.refs``."""
        self.items[item.uid] = item
        for ref in item.refs:
            self.parent_map[ref].append(item.uid)

    def remove(self, item):
        """Delete ``item`` from ``items``; raises KeyError if it is absent.

        ``parent_map`` is deliberately left untouched; get_related skips
        entries that no longer resolve to a stored item.
        """
        del self.items[item.uid]

    def get_related(self, item):
        """Return the set of stored items transitively connected to ``item``.

        Connections are followed in both directions: to items that reference
        the current one (through ``parent_map``) and to items the current one
        references (through ``refs``). Only items currently in ``items`` are
        returned. ``item`` itself does not have to be stored; if it is stored
        and reachable from one of its neighbours it appears in the result.
        """
        related = set()
        visited = set()
        pending = collections.deque([item])
        while pending:
            current = pending.popleft()
            if current in visited:
                continue
            visited.add(current)
            for parent_uid in self.parent_map.get(current.uid, ()):
                parent = self.items.get(parent_uid)
                if parent is None:
                    # The parent was removed after being indexed; the stale
                    # parent_map entry must not raise KeyError.
                    continue
                related.add(parent)
                pending.append(parent)
            for ref in current.refs:
                referenced = self.items.get(ref)
                if referenced is not None:
                    related.add(referenced)
                    pending.append(referenced)
        return related
def split(source, targetsize):
    """Yield OSMTree chunks that together drain ``source``.

    Nodes are visited in geohash order. Each visited item is moved from
    ``source`` into the current chunk together with everything
    ``source.get_related`` reports for it, so items that reference each other
    end up in the same chunk. A chunk is yielded once it holds at least
    ``targetsize`` items, so chunks can exceed that size; whatever is left is
    yielded as a final, possibly smaller, chunk.

    ``source`` is emptied as the generator is consumed.
    """
    seeds = [i for i in source.items.values() if i.osmtype == "node"]
    seeds.sort(key=geohash)
    # Ways and relations that are not connected to any node present in the
    # source are never reached from a node seed. Queue them after the nodes so
    # they are still emitted; previously they kept ``source.items`` non-empty
    # and the outer loop never terminated.
    seeds.extend(i for i in source.items.values() if i.osmtype != "node")

    target = OSMTree()
    for seed in seeds:
        # Items already pulled into a chunk as a relative of an earlier seed
        # are gone from the source; this O(1) check replaces removing them
        # from the work list one by one.
        if seed.uid not in source.items:
            continue
        target.add(seed)
        source.remove(seed)
        for relative in source.get_related(seed):
            target.add(relative)
            source.remove(relative)
        if len(target.items) >= targetsize:
            yield target
            target = OSMTree()
    if target.items:
        yield target
def write_osm(items, filename):
    """Write ``items`` to ``filename`` as an OSM XML document.

    ``items`` is an iterable of objects exposing ``osmtype`` ("node", "way"
    or "relation"), ``osmid`` and ``element``. Output is grouped as nodes,
    then ways, then relations, each group ordered by numeric id. The items'
    own elements are appended to the new root (they are not copied).

    Raises KeyError for an item whose ``osmtype`` is not one of the three
    known types.
    """
    def id_key(item):
        # Ids are stored as strings; compare them as integers so that "9"
        # sorts before "10". Anything that is not an integer sorts last, in
        # string order, instead of aborting the write.
        try:
            return (0, int(item.osmid), "")
        except ValueError:
            return (1, 0, item.osmid)

    root = ElementTree.Element("osm", generator="osm_chunker.py", version="0.6", upload="never")
    note = ElementTree.SubElement(root, "note")
    note.text = "The data included in this document is from www.openstreetmap.org. The data is made available under ODbL."
    by_type = {"node": [], "way": [], "relation": []}
    for item in items:
        by_type[item.osmtype].append(item)
    for osmtype in ("node", "way", "relation"):
        for item in sorted(by_type[osmtype], key=id_key):
            root.append(item.element)
    ElementTree.ElementTree(root).write(filename)
if __name__ == "__main__":
    # Command-line entry point: read one OSM XML file and write numbered
    # chunk files named "<input basename>_<n>.osm" into the current directory.
    parser = argparse.ArgumentParser(description='Split an osm file into reasonably compact chunks.')
    parser.add_argument('infile',
                        help='Source data')
    parser.add_argument('--size', type=int, default=5000, help='Number of objects per output file.')
    args = parser.parse_args()
    # A size of zero would crash the estimate below with ZeroDivisionError
    # and a negative size is meaningless, so reject both with a usage error.
    if args.size < 1:
        parser.error("--size must be a positive integer.")
    tree = ElementTree.parse(args.infile)
    root = tree.getroot()
    source = OSMTree(root)
    objectcount = len(source.items)
    print(objectcount, "OSM objects.")
    # Only an estimate: split() keeps related objects together, so chunks
    # can be larger than --size.
    print("Approximately {:d} output files.".format(objectcount // args.size + 1))
    fbase = os.path.splitext(os.path.basename(args.infile))[0]
    for count, osmtree in enumerate(split(source, args.size)):
        write_osm(osmtree.items.values(), "{}_{}.osm".format(fbase, count))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment