Skip to content

Instantly share code, notes, and snippets.

@ttomasz
Created October 29, 2022 20:50
Show Gist options
  • Save ttomasz/4880e3f487f3971dab092041fdcef905 to your computer and use it in GitHub Desktop.
Save ttomasz/4880e3f487f3971dab092041fdcef905 to your computer and use it in GitHub Desktop.
Count tag usage statistics in an OSM PBF file
from collections import Counter
from datetime import timedelta
from itertools import combinations
from sys import argv
from time import perf_counter
import osmium
from osmium.osm import Node, Way, Relation
class CounterHandler(osmium.SimpleHandler):
    """Handler that tallies element counts and tag usage while a file is read.

    Attributes:
        num_nodes, num_ways, num_relations: how many elements of each type
            were passed to the corresponding callback.
        tag_counter: occurrences of each tag key.
        value_counter: occurrences of each tag value.
        tag_value_counter: occurrences of each ``key=value`` string.
        tag_pair_counter: occurrences of each ``key1 + key2`` string, built
            from every pair of keys found together on one element.
    """

    def __init__(self):
        super().__init__()
        # Per-type element totals.
        self.num_nodes = 0
        self.num_ways = 0
        self.num_relations = 0
        # Frequency tables filled by the add_* methods.
        self.tag_counter = Counter()
        self.value_counter = Counter()
        self.tag_value_counter = Counter()
        self.tag_pair_counter = Counter()

    def add_tag_and_value_counts(self, element):
        """Count every key, value and ``key=value`` string of ``element.tags``."""
        # Copy the strings out once so the tag list is walked a single time.
        pairs = [(tag.k, tag.v) for tag in element.tags]
        self.tag_counter.update(key for key, _ in pairs)
        self.value_counter.update(value for _, value in pairs)
        self.tag_value_counter.update(
            f'{key}={value}' for key, value in pairs
        )

    def add_tag_pair_counts(self, element):
        """Count each pair of tag keys that appear together on ``element``."""
        # Sorting first gives every pair one canonical "a + b" spelling,
        # whatever order the keys were stored in.
        keys = sorted(tag.k for tag in element.tags)
        self.tag_pair_counter.update(
            f'{first} + {second}' for first, second in combinations(keys, 2)
        )

    def _tally_tags(self, element):
        """Run both tag tallies for one element."""
        self.add_tag_and_value_counts(element)
        self.add_tag_pair_counts(element)

    def node(self, n: Node):
        """Callback for a node: bump the node total and tally its tags."""
        self.num_nodes += 1
        self._tally_tags(n)

    def way(self, w: Way):
        """Callback for a way: bump the way total and tally its tags."""
        self.num_ways += 1
        self._tally_tags(w)

    def relation(self, r: Relation):
        """Callback for a relation: bump the relation total and tally its tags."""
        self.num_relations += 1
        self._tally_tags(r)
DEFAULT_INPUT_PATH = "test.osm.pbf"
TOP_N = 5


def pick_input_path(args, default=DEFAULT_INPUT_PATH):
    """Return the input file path from a ``sys.argv``-style argument list.

    Args:
        args: Sequence whose first item is the program name and whose
            following items are the user-supplied arguments.
        default: Path returned when no user argument was given.

    Returns:
        The first user argument if there is one, otherwise ``default``.
        Any further arguments are ignored rather than causing a silent
        fallback to ``default``.
    """
    return args[1] if len(args) > 1 else default


def print_most_common(title, counter, limit=TOP_N):
    """Print ``title`` followed by the ``limit`` most frequent entries.

    Args:
        title: Heading line printed before the entries.
        counter: A ``collections.Counter`` to report on.
        limit: Maximum number of entries to print.
    """
    print(title)
    for name, count in counter.most_common(limit):
        print(" -", f"{name}: {count}")


if __name__ == '__main__':
    h = CounterHandler()

    # Time only the file processing, not the report printing below.
    start_time = perf_counter()
    h.apply_file(pick_input_path(argv))
    processing_time = timedelta(seconds=perf_counter() - start_time)

    print("Processing took:", processing_time)
    print(f"Number of nodes: {h.num_nodes}")
    print(f"Number of ways: {h.num_ways}")
    print(f"Number of relations: {h.num_relations}")

    print_most_common("Most common tags:", h.tag_counter)
    print_most_common("Most common values:", h.value_counter)
    print_most_common("Most common tag=value:", h.tag_value_counter)
    print_most_common("Most common tag combinations:", h.tag_pair_counter)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment