Skip to content

Instantly share code, notes, and snippets.

@cbeddow
Last active November 8, 2023 12:11
Show Gist options
  • Save cbeddow/c5014f06456413a74348640b60f4fb7f to your computer and use it in GitHub Desktop.
Save cbeddow/c5014f06456413a74348640b60f4fb7f to your computer and use it in GitHub Desktop.
import json
import pandas as pd
# Lookup table from Overture category name to its OpenStreetMap tags.
# It is read once, at import time, from a JSON file located relative to the
# current working directory.
tags_dict = None
# JSON text is UTF-8 by specification; be explicit so decoding does not
# depend on the platform's default locale encoding.
with open('./overture2osm.json', encoding='utf-8') as file:
    tags_dict = json.load(file)
def jsonize_tags(tags):
    """Turn an iterable of OSM ``"key=value"`` strings into a dict.

    Args:
        tags: Iterable of strings, each written as ``"key=value"``.

    Returns:
        A new dict mapping each key to its value. When the same key appears
        more than once, the last occurrence wins.

    Raises:
        IndexError: If a tag contains no ``'='`` separator.
    """
    osm_tags = {}
    for tag in tags:
        # Split on the first '=' only, so values that themselves contain '='
        # (e.g. URLs with query strings) are kept whole.
        parts = tag.split('=', 1)
        osm_tags[parts[0]] = parts[1]
    return osm_tags
# Overture categories that are too vague to convert to OSM tags without
# additional context; no category tags are emitted for them.
_SKIP_CATEGORIES = frozenset({'structure_and_geography'})


def _local_name(common):
    """Return the default ("local") name from a ``common`` names entry, or None.

    Accepts either a list of ``{'value': ..., 'language': ...}`` entries or a
    plain ``{'local': ...}`` mapping.
    NOTE(review): both shapes are indexed in this file; confirm which one the
    brand field really uses.
    """
    if common is None:
        return None
    if isinstance(common, dict):
        return common.get('local')
    return next(
        (name['value'] for name in common if name['language'] == 'local'),
        None,
    )


def _social_key(url):
    """Return the site label used to build a ``contact:<site>`` key, or None.

    The label is the second-to-last part of the host name, so
    ``https://facebook.com/x`` and ``https://www.facebook.com/x`` both give
    ``'facebook'``. None is returned when no such label can be extracted.
    """
    from urllib.parse import urlparse

    try:
        # urlparse only recognises a host after '//', so add it when the URL
        # was written without a scheme.
        host = urlparse(url if '//' in url else f'//{url}').hostname
    except ValueError:
        return None
    labels = host.split('.') if host else []
    return labels[-2] if len(labels) >= 2 else None


def _source_refs(sources):
    """Build one ``Overture/<dataset>[/<record_id>]`` reference per source."""
    refs = []
    for source in sources:
        ref = f'Overture/{source["dataset"]}'
        if source.get('record_id') is not None:
            ref += f'/{source["record_id"]}'
        refs.append(ref)
    return refs


def retag(obj):
    """Convert one Overture place record into a dict of OpenStreetMap tags.

    Quick tip: Overture is our left table and has categories; we are
    converting it to OSM as the right table, which has tags.

    Args:
        obj: Mapping-like Overture record (anything exposing ``items()`` and
            key lookup). Fields whose value is None are treated as absent.

    Returns:
        A new dict of OSM tags. It starts from the tags mapped to the record's
        main category (if any) and is extended with name, brand, contact,
        level and source tags when the corresponding fields are present.

    Raises:
        KeyError: If the main category is neither in the skip list nor in the
            module-level ``tags_dict`` lookup table.
    """
    # Only non-null fields count as present.
    keys = {key for key, value in obj.items() if value is not None}

    # Start from the tags of the main category; secondary categories are ignored.
    osm_tags = {}
    if 'categories' in keys:
        main_category = obj['categories']['main']
        if main_category is not None and main_category not in _SKIP_CATEGORIES:
            cat_tags = tags_dict[main_category]
            # Presumably each entry is a list of "key=value" strings (TODO
            # confirm against overture2osm.json); a ready-made dict is also
            # accepted. Either way we work on a fresh dict so the shared
            # lookup table is never mutated between calls.
            if isinstance(cat_tags, dict):
                osm_tags = dict(cat_tags)
            else:
                osm_tags = jsonize_tags(cat_tags)

    # Place names: the default "local" name plus optional per-language names.
    if 'names' in keys:
        common = obj['names']['common']
        primary_name = _local_name(common)
        if primary_name is not None:
            osm_tags['name'] = primary_name
        for name in common:
            if name['language'] != 'local':
                osm_tags[f'name:{name["language"]}'] = name['value']

    # TODO: convert 'addresses' to OSM addr:* tags (JavaScript example:
    # https://is.gd/addrjs). Not implemented yet, so addresses are ignored
    # rather than crashing the whole conversion.

    if 'brand' in keys:
        brand_name = _local_name(obj['brand']['names']['common'])
        if brand_name is not None:
            osm_tags['brand'] = brand_name

    # Contact information is always a list; join the values with ';' and no space.
    for field, tag in (
        ('phones', 'contact:phone'),
        ('websites', 'contact:website'),
        ('emails', 'contact:email'),
    ):
        if field in keys:
            osm_tags[tag] = ';'.join(obj[field])

    # Social media URLs are keyed by site name instead: contact:<site>.
    if 'socials' in keys:
        for url in obj['socials']:
            social = _social_key(url)
            if social is not None:
                osm_tags[f'contact:{social}'] = url

    # Floor of the place within its building (rarely present).
    if 'level' in keys:
        osm_tags['level'] = obj['level']
    # Number of floors of the building (almost never present).
    if 'numFloors' in keys:
        osm_tags['building:levels'] = obj['numFloors']

    # Finally, cite the source info: dataset name and record ID, then the date.
    # All sources are kept, ';'-separated, instead of only the last one.
    if 'sources' in keys:
        refs = _source_refs(obj['sources'])
        if refs:
            osm_tags['source'] = ';'.join(refs)
    if 'updateTime' in keys:
        # Keep only the first 10 characters (the date part of the timestamp).
        osm_tags['source:date'] = obj['updateTime'][0:10]

    return osm_tags
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment