Skip to content

Instantly share code, notes, and snippets.

@cenit
Last active September 11, 2018 09:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cenit/3b0eb229cdcd48fc17146faa60566d90 to your computer and use it in GitHub Desktop.
Save cenit/3b0eb229cdcd48fc17146faa60566d90 to your computer and use it in GitHub Desktop.
lat/lon from docx to nominatim
#! /usr/bin/env python2
# -*- coding: utf-8 -*-
# In case of encoding problems on macOS, remember to export PYTHONIOENCODING=utf-8.
# Before running, extract document.xml by unzipping the .docx file.
import re
from collections import OrderedDict

# XML part that must already have been extracted from the .docx archive.
DOCUMENT_XML = 'document.xml'

# A goo.gl short link: the scheme and host followed by a path, ending at the
# first whitespace, double quote or markup delimiter.
SHORT_URL_RE = re.compile(r'https://goo\.gl/[^\s"<>]+')


def extract_short_urls(lines):
    """Return the distinct goo.gl short links found in *lines*.

    Each line is scanned independently, so a link cannot span two lines.
    Matching the link itself (rather than splitting the markup on '>', '<'
    and spaces) keeps any text glued to the link, such as a preceding
    attribute name, out of the result.

    Args:
        lines: iterable of text lines (an open file works).

    Returns:
        A list of link strings in order of first appearance, without
        duplicates.
    """
    found = []
    for line in lines:
        found.extend(SHORT_URL_RE.findall(line))
    # OrderedDict keeps first-seen order while dropping repeats; a plain
    # dict does not guarantee ordering on Python 2.
    return list(OrderedDict.fromkeys(found))


# `urls` stays a module-level name because the rest of the script reads it.
urls = []
if __name__ == "__main__":
    with open(DOCUMENT_XML) as f:
        urls = extract_short_urls(f)
# 1 - expand each short URL and read the latitude/longitude from the expanded Google Maps address
# 2 - encode a geohash to create a unique ID for each install point
import requests
import json
import Geohash
import pytablewriter as ptw
import time

# Marker that precedes "<lat>,<lon>" in the expanded map URL.
pattern = "maps/@"
nominatim_url_part1 = "https://nominatim.openstreetmap.org/reverse?format=jsonv2&lat="
nominatim_url_part2 = "&lon="

# Seconds to wait for any single HTTP response before giving up.
REQUEST_TIMEOUT = 30
# Nominatim's usage policy asks clients to identify themselves with a
# non-default User-Agent and to stay at or below one request per second.
NOMINATIM_HEADERS = {
    "User-Agent": "latlon-from-docx-to-nominatim "
                  "(https://gist.github.com/cenit/3b0eb229cdcd48fc17146faa60566d90)"
}
NOMINATIM_MIN_INTERVAL = 1.0


def parse_lat_lon(expanded_url, marker=pattern):
    """Return ``(lat, lon)`` as floats read from *expanded_url*.

    The coordinates are the first two comma-separated fields that follow
    *marker*.

    Raises:
        ValueError: if *marker* is absent or the two fields after it are not
            numbers. Without this check a missing marker would make the
            slice start at an arbitrary offset and could yield wrong values.
    """
    start = expanded_url.find(marker)
    if start == -1:
        raise ValueError(
            "no '%s' marker in expanded URL: %s" % (marker, expanded_url))
    fields = expanded_url[start + len(marker):].split(",")
    try:
        return float(fields[0]), float(fields[1])
    except (ValueError, IndexError):
        raise ValueError(
            "cannot read lat/lon from expanded URL: %s" % expanded_url)


def build_nominatim_url(lat, lon):
    """Return the reverse-geocoding request URL for *lat*, *lon*."""
    return nominatim_url_part1 + str(lat) + nominatim_url_part2 + str(lon)


def pick_place_name(reverse_json):
    """Choose a display name for a point from a reverse-geocoding result.

    Preference order: ``address['pedestrian']``, ``address['neighbourhood']``,
    then the top-level ``display_name``; an empty string when none of them
    is present (for example when the response carries no address at all).
    """
    address = reverse_json.get('address', {})
    for key in ('pedestrian', 'neighbourhood'):
        if key in address:
            return address[key]
    return reverse_json.get('display_name', '')


def export_install_points(short_urls, json_path='cam_points.json'):
    """Resolve every short URL, then write the JSON file and Markdown table.

    For each URL: follow its redirects, read lat/lon from the final URL,
    reverse-geocode the position, and record the result. All records are
    dumped to *json_path*; a summary table is emitted through pytablewriter.

    Args:
        short_urls: iterable of short links to resolve.
        json_path: destination of the JSON dump.

    Returns:
        The list of per-point dicts that was written to *json_path*.

    Raises:
        ValueError: if an expanded URL carries no parsable coordinates.
        requests.RequestException: on network failure, timeout, or an HTTP
            error status from the reverse-geocoding service.
    """
    writer = ptw.MarkdownTableWriter()
    writer.table_name = "Punti installazione"
    writer.header_list = ["nome", "tipo", "lat", "lon", "url"]
    # Every column is declared as a string so lat/lon are not re-interpreted
    # (and possibly reformatted) as numbers by the table writer.
    writer.type_hint_list = [ptw.String] * 5
    writer.value_matrix = []
    ptlist = []

    for index, url in enumerate(short_urls):
        if index:
            # Space out consecutive reverse-geocoding requests.
            time.sleep(NOMINATIM_MIN_INTERVAL)

        # Only the final address is needed, so a HEAD request that follows
        # redirects is enough to expand the short link.
        resp = requests.head(url, allow_redirects=True, timeout=REQUEST_TIMEOUT)
        expanded = resp.url
        lat, lon = parse_lat_lon(expanded)

        nominatim_url = build_nominatim_url(lat, lon)
        nominatim_data = requests.get(
            nominatim_url, headers=NOMINATIM_HEADERS, timeout=REQUEST_TIMEOUT)
        nominatim_data.raise_for_status()
        nominatim_json = json.loads(nominatim_data.text)
        nome_cam = pick_place_name(nominatim_json)

        ptlist.append({
            'google_url_shortened': url,
            'google_url': expanded,
            'nominatim_url': nominatim_url,
            'lat': lat,
            'lon': lon,
            'geohash': Geohash.encode(lat, lon),
            'nominatim': nominatim_json,
            'name': nome_cam,
        })
        writer.value_matrix.append([nome_cam, "", str(lat), str(lon), url])

    with open(json_path, 'w') as outfile:
        json.dump(ptlist, outfile, sort_keys=True, indent=2, separators=(',', ': '))
    writer.write_table()
    return ptlist


if __name__ == "__main__":
    export_install_points(urls)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment