Last active
September 11, 2018 09:53
-
-
Save cenit/3b0eb229cdcd48fc17146faa60566d90 to your computer and use it in GitHub Desktop.
lat/lon from docx to nominatim
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python2
# -*- coding: utf-8 -*-
# In case of any problem on macOS, remember to export PYTHONIOENCODING=utf-8
# First extract document.xml by unzipping the docx
from collections import OrderedDict

# XML part to scan; it must already have been extracted from the .docx archive.
DOCUMENT_XML = 'document.xml'
# Substring that identifies the shortened links this script collects.
SHORT_URL_MARKER = 'https://goo.gl'


def extract_short_urls(lines, marker=SHORT_URL_MARKER):
    """Return the distinct short URLs found in *lines*, in first-seen order.

    Each line is cut at every '<' and '>' and the resulting fragments are
    cut again at spaces. A piece is kept when, after removing surrounding
    whitespace and double quotes, it contains *marker*.

    Args:
        lines: iterable of text lines (e.g. the result of ``readlines()``).
        marker: substring a piece must contain to be treated as a URL.

    Returns:
        A list of matching strings with duplicates removed, order preserved.
    """
    fragments = []
    for line in lines:
        # Splitting on '>' and then on '<' equals splitting on either one.
        fragments.extend(line.replace('>', '<').split('<'))

    found = []
    # The very last fragment of the document is discarded, as the original
    # script did; slicing (unlike `del tokens[-1]`) is safe on empty input.
    for fragment in fragments[:-1]:
        for piece in fragment.split(' '):
            # Strip whitespace first so a quote followed by a newline is
            # still removed and the same URL is not recorded twice.
            candidate = piece.strip().strip('"')
            if marker in candidate:
                found.append(candidate)

    # OrderedDict keeps the first occurrence of every URL, in document order.
    return list(OrderedDict.fromkeys(found))


# The script is meant to be executed directly; the guard keeps the file
# access out of the way when the helper above is imported on its own.
if __name__ == '__main__':
    with open(DOCUMENT_XML) as f:
        contents = f.readlines()
    urls = extract_short_urls(contents)
# 1 - unshorten each URL and read the coordinates embedded in the expanded link
# 2 - encode a geohash to create a unique ID for each install point
import json

# Text that immediately precedes "lat,lon,..." in an expanded maps URL.
MAPS_COORD_MARKER = "maps/@"
NOMINATIM_URL_PART1 = "https://nominatim.openstreetmap.org/reverse?format=jsonv2&lat="
NOMINATIM_URL_PART2 = "&lon="
# Seconds to wait for each HTTP call; without a timeout a stalled server
# would block the script forever.
REQUEST_TIMEOUT = 30
OUTPUT_JSON = 'cam_points.json'


def parse_lat_lon(expanded_url, marker=MAPS_COORD_MARKER):
    """Return ``(lat, lon)`` as floats parsed from an expanded maps URL.

    The coordinates are the first two comma-separated fields that follow
    *marker* in *expanded_url*.

    Raises:
        ValueError: if *marker* is absent, fewer than two fields follow it,
            or a field is not a valid float.
    """
    start = expanded_url.find(marker)
    if start == -1:
        # find() returning -1 would otherwise slice from an arbitrary offset.
        raise ValueError(
            "no {!r} marker in expanded URL: {!r}".format(marker, expanded_url))
    fields = expanded_url[start + len(marker):].split(",")
    if len(fields) < 2:
        raise ValueError(
            "no lat,lon pair after {!r} in: {!r}".format(marker, expanded_url))
    return float(fields[0]), float(fields[1])


def build_nominatim_url(lat, lon):
    """Return the Nominatim reverse-geocoding URL for *lat* / *lon*."""
    return NOMINATIM_URL_PART1 + str(lat) + NOMINATIM_URL_PART2 + str(lon)


def pick_place_name(nominatim_json):
    """Return a display name for an install point from a Nominatim reply.

    Preference order: ``address.pedestrian``, ``address.neighbourhood``,
    then the top-level ``display_name``; an empty string if none exists
    (for instance when the reply has no ``address`` at all).
    """
    address = nominatim_json.get('address') or {}
    for key in ('pedestrian', 'neighbourhood'):
        if key in address:
            return address[key]
    return nominatim_json.get('display_name', '')


# The script is meant to be executed directly. Third-party packages are
# imported here so the helpers above stay importable with the stdlib only.
if __name__ == '__main__':
    import requests
    import Geohash
    import pytablewriter as ptw

    writer = ptw.MarkdownTableWriter()
    writer.table_name = "Punti installazione"
    writer.header_list = ["nome", "tipo", "lat", "lon", "url"]
    # To avoid loss of precision for lat/lon, every field is declared a string.
    writer.type_hint_list = [ptw.String, ptw.String, ptw.String, ptw.String, ptw.String]
    writer.value_matrix = []
    ptlist = []

    # NOTE(review): the public Nominatim server publishes a usage policy
    # (identifying User-Agent, request rate) - confirm before bulk runs.
    # `urls` is the list of short links collected earlier in this script.
    for url in urls:
        # Following the redirects of a HEAD request yields the full URL.
        resp = requests.head(url, allow_redirects=True, timeout=REQUEST_TIMEOUT)
        expanded = resp.url
        lat, lon = parse_lat_lon(expanded)

        nominatim_url = build_nominatim_url(lat, lon)
        nominatim_data = requests.get(nominatim_url, timeout=REQUEST_TIMEOUT)
        # Fail with the HTTP status instead of a JSON decoding error.
        nominatim_data.raise_for_status()
        nominatim_json = json.loads(nominatim_data.text)
        nome_cam = pick_place_name(nominatim_json)

        jsonpt = {
            'google_url_shortened': url,
            'google_url': expanded,
            'nominatim_url': nominatim_url,
            'lat': lat,
            'lon': lon,
            'geohash': Geohash.encode(lat, lon),
            'nominatim': nominatim_json,
            'name': nome_cam,
        }
        ptlist.append(jsonpt)
        # The "tipo" column is left empty.
        writer.value_matrix.append([nome_cam, "", str(lat), str(lon), url])

    with open(OUTPUT_JSON, 'w') as outfile:
        json.dump(ptlist, outfile, sort_keys=True, indent=2, separators=(',', ': '))
    writer.write_table()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment