Skip to content

Instantly share code, notes, and snippets.

@mthh
Created November 6, 2015 09:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mthh/9cf7fdf682c4c81ebed6 to your computer and use it in GitHub Desktop.
Save mthh/9cf7fdf682c4c81ebed6 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
"""
@author: mthh
-------------
Example of loading node coordinates from an osrm or osrm.node file
in order to search the nearest OSRM node of an input points dataset.
"""
import csv
import sys
import numpy as np
from time import time
from struct import unpack
from scipy.spatial import cKDTree
from multiprocessing.pool import ThreadPool
def haversine_distance(locs1, locs2):
# (lat, lon) (lat, lon)
locs1 = locs1 * 0.0174532925
locs2 = locs2 * 0.0174532925
cos_lat1 = np.cos(locs1[..., 0])
cos_lat2 = np.cos(locs2[..., 0])
cos_lat_d = np.cos(locs1[..., 0] - locs2[..., 0])
cos_lon_d = np.cos(locs1[..., 1] - locs2[..., 1])
return 6367 * np.arccos(cos_lat_d - cos_lat1 * cos_lat2 * (1 - cos_lon_d))
def load_osrm_nodes(osrm_node_path):
"""
Load an .osrm.nodes (or .osrm) file and return a numpy array of (x, y).
(Loading from the .osrm.nodes file is faster)
Parameters
----------
osrm_node_path: String
The path of the osrm.nodes file
"""
with open(osrm_node_path, 'rb') as f_osrm:
if 'osrm.nodes' in osrm_node_path[-10:]:
node_dtype = np.dtype(
[('lat-long', (np.int32, 2)), ('Id', ('<I',1))]
)
nb_nodes = unpack('<I', f_osrm.read(4))[0]
osrm_nodes = np.fromfile(f_osrm, dtype=node_dtype, count=nb_nodes)
osrm_xy = osrm_nodes['lat-long']
elif 'osrm' in osrm_node_path[-4:]:
node_dtype = np.dtype(
[('lat', '<i'), ('long', '<i'), ('Id', '<I'), ('Flags', '<I')]
)
f_osrm.seek(156)
nb_nodes = unpack('<I', f_osrm.read(4))[0]
osrm_nodes = np.fromfile(f_osrm, dtype=node_dtype, count=nb_nodes)
osrm_xy = np.array(
[(i[0], i[1]) for i in osrm_nodes[['lat', 'long']]])
print("{} OSRM nodes".format(nb_nodes))
return osrm_xy
def load_coords_to_snap(to_snap_filepath):
"""
Read the points to snap from a csv file and return a numpy array of (x, y).
Assuming the points are in a csv (with a header line) and the rows are like
longitude, latitude, id
Parameters
----------
to_snap_filepath: String
The path of the csv file (containing pts coordinates)
"""
with open(to_snap_filepath, 'r', encoding='utf-8') as fpts:
reader = csv.reader(fpts)
next(reader) # Skip the header
coords = np.array([(round(float(row[1]), 6)*1000000,
round(float(row[0]), 6)*1000000) for row in reader])
print("{} pts to snap".format(len(coords)))
return coords
def snap(osrm_node_file, to_snap_filepath, output_file):
"""
Parameters
----------
osrm_node_file: String
The path of the osrm.nodes file
to_snap_filepath: String
The path of the csv file (containing pts coordinates)
output_file: String
The path for the result csv file to write.
"""
ref_coords = load_osrm_nodes(osrm_node_file)
to_snap_coords = load_coords_to_snap(to_snap_filepath)
tree = cKDTree(ref_coords)
_, idx = tree.query(to_snap_coords, k=1)
res = tree.data[idx]
res = res / 1000000
to_snap_coords = to_snap_coords / 1000000
dist = haversine_distance(to_snap_coords, res)
with open(output_file, 'w', encoding='utf-8', newline='') as f_out:
writer = csv.writer(f_out)
writer.writerow(['uid', 'x_snap', 'y_snap', 'x_ref', 'y_ref', 'dist'])
writer.writerows(
[[i, res[i][1], res[i][0], to_snap_coords[i][1], to_snap_coords[i][0], dist[i]] for i in range(len(res))])
def snap_thread_pool_load(osrm_node_file, to_snap_filepath, output_file):
pool = ThreadPool(processes=4)
p1 = pool.apply_async(load_osrm_nodes, [osrm_node_file])
p2 = pool.apply_async(load_coords_to_snap, [to_snap_filepath])
to_snap_coords = p2.get()
ref_coords = p1.get()
tree = cKDTree(ref_coords)
_, idx = tree.query(to_snap_coords, k=1)
res = tree.data[idx]
res = res / 1000000
to_snap_coords = to_snap_coords / 1000000
dist = haversine_distance(to_snap_coords, res)
with open(output_file, 'w', encoding='utf-8', newline='') as f_out:
writer = csv.writer(f_out)
writer.writerow(['uid', 'x_snap', 'y_snap', 'x_ref', 'y_ref', 'dist'])
writer.writerows(
[[i, res[i][1], res[i][0], to_snap_coords[i][1], to_snap_coords[i][0], dist[i]] for i in range(len(res))])
if __name__ == '__main__':
try:
osrm_node_file = sys.argv[1]
to_snap_filepath = sys.argv[2]
output_file = sys.argv[3]
except IndexError:
print('Usage:\n{} <file.osrm.nodes> <file_to_snap.csv> <result.csv>'
.format(sys.argv[0]))
sys.exit()
s_t=time()
snap(osrm_node_file, to_snap_filepath, output_file)
print('{:.2f}s\nResults saved in {}'.format(time()-s_t, output_file))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment