mthh/snap_on_osrm_nodes.py

## snap_on_osrm_nodes.py
# -*- coding: utf-8 -*-
"""
@author: mthh
-------------
Example of loading node coordinates from an osrm or osrm.node file
in order to search the nearest OSRM node of an input points dataset.
"""
import csv
import sys
import numpy as np
from time import time
from struct import unpack
from scipy.spatial import cKDTree
from multiprocessing.pool import ThreadPool


def haversine_distance(locs1, locs2):
    # (lat, lon) (lat, lon)
    locs1 = locs1 * 0.0174532925
    locs2 = locs2 * 0.0174532925
    cos_lat1 = np.cos(locs1[..., 0])
    cos_lat2 = np.cos(locs2[..., 0])
    cos_lat_d = np.cos(locs1[..., 0] - locs2[..., 0])
    cos_lon_d = np.cos(locs1[..., 1] - locs2[..., 1])
    return 6367 * np.arccos(cos_lat_d - cos_lat1 * cos_lat2 * (1 - cos_lon_d))

def load_osrm_nodes(osrm_node_path):
    """
    Load an .osrm.nodes (or .osrm) file and return a numpy array of (x, y).
    (Loading from the .osrm.nodes file is faster)

    Parameters
    ----------
    osrm_node_path: String
        The path of the osrm.nodes file
    """
    with open(osrm_node_path, 'rb') as f_osrm:
        if 'osrm.nodes' in osrm_node_path[-10:]:
            node_dtype = np.dtype(
                [('lat-long', (np.int32, 2)), ('Id', ('<I',1))]
                )
            nb_nodes = unpack('<I', f_osrm.read(4))[0]
            osrm_nodes = np.fromfile(f_osrm, dtype=node_dtype, count=nb_nodes)
            osrm_xy = osrm_nodes['lat-long']

        elif 'osrm' in osrm_node_path[-4:]:
            node_dtype = np.dtype(
                [('lat', '<i'), ('long', '<i'), ('Id', '<I'), ('Flags', '<I')]
                )
            f_osrm.seek(156)
            nb_nodes = unpack('<I', f_osrm.read(4))[0]
            osrm_nodes = np.fromfile(f_osrm, dtype=node_dtype, count=nb_nodes)
            osrm_xy = np.array(
                [(i[0], i[1]) for i in osrm_nodes[['lat', 'long']]])

    print("{} OSRM nodes".format(nb_nodes))
    return osrm_xy

def load_coords_to_snap(to_snap_filepath):
    """
    Read the points to snap from a csv file and return a numpy array of (x, y).
    Assuming the points are in a csv (with a header line) and the rows are like
        longitude, latitude, id
    Parameters
    ----------
    to_snap_filepath: String
        The path of the csv file (containing pts coordinates)
    """
    with open(to_snap_filepath, 'r', encoding='utf-8') as fpts:
        reader = csv.reader(fpts)
        next(reader) # Skip the header
        coords = np.array([(round(float(row[1]), 6)*1000000,
                            round(float(row[0]), 6)*1000000) for row in reader])
    print("{} pts to snap".format(len(coords)))
    return coords

def snap(osrm_node_file, to_snap_filepath, output_file):
    """
    Parameters
    ----------
    osrm_node_file: String
        The path of the osrm.nodes file
    to_snap_filepath: String
        The path of the csv file (containing pts coordinates)
    output_file: String
        The path for the result csv file to write.
    """
    ref_coords = load_osrm_nodes(osrm_node_file)
    to_snap_coords = load_coords_to_snap(to_snap_filepath)
    tree = cKDTree(ref_coords)
    _, idx = tree.query(to_snap_coords, k=1)
    res = tree.data[idx]
    res = res / 1000000
    to_snap_coords = to_snap_coords / 1000000
    dist = haversine_distance(to_snap_coords, res)
    with open(output_file, 'w', encoding='utf-8', newline='') as f_out:
        writer = csv.writer(f_out)
        writer.writerow(['uid', 'x_snap', 'y_snap', 'x_ref', 'y_ref', 'dist'])
        writer.writerows(
            [[i, res[i][1], res[i][0], to_snap_coords[i][1], to_snap_coords[i][0], dist[i]] for i in range(len(res))])

def snap_thread_pool_load(osrm_node_file, to_snap_filepath, output_file):
    pool = ThreadPool(processes=4)
    p1 = pool.apply_async(load_osrm_nodes, [osrm_node_file])
    p2 = pool.apply_async(load_coords_to_snap, [to_snap_filepath])
    to_snap_coords = p2.get()
    ref_coords = p1.get()
    tree = cKDTree(ref_coords)
    _, idx = tree.query(to_snap_coords, k=1)
    res = tree.data[idx]
    res = res / 1000000
    to_snap_coords = to_snap_coords / 1000000
    dist = haversine_distance(to_snap_coords, res)
    with open(output_file, 'w', encoding='utf-8', newline='') as f_out:
        writer = csv.writer(f_out)
        writer.writerow(['uid', 'x_snap', 'y_snap', 'x_ref', 'y_ref', 'dist'])
        writer.writerows(
            [[i, res[i][1], res[i][0], to_snap_coords[i][1], to_snap_coords[i][0], dist[i]] for i in range(len(res))])

if __name__ == '__main__':
    try:
        osrm_node_file = sys.argv[1]
        to_snap_filepath = sys.argv[2]
        output_file = sys.argv[3]
    except IndexError:
        print('Usage:\n{}  <file.osrm.nodes>  <file_to_snap.csv>  <result.csv>'
              .format(sys.argv[0]))
        sys.exit()
    s_t=time()
    snap(osrm_node_file, to_snap_filepath, output_file)
    print('{:.2f}s\nResults saved in {}'.format(time()-s_t, output_file))
	# -- coding: utf-8 --
	"""
	@author: mthh
	-------------
	Example of loading node coordinates from an osrm or osrm.node file
	in order to search the nearest OSRM node of an input points dataset.
	"""
	import csv
	import sys
	import numpy as np
	from time import time
	from struct import unpack
	from scipy.spatial import cKDTree
	from multiprocessing.pool import ThreadPool


	def haversine_distance(locs1, locs2):
	# (lat, lon) (lat, lon)
	locs1 = locs1 * 0.0174532925
	locs2 = locs2 * 0.0174532925
	cos_lat1 = np.cos(locs1[..., 0])
	cos_lat2 = np.cos(locs2[..., 0])
	cos_lat_d = np.cos(locs1[..., 0] - locs2[..., 0])
	cos_lon_d = np.cos(locs1[..., 1] - locs2[..., 1])
	return 6367 * np.arccos(cos_lat_d - cos_lat1 * cos_lat2 * (1 - cos_lon_d))

	def load_osrm_nodes(osrm_node_path):
	"""
	Load an .osrm.nodes (or .osrm) file and return a numpy array of (x, y).
	(Loading from the .osrm.nodes file is faster)

	Parameters
	----------
	osrm_node_path: String
	The path of the osrm.nodes file
	"""
	with open(osrm_node_path, 'rb') as f_osrm:
	if 'osrm.nodes' in osrm_node_path[-10:]:
	node_dtype = np.dtype(
	[('lat-long', (np.int32, 2)), ('Id', ('<I',1))]
	)
	nb_nodes = unpack('<I', f_osrm.read(4))[0]
	osrm_nodes = np.fromfile(f_osrm, dtype=node_dtype, count=nb_nodes)
	osrm_xy = osrm_nodes['lat-long']

	elif 'osrm' in osrm_node_path[-4:]:
	node_dtype = np.dtype(
	[('lat', '<i'), ('long', '<i'), ('Id', '<I'), ('Flags', '<I')]
	)
	f_osrm.seek(156)
	nb_nodes = unpack('<I', f_osrm.read(4))[0]
	osrm_nodes = np.fromfile(f_osrm, dtype=node_dtype, count=nb_nodes)
	osrm_xy = np.array(
	[(i[0], i[1]) for i in osrm_nodes[['lat', 'long']]])

	print("{} OSRM nodes".format(nb_nodes))
	return osrm_xy

	def load_coords_to_snap(to_snap_filepath):
	"""
	Read the points to snap from a csv file and return a numpy array of (x, y).
	Assuming the points are in a csv (with a header line) and the rows are like
	longitude, latitude, id
	Parameters
	----------
	to_snap_filepath: String
	The path of the csv file (containing pts coordinates)
	"""
	with open(to_snap_filepath, 'r', encoding='utf-8') as fpts:
	reader = csv.reader(fpts)
	next(reader) # Skip the header
	coords = np.array([(round(float(row[1]), 6)*1000000,
	round(float(row[0]), 6)*1000000) for row in reader])
	print("{} pts to snap".format(len(coords)))
	return coords

	def snap(osrm_node_file, to_snap_filepath, output_file):
	"""
	Parameters
	----------
	osrm_node_file: String
	The path of the osrm.nodes file
	to_snap_filepath: String
	The path of the csv file (containing pts coordinates)
	output_file: String
	The path for the result csv file to write.
	"""
	ref_coords = load_osrm_nodes(osrm_node_file)
	to_snap_coords = load_coords_to_snap(to_snap_filepath)
	tree = cKDTree(ref_coords)
	_, idx = tree.query(to_snap_coords, k=1)
	res = tree.data[idx]
	res = res / 1000000
	to_snap_coords = to_snap_coords / 1000000
	dist = haversine_distance(to_snap_coords, res)
	with open(output_file, 'w', encoding='utf-8', newline='') as f_out:
	writer = csv.writer(f_out)
	writer.writerow(['uid', 'x_snap', 'y_snap', 'x_ref', 'y_ref', 'dist'])
	writer.writerows(
	[[i, res[i][1], res[i][0], to_snap_coords[i][1], to_snap_coords[i][0], dist[i]] for i in range(len(res))])

	def snap_thread_pool_load(osrm_node_file, to_snap_filepath, output_file):
	pool = ThreadPool(processes=4)
	p1 = pool.apply_async(load_osrm_nodes, [osrm_node_file])
	p2 = pool.apply_async(load_coords_to_snap, [to_snap_filepath])
	to_snap_coords = p2.get()
	ref_coords = p1.get()
	tree = cKDTree(ref_coords)
	_, idx = tree.query(to_snap_coords, k=1)
	res = tree.data[idx]
	res = res / 1000000
	to_snap_coords = to_snap_coords / 1000000
	dist = haversine_distance(to_snap_coords, res)
	with open(output_file, 'w', encoding='utf-8', newline='') as f_out:
	writer = csv.writer(f_out)
	writer.writerow(['uid', 'x_snap', 'y_snap', 'x_ref', 'y_ref', 'dist'])
	writer.writerows(
	[[i, res[i][1], res[i][0], to_snap_coords[i][1], to_snap_coords[i][0], dist[i]] for i in range(len(res))])

	if __name__ == '__main__':
	try:
	osrm_node_file = sys.argv[1]
	to_snap_filepath = sys.argv[2]
	output_file = sys.argv[3]
	except IndexError:
	print('Usage:\n{} <file.osrm.nodes> <file_to_snap.csv> <result.csv>'
	.format(sys.argv[0]))
	sys.exit()
	s_t=time()
	snap(osrm_node_file, to_snap_filepath, output_file)
	print('{:.2f}s\nResults saved in {}'.format(time()-s_t, output_file))