fscottfoti/easy_pandana_script.py

## easy_pandana_script.py
"""
Steps to install environment

1)
Get Anaconda (Python with included packages) and install it
https://www.continuum.io/downloads

2)
Get network file and put it in the same directory as this file
http://urbanforecast.com/data/2015_06_01_osm_bayarea4326.h5

3)
Install Pandana - if you let Anaconda add python to your path just run
pip install -U pandana

If you encounter problems, make sure you're using Anaconda

"""


import pandas as pd
import numpy as np
import pandana as pdna
import pyproj
import time


def coord_convert(x, y, srcsrid=3857, tgtsrid=4326):
    """Reproject one coordinate pair from one EPSG reference system to another.

    Parameters
    ----------
    x, y
        Coordinate in the source reference system.
    srcsrid : int
        EPSG code of the source reference system (default 3857).
    tgtsrid : int
        EPSG code of the target reference system (default 4326).

    Returns
    -------
    tuple
        ``(lon, lat)`` as returned by ``pyproj.transform``, or
        ``(nan, nan)`` when the transform raises for this input.
    """
    src_proj = pyproj.Proj("+init=EPSG:%d" % srcsrid)
    tgt_proj = pyproj.Proj("+init=EPSG:%d" % tgtsrid)
    try:
        lon, lat = pyproj.transform(src_proj, tgt_proj, x, y)
    except Exception:
        # this only happens because there are unescaped commas, which leave
        # non-numeric text in the coordinate fields. Catch Exception rather
        # than everything so Ctrl-C / SystemExit still stop a long run.
        lon, lat = np.nan, np.nan
    return lon, lat


def convert_df(df, xname='x', yname='y', srcsrid=3857, tgtsrid=4326):
    """Reproject the coordinate columns of ``df`` row by row.

    Each ``(df[xname], df[yname])`` pair is passed through ``coord_convert``
    and the two columns are overwritten with the results. Rows that
    ``coord_convert`` cannot handle end up as NaN in both columns. The row
    index is printed every 25000 rows as a progress indicator.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the coordinate columns; it is modified in place.
    xname, yname : str
        Names of the x and y coordinate columns.
    srcsrid, tgtsrid : int
        EPSG codes forwarded to ``coord_convert``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with both columns replaced by float arrays.
    """
    src_x = df[xname].values
    src_y = df[yname].values
    n_rows = len(df.index)

    # Collect results in fresh float arrays rather than writing back into
    # ``.values``: an integer-typed column would truncate the converted
    # values (and reject NaN), an object-typed one would stay object, and
    # pandas may hand out a read-only array.
    out_x = np.empty(n_rows, dtype=float)
    out_y = np.empty(n_rows, dtype=float)

    for i, (x, y) in enumerate(zip(src_x, src_y)):
        if i % 25000 == 0:
            # single-argument print(...) is valid on Python 2 and 3
            print(i)
        out_x[i], out_y[i] = coord_convert(x, y,
                                           srcsrid=srcsrid, tgtsrid=tgtsrid)

    df[xname] = out_x
    df[yname] = out_y
    return df

# read csv
# NOTE(review): a sample run printed the first poi_id with stray leading
# bytes, which looks like an undecoded UTF-8 byte-order mark; if poi_id is
# ever needed, try encoding="utf-8-sig" here - confirm against poi.csv.
df = pd.read_csv("poi.csv",
                 names=["poi_id", "cat", "subcat", "name", "x", "y"],
                 low_memory=False)

# print(...) with a single argument behaves the same on Python 2 and 3
print("Converting CRS, will drop names with commas in the name field")
t1 = time.time()
df = convert_df(df)
print(df.head())
print("Took {:.2f}s".format(time.time() - t1))

# rows whose coordinates could not be converted come back as NaN in x or y
newdf = df.dropna(subset=["x", "y"])
print("Dropping {} rows because names have commas".format(len(df) - len(newdf)))
df = newdf

# see docs here for detailed api info
# http://udst.github.io/pandana/tutorial.html

print("\n\nLoading network")
t1 = time.time()
st = pd.HDFStore("2015_06_01_osm_bayarea4326.h5", "r")
try:
    # both tables are read into memory here, so the store can be closed
    # right away instead of being left open until interpreter exit
    nodes, edges = st.nodes, st.edges
finally:
    st.close()
net = pdna.Network(nodes["x"], nodes["y"], edges["from"], edges["to"],
                   edges[["weight"]])
print("Took {:.2f}s".format(time.time() - t1))

# initialize 1 category and up to 10 returned pois
net.init_pois(num_categories=1, max_dist=3000, max_pois=10)

# set the cafe locations
print("\n\nInitializing POIs")
t1 = time.time()
# filter to cafes
cafes = df.query("cat == 9376")
print("There are {} cafes".format(len(cafes)))
net.set_pois("cafes", cafes.x, cafes.y)
print("Took {:.2f}s".format(time.time() - t1))

# run the query
print("\n\nRunning queries")
t1 = time.time()
output = net.nearest_pois(3000, "cafes", num_pois=10)
# add back the locations
output["x"], output["y"] = nodes["x"], nodes["y"]
output.index.name = "node_id"
print("Took {:.2f}s".format(time.time() - t1))

print(output.head())

output.to_csv("nearest_cafes.csv")

## output.log
Converting CRS, will drop names with commas in the name field
0
25000
50000
75000
100000
125000
150000
175000
200000
225000
250000
275000
300000
325000
350000
375000
400000
425000
450000
475000
500000
525000
550000
575000
600000
625000
650000
675000
700000
725000
750000
775000
800000
825000
850000
875000
900000
                     poi_id   cat   subcat                             name  \
0  ï»¿68400002832950.00000000  7302  7302005           Always A Good Ride Llc
1   68400004491250.00000000  7302  7302005                Barking Elf Ranch
2   68400010247164.00000000  7302  7302005   Crossroads Ranch Riding Stable
3   68400007041822.00000000  7303  7303006                 1 Percent Condos
4   68400024624325.00000000  7303  7303003           1 San Ramon Apartments

         x        y
0 -117.582  33.6243
1 -117.188  33.0879
2 -122.674  38.3049
3 -117.127  32.7557
4 -119.765  36.8143
Took 43.08s
Dropping 2362 rows because names have commas


Loading network
Generating contraction hierarchies with 4 threads.
Setting CH node vector of size 226060
Setting CH edge vector of size 287161
[info src/contraction_hierarchies/src/libch.cpp:205] Range graph removed 8884 edges of 574322
. 10% . 20% . 30% . 40% . 50% . 60% . 70% . 80% . 90% . 100%
Closing remaining open files:2015_06_01_osm_bayarea4326.h5...done
Took 8.30s


Initializing POIs
There are 13103 cafes
Took 0.08s


Running queries
Took 0.87s
                  1            2            3            4            5  \
node_id
8        660.786987  3000.000000  3000.000000  3000.000000  3000.000000
9        322.532013  2685.018066  2784.846924  2882.115967  2882.115967
10         0.000000  2362.486084  2462.314941  2559.583984  2559.583984
11       218.505005  2143.980957  2243.810059  2341.079102  2341.079102
12       235.199005  2127.287109  2227.115967  2324.385010  2324.385010

                   6            7            8            9      10  \
node_id
8        3000.000000  3000.000000  3000.000000  3000.000000  3000.0
9        2943.325928  2993.888916  3000.000000  3000.000000  3000.0
10       2620.793945  2671.356934  2802.144043  3000.000000  3000.0
11       2402.289062  2452.852051  2583.638916  2932.696045  3000.0
12       2385.594971  2436.157959  2566.945068  2916.001953  3000.0

                  x          y
node_id
8       -121.546785  36.997092
9       -121.548863  36.999641
10      -121.550719  37.002132
11      -121.550795  37.004075
12      -121.550784  37.004226
	"""
	Steps to install environment

	1)
	Get Anaconda (Python with included packages) and install it
	https://www.continuum.io/downloads

	2)
	Get network file and put it in the same directory as this file
	http://urbanforecast.com/data/2015_06_01_osm_bayarea4326.h5

	3)
	Install Pandana - if you let Anaconda add python to your path just run
	pip install -U pandana

	If you encounter problems, make sure you're using Anaconda

	"""


	import pandas as pd
	import numpy as np
	import pandana as pdna
	import pyproj
	import time


	def coord_convert(x, y, srcsrid=3857, tgtsrid=4326):
	    """Reproject one coordinate pair from one EPSG reference system to another.

	    Parameters
	    ----------
	    x, y
	        Coordinate in the source reference system.
	    srcsrid : int
	        EPSG code of the source reference system (default 3857).
	    tgtsrid : int
	        EPSG code of the target reference system (default 4326).

	    Returns
	    -------
	    tuple
	        ``(lon, lat)`` as returned by ``pyproj.transform``, or
	        ``(nan, nan)`` when the transform raises for this input.
	    """
	    src_proj = pyproj.Proj("+init=EPSG:%d" % srcsrid)
	    tgt_proj = pyproj.Proj("+init=EPSG:%d" % tgtsrid)
	    try:
	        lon, lat = pyproj.transform(src_proj, tgt_proj, x, y)
	    except Exception:
	        # this only happens because there are unescaped commas, which leave
	        # non-numeric text in the coordinate fields. Catch Exception rather
	        # than everything so Ctrl-C / SystemExit still stop a long run.
	        lon, lat = np.nan, np.nan
	    return lon, lat


	def convert_df(df, xname='x', yname='y', srcsrid=3857, tgtsrid=4326):
	    """Reproject the coordinate columns of ``df`` row by row.

	    Each ``(df[xname], df[yname])`` pair is passed through ``coord_convert``
	    and the two columns are overwritten with the results. Rows that
	    ``coord_convert`` cannot handle end up as NaN in both columns. The row
	    index is printed every 25000 rows as a progress indicator.

	    Parameters
	    ----------
	    df : pandas.DataFrame
	        Frame holding the coordinate columns; it is modified in place.
	    xname, yname : str
	        Names of the x and y coordinate columns.
	    srcsrid, tgtsrid : int
	        EPSG codes forwarded to ``coord_convert``.

	    Returns
	    -------
	    pandas.DataFrame
	        The same ``df`` object, with both columns replaced by float arrays.
	    """
	    src_x = df[xname].values
	    src_y = df[yname].values
	    n_rows = len(df.index)

	    # Collect results in fresh float arrays rather than writing back into
	    # ``.values``: an integer-typed column would truncate the converted
	    # values (and reject NaN), an object-typed one would stay object, and
	    # pandas may hand out a read-only array.
	    out_x = np.empty(n_rows, dtype=float)
	    out_y = np.empty(n_rows, dtype=float)

	    for i, (x, y) in enumerate(zip(src_x, src_y)):
	        if i % 25000 == 0:
	            # single-argument print(...) is valid on Python 2 and 3
	            print(i)
	        out_x[i], out_y[i] = coord_convert(x, y,
	                                           srcsrid=srcsrid, tgtsrid=tgtsrid)

	    df[xname] = out_x
	    df[yname] = out_y
	    return df

	# read csv
	# NOTE(review): a sample run printed the first poi_id with stray leading
	# bytes, which looks like an undecoded UTF-8 byte-order mark; if poi_id is
	# ever needed, try encoding="utf-8-sig" here - confirm against poi.csv.
	df = pd.read_csv("poi.csv",
	                 names=["poi_id", "cat", "subcat", "name", "x", "y"],
	                 low_memory=False)

	# print(...) with a single argument behaves the same on Python 2 and 3
	print("Converting CRS, will drop names with commas in the name field")
	t1 = time.time()
	df = convert_df(df)
	print(df.head())
	print("Took {:.2f}s".format(time.time() - t1))

	# rows whose coordinates could not be converted come back as NaN in x or y
	newdf = df.dropna(subset=["x", "y"])
	print("Dropping {} rows because names have commas".format(len(df) - len(newdf)))
	df = newdf

	# see docs here for detailed api info
	# http://udst.github.io/pandana/tutorial.html

	print("\n\nLoading network")
	t1 = time.time()
	st = pd.HDFStore("2015_06_01_osm_bayarea4326.h5", "r")
	try:
	    # both tables are read into memory here, so the store can be closed
	    # right away instead of being left open until interpreter exit
	    nodes, edges = st.nodes, st.edges
	finally:
	    st.close()
	net = pdna.Network(nodes["x"], nodes["y"], edges["from"], edges["to"],
	                   edges[["weight"]])
	print("Took {:.2f}s".format(time.time() - t1))

	# initialize 1 category and up to 10 returned pois
	net.init_pois(num_categories=1, max_dist=3000, max_pois=10)

	# set the cafe locations
	print("\n\nInitializing POIs")
	t1 = time.time()
	# filter to cafes
	cafes = df.query("cat == 9376")
	print("There are {} cafes".format(len(cafes)))
	net.set_pois("cafes", cafes.x, cafes.y)
	print("Took {:.2f}s".format(time.time() - t1))

	# run the query
	print("\n\nRunning queries")
	t1 = time.time()
	output = net.nearest_pois(3000, "cafes", num_pois=10)
	# add back the locations
	output["x"], output["y"] = nodes["x"], nodes["y"]
	output.index.name = "node_id"
	print("Took {:.2f}s".format(time.time() - t1))

	print(output.head())

	output.to_csv("nearest_cafes.csv")
	Converting CRS, will drop names with commas in the name field
	0
	25000
	50000
	75000
	100000
	125000
	150000
	175000
	200000
	225000
	250000
	275000
	300000
	325000
	350000
	375000
	400000
	425000
	450000
	475000
	500000
	525000
	550000
	575000
	600000
	625000
	650000
	675000
	700000
	725000
	750000
	775000
	800000
	825000
	850000
	875000
	900000
	poi_id cat subcat name \
	0 ï»¿68400002832950.00000000 7302 7302005 Always A Good Ride Llc
	1 68400004491250.00000000 7302 7302005 Barking Elf Ranch
	2 68400010247164.00000000 7302 7302005 Crossroads Ranch Riding Stable
	3 68400007041822.00000000 7303 7303006 1 Percent Condos
	4 68400024624325.00000000 7303 7303003 1 San Ramon Apartments

	x y
	0 -117.582 33.6243
	1 -117.188 33.0879
	2 -122.674 38.3049
	3 -117.127 32.7557
	4 -119.765 36.8143
	Took 43.08s
	Dropping 2362 rows because names have commas


	Loading network
	Generating contraction hierarchies with 4 threads.
	Setting CH node vector of size 226060
	Setting CH edge vector of size 287161
	[info src/contraction_hierarchies/src/libch.cpp:205] Range graph removed 8884 edges of 574322
	. 10% . 20% . 30% . 40% . 50% . 60% . 70% . 80% . 90% . 100%
	Closing remaining open files:2015_06_01_osm_bayarea4326.h5...done
	Took 8.30s


	Initializing POIs
	There are 13103 cafes
	Took 0.08s


	Running queries
	Took 0.87s
	1 2 3 4 5 \
	node_id
	8 660.786987 3000.000000 3000.000000 3000.000000 3000.000000
	9 322.532013 2685.018066 2784.846924 2882.115967 2882.115967
	10 0.000000 2362.486084 2462.314941 2559.583984 2559.583984
	11 218.505005 2143.980957 2243.810059 2341.079102 2341.079102
	12 235.199005 2127.287109 2227.115967 2324.385010 2324.385010

	6 7 8 9 10 \
	node_id
	8 3000.000000 3000.000000 3000.000000 3000.000000 3000.0
	9 2943.325928 2993.888916 3000.000000 3000.000000 3000.0
	10 2620.793945 2671.356934 2802.144043 3000.000000 3000.0
	11 2402.289062 2452.852051 2583.638916 2932.696045 3000.0
	12 2385.594971 2436.157959 2566.945068 2916.001953 3000.0

	x y
	node_id
	8 -121.546785 36.997092
	9 -121.548863 36.999641
	10 -121.550719 37.002132
	11 -121.550795 37.004075
	12 -121.550784 37.004226