Skip to content

Instantly share code, notes, and snippets.

@statsccpr
Last active April 22, 2016 22:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save statsccpr/0e135f425d5541a42882fb3a4f8c53f4 to your computer and use it in GitHub Desktop.
Save statsccpr/0e135f425d5541a42882fb3a4f8c53f4 to your computer and use it in GitHub Desktop.
gis_demography_exer_fin_pub.ipynb
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# this only has to be done once (as the root ubuntu user)
# get 'gdal' needed for 'rgdal' and 'rgeos'
# http://robinlovelace.net/r/2013/11/26/installing-rgdal-on-ubuntu.html
# aptitude better package manager than apt-get for gdal
# sudo apt-get update
# sudo apt-get install aptitude
# sudo aptitude install libgdal-dev
# sudo aptitude install libproj-dev
# choose rstudio as source package repository
# options(repos = c(CRAN = "http://cran.rstudio.com"));
# install.packages('deldir')
# install.packages('dplyr')
# install.packages('jsonlite',dependencies=TRUE)
# install.packages('maptools')
# install.packages('rgeos')
# install.packages('rgdal',dependencies=TRUE)
# install.packages('leaflet')
library(dplyr)
library(jsonlite)
library(maptools); gpclibPermit()
library(deldir)
library(rgeos)
library(rgdal)
library(leaflet)
sessionInfo()
# an [r] command to tell linux shell to 'download' a file from a url
# system() --> wget --> url
# download
# system('wget http://www2.census.gov/geo/tiger/TIGER2015/TRACT/tl_2015_06_tract.zip')
# unzip
# system('mkdir -p ~/notebooks/gis_demog_misc')
# system('unzip /notebooks/gis_demog_misc/tl_2015_06_tract.zip -d ~/notebooks/gis_demog_misc/tl_2015_06_tract')
# to bring up r's help manual, use syntax "?function_name"
?readShapeSpatial
#################
# shapefile - tract
#################
# library(maptools)
# directory location of shapefiles
shapefile_location = '~/notebooks/gis_demog_misc/tl_2015_06_tract/'
ca_shp <- readShapeSpatial(paste0(shapefile_location,'tl_2015_06_tract.shp'))
# specify projection
proj4string(ca_shp) <- "+proj=longlat +datum=WGS84"
plot(ca_shp)
# notice how long it takes, shapefiles are "bloated"
names(ca_shp)
# just subset to la county
la_county = subset(ca_shp, ca_shp$COUNTYFP == '037')
plot(la_county)
#################
# shapefile - tract
#################
# plot(la_county)
# locator to interactively choose points of convex hull for custom bbox
# Click points to draw your own custom boundary then hit keyboard 'esc' to finalize
# chull_locat = locator()
# plot(chull(chull_locat))
# use the new custom boundary points to crop out ignored areas
# library(spatstat)
# bbox_cust = bounding.box.xy(chull_locat)
# bb_poly <- as((bbox_cust), "SpatialPolygons")
# proj4string(bb_poly) <- CRS(proj4string(la_county))
# library(raster)
# la_county_zoom = raster::crop(la_county,bb_poly)
# plot(la_county_zoom)
# save(la_county_zoom,file='/notebooks/gis_demog_misc/la_county_zoom_tvmagic.RData')
load("~/notebooks/gis_demog_misc/la_county_zoom_tvmagic.RData")
plot(la_county_zoom)
####################
# get la county bdry
# ?rgeos::gUnionCascaded to dissolve interior polygons
####################
# plot(la_county_zoom)
# library(rgeos)
la_county_bdry = rgeos::gUnionCascaded(la_county_zoom)
plot(la_county_bdry)
# Plug in your API Key (Census API)
api_key_census = 'foo_your_key' # blank out
base_url_census = 'http://api.census.gov/data/2015/pdb/tract?get='
query = paste0(base_url_census,
'Tot_Population_CEN_2010&for=tract:*&in=state:06+county:037&key=',
api_key_census)
# library(jsonlite)
dat_raw_census = fromJSON(query)
str(dat_raw_census)
# use first row of matrix as variables' name (column names) for data frame
colnames(dat_raw_census) = dat_raw_census[1,]
dat_raw_census = dat_raw_census[-1,]
str(dat_raw_census)
head(dat_raw_census)
dat_census = data.frame(Tot_Population_CEN_2010 = as.numeric(dat_raw_census[,1]),
state = as.character(dat_raw_census[,2]),
county = as.character(dat_raw_census[,3]),
tract = as.character(dat_raw_census[,4]),
stringsAsFactors=FALSE
)
head(dat_census)
# Plug in your API Key (Google Radar API)
api_key_radar = 'foo_your_key' # blank out
api_key=api_key_radar
keyword='in-n-out'
types='food'
radius='50000' # 50000 meters (about 31 miles)
# location='34.072422,-118.0776237' # 'lat,lng' order # rosemead # landmark3
# Google's API documentation tells us to treat
# each gps point as a text string of "latitude,longitude"
# Since we're going to 'glue' the string into a website URL
landmark1 = "33.78619,-117.8751"
landmark2 = "33.92515,-117.5191"
landmark3 = "34.20306,-118.3817"
class(landmark1)
# result example for 1 landmark
location = landmark1
call_url = paste0('https://maps.googleapis.com/maps/api/place/radarsearch/json?',
'location=',location, # 'lat,lng' order
'&radius=',radius,
'&types=',types,
'&keyword=',keyword,
'&key=',api_key
)
# library(jsonlite)
# ?jsonlite
dat_raw = fromJSON(call_url)
str(dat_raw)
dat_raw$results$geometry$location$lat
dat_raw$results$geometry$location$lng
plot(y=dat_raw$results$geometry$location$lat,
x=dat_raw$results$geometry$location$lng
)
# wrap into function so we can iterate over the 3 landmarks
get_gps_custom_func = function(your_landmark){
location = your_landmark
call_url = paste0('https://maps.googleapis.com/maps/api/place/radarsearch/json?',
'location=',location, # 'lat,lng' order
'&radius=',radius,
'&types=',types,
'&keyword=',keyword,
'&key=',api_key
)
library(jsonlite)
# ?jsonlite
dat_raw = fromJSON(call_url)
# str(dat_raw)
dat_relevant = cbind(dat_raw$results[-1],
location.lat=(dat_raw$results$geometry$location$lat), # explicit redundancy helpful (already in results)
location.lng=(dat_raw$results$geometry$location$lng)
)
return(dat_relevant)
}
list(landmark1,landmark2,landmark3)
# apply our custom function to iterate over the list of 3 landmarks
# basically a 'for-loop' but cleaner syntax
out_landmark_123 = lapply(list(landmark1,landmark2,landmark3),
get_gps_custom_func
)
str(out_landmark_123)
# ?rbind_all row binds the elements of the input list into a single output data frame
dat_all = rbind_all(out_landmark_123)
class(dat_all)
dim(dat_all)
head(dat_all)
names(dat_all)
nrow(dat_all)
dat_all$place_id %>% unique() %>% length()
# alternative to determine unique()
# duplicated(dat_all$place_id)
# !(duplicated(dat_all$place_id))
# since we used 3 landmarks, their 5000 km circles will overlap
# remove duplicate in-n-out via unique identifier 'place_id'
names(dat_all)
# library(dplyr)
dat_all_uniq = dat_all %>%
dplyr::distinct(place_id) %>% # alternative to uniqe() and !duplicated()
dplyr::select(location.lng,location.lat,place_id)
# note: now in (lng,lat) order, for later processing (later on will expect data.frame() format)
# we could easily convert it here as well
head(dat_all_uniq)
class(dat_all_uniq)
dim(dat_all_uniq)
# use google 'place_id' as row names, to keep track of points
row.names(dat_all_uniq) = dat_all_uniq$place_id
# with row names assigned, no need for place_id as a column
dat_latlng = dat_all_uniq %>%
select(-place_id)
head(dat_latlng)
row.names(dat_latlng)
# we explicitly save the points later when we migrate
# save(dat_latlng,file='/notebooks/gis_demog_misc/dat_latlng.RData')
class(la_county_zoom)
?SpatialPolygonsDataFrame
str(la_county_zoom)
# 2213 data rows
# 2213 polygons
# load("/notebooks/gis_demog_misc/la_county_zoom_tvmagic.RData")
# plot(la_county_zoom)
# the spatial-object came with pre-existing data in its data slot
str(la_county_zoom@data)
dim(la_county_zoom@data)
head(la_county_zoom@data)
# our census data that we wish to merge, then inject
dim(dat_census)
head(dat_census)
# make new variable 'GEOID' for census dataset
# eg glue together: state,county,tract
dat_census2 = dat_census %>%
mutate(GEOID=paste0(state,county,tract)) %>%
select(GEOID,TRACTCE=tract,Tot_Population_CEN_2010) # rename 'tract' to 'TRACTCE' to agree with naming convention
head(dat_census2)
head(la_county_zoom@data)
names(la_county_zoom)
class(la_county_zoom)
names(dat_census2)
class(dat_census2)
dim(la_county_zoom) # less number of tracts (than census api) because we 'zoomed' in via custom points
dim(dat_census2)
?spCbind
dat_census2 %>% filter(GEOID == '06037800204')
# 'spooky behavior' that number of tract mismatch error is not an issue when using spCbind
# obviously two observations in census table have tract '800204' (geoid '06037800204') (1 is NA 1 is 6740)
# see the two blank tracts in western pan handle
# if we explicitly remove the one that is missing NA
# now, just one blank tract (the correct NA one we skipped)
ind_error = which((dat_census2$GEOID == '06037800204') & is.na(dat_census2$Tot_Population_CEN_2010 == TRUE))
dat_census3 = dat_census2[-ind_error,]
dat_census3 %>% filter(GEOID == '06037800204')
# la_county_zoom[865,'GEOID']
# la_county_zoom[866,'GEOID']
# plot(la_county_zoom[865,])
# plot(la_county_zoom[866,])
dat_census2 %>% filter(GEOID == '06037800204')
# dat_census2_subset %>% filter(GEOID == '06037800204') # this did matching, which default removed the 6740, while keeping NA
dat_census3 %>% filter(GEOID == '06037800204')
# annoying that we have to dive into rowname of spdf too
# but good assertion check to make sure polygon ID linked to Data ID correctly
xx = la_county_zoom # spatial polygon data frame
xtra = dat_census3 # data frame of new variables
# we just wrote the two lines above
# to shoehorn our right hand side objects to the naming convention of examples
# all of below is copy/paste example from ?spCbind
# explicitly get rows that have matching GEOID
o = match(xx$GEOID, xtra$GEOID)
length(o)
dim(xtra)
xtra1 <- xtra[o,]
dim(xtra1)
# use geoid as rowname of spatialfoo
# hidden annoyance
xx <- spChFIDs(xx, as.character(xx$GEOID))
# row.names(xx)
#rownames of new dataframe
row.names(xtra1) <- xx$GEOID
# cbind spdf xx to dataframe xtra1
xx1 <- spCbind(xx, xtra1)
la_county_zoom_new = xx1
spplot(la_county_zoom_new,z='Tot_Population_CEN_2010')
# we explicitly save the spdf later when we migrate
# save(la_county_zoom_new,file='/notebooks/gis_demog_misc/la_county_zoom_new.RData')
# library(deldir)
# using bbox of poly
voronoipolygons = function(x, poly) {
require(deldir)
if (.hasSlot(x, 'coords')) {
crds <- x@coords
} else
crds = x
# bb = bbox(poly) # when is bb used, not used
rw = as.numeric(t(bbox(poly))) # using poly's bounding box for output of deldir
z <- deldir(crds[, 1], crds[, 2],rw=rw)
w <- tile.list(z)
polys <- vector(mode = 'list', length = length(w))
require(sp)
for (i in seq(along = polys)) {
# each polygon in tile list, extract coordinates
pcrds <- cbind(w[[i]]$x, w[[i]]$y)
pcrds <- rbind(pcrds, pcrds[1,]) # closed poly
cust_ind = w[[i]]$ptNum # original index that deldir outputs
cust_ind2 = row.names(x[cust_ind,]) # use original index, to look up google place id
polys[[i]] <- Polygons(list(Polygon(pcrds)), ID = cust_ind2) # use place id as polygon id
}
SP = SpatialPolygons(polys)
# return deldir::voronoi poly as spdf
voronoi = SpatialPolygonsDataFrame(SP,
data = data.frame(x = crds[, 1],
y = crds[, 2],
row.names = sapply(slot(SP, 'polygons'),
function(x)
slot(x, 'ID')
)
)
)
return(voronoi)
}
# the table of gps points expects data.frame(dat_latlng) format
class(dat_latlng)
dat_latlng = data.frame(dat_latlng)
class(dat_latlng)
plot(la_county_bdry)
points(dat_latlng)
plot(la_county_bdry)
temp_sp = SpatialPoints(data.frame(dat_latlng))
raster::crs(temp_sp) <- raster::crs(la_county_bdry)
pts_in_la = raster::crop(temp_sp,la_county_bdry)
points(pts_in_la)
head(pts_in_la)
length(pts_in_la)
length(unique(row.names(pts_in_la)))
names(la_county_zoom_new)
plot(la_county_bdry)
points(pts_in_la)
vor_spdf = voronoipolygons(pts_in_la,la_county_zoom_new)
plot(vor_spdf)
points(pts_in_la)
row.names(vor_spdf)
?sp::aggregate
names(la_county_zoom_new)
# argument 1 is areal unit source
# argument 2 is target spatial support
# areaWeighted = TRUE, uses proportion of area overlap to weight attribute
# outputs spdf
# default function is 'mean()', hence santa monica 8000+ pop is smoothed by surrounding lower pop
# option to choose different function, sum(), to get direct counts (but cannot areaWeighted='TRUE')
# but if use 'areaWeighted = True', forced to use mean() for aggregation function
vor_agg_spdf = aggregate(la_county_zoom_new['Tot_Population_CEN_2010'], # Tot_Population_CEN_2010
by=vor_spdf,
areaWeighted = TRUE)
# spplot(vor_agg_spdf)
## Use LA County Boundary to crop our voronoi polygons
# avoid ?gIntersection(poly_1,poly_2, byid=TRUE)
# spooky behavior with mixing up ids
# even with: option in gIntersection(...,id=row.names(vor_agg_spdf))
# instead, use ?raster::crop
## Crop to the vor_agg_spdf extent, then plot
# crs(vor_agg_spdf) <- raster::crs(la_county_bdry)
vor_agg_spdf_crop = raster::crop(vor_agg_spdf, la_county_bdry)
# row.names(vor_agg_spdf_crop)
plot(la_county_bdry)
spplot(vor_agg_spdf)
spplot(vor_agg_spdf_crop)
# str(la_county_zoom_new)
# str(pts_in_la)
# str(vor_agg_spdf_crop)
save(la_county_zoom_new,file='~/notebooks/gis_demog_misc/la_county_zoom_new.RData')
save(pts_in_la,file='~/notebooks/gis_demog_misc/pts_in_la.RData')
save(vor_agg_spdf_crop,file='~/notebooks/gis_demog_misc/vor_agg_spdf_crop.RData')
#####################
# read in the previously exported data
#####################
# jaga: linux environment
# load(file='~/notebooks/gis_demog_misc/la_county_zoom_new.RData')
# load(file='~/notebooks/gis_demog_misc/pts_in_la.RData')
# load(file='~/notebooks/gis_demog_misc/vor_agg_spdf_crop.RData')
# windows environment
# load('C:\\Users\\mtzen\\Documents\\in-n-out\\pts_in_la.RData')
# load('C:\\Users\\mtzen\\Documents\\in-n-out\\la_county_zoom_new.RData')
# load('C:\\Users\\mtzen\\Documents\\in-n-out\\vor_agg_spdf_crop.RData')
## not really needed
# load('C:\\Users\\mtzen\\Documents\\in-n-out\\la_county_zoom_tvmagic.RData')
# load('C:\\Users\\mtzen\\Documents\\in-n-out\\dat_latlng.RData')
#####################
# lexis: linux environment
# Exercises: finish the code below out yourself
#####################
# load('')
library(leaflet)
#############
# Example 1: choropleths of tracts
# https://rstudio.github.io/leaflet/raster.html
#############
names(la_county_zoom_new)
# define 'pal_func' to control colors
palet_func = colorQuantile(
domain = la_county_zoom_new$Tot_Population_CEN_2010,
palette = "Reds",
na.color = "transparent"
)
# build map
leaflet(la_county_zoom_new) %>%
addPolygons(
stroke = FALSE, fillOpacity = 0.5, smoothFactor = 0,
color = ~palet_func(Tot_Population_CEN_2010)
) %>%
addLegend("bottomleft",
pal=palet_func,
values = ~Tot_Population_CEN_2010,
title = "Census 2010 Pop",
opacity = 1
)
# library(leaflet)
#############
# Example 1: Voronoi, Simpler
# https://rstudio.github.io/leaflet/raster.html
#############
# names(vor_agg_spdf_crop)
# names(pts_in_la)
# define 'palet_func' to control colors
palet_func = colorQuantile(
domain = vor_agg_spdf_crop$Tot_Population_CEN_2010,
palette = "Blues",
na.color = "transparent"
)
# build map
leaflet(vor_agg_spdf_crop) %>%
addPolygons(
color = ~palet_func(Tot_Population_CEN_2010),
fillOpacity = 0.6,
stroke = FALSE,
smoothFactor = 0 # optional smooth of polygon line
) %>%
addCircles(lng = pts_in_la$location.lng,
lat = pts_in_la$location.lat,
color='red',
weight = 5
) %>%
addLegend("bottomleft",
pal=palet_func,
values = ~Tot_Population_CEN_2010,
title = "Census 2010 Pop",
opacity = 1
) %>%
addProviderTiles("Stamen.Toner")
# On Your Own, Using above code example, try making different maps
# ?leaflet
# ?addPolygons
# https://rstudio.github.io/leaflet/
#############
# Example 2: Voronoi, More Options
# https://rstudio.github.io/leaflet/raster.html
#############
names(vor_agg_spdf_crop)
names(pts_in_la)
names(la_county_zoom_new)
# define 'palet_func' to control colors
palet_func = colorQuantile(
domain = vor_agg_spdf_crop$Tot_Population_CEN_2010,
palette = "Blues",
na.color = "transparent"
)
# build map
leaflet(vor_agg_spdf_crop) %>%
# cosmetics: black bounding box for underlying background
addRectangles(
lng1 = -118.95172, lat1 = 33.67688,
lng2 = -117.6464, lat2 = 34.3834,
stroke=FALSE, fillColor = "black",
fillOpacity = 1
) %>%
# gps
addCircles(
lng = pts_in_la$location.lng,
lat = pts_in_la$location.lat,
weight = 10,
fillOpacity = 0,
color = 'cyan'
) %>%
# voronoi polygons
addPolygons(
stroke = TRUE,
weight = 5,
color = 'cyan',
fillOpacity = 0,
smoothFactor = 0
) %>%
# tract polygons (overlay it on map)
# note: 'la_county_zoom' is second spdf source
addPolygons(data = la_county_zoom_new,
stroke = TRUE,
color = 'orange',
weight = 1) %>%
# show voronoi aggregated data
addPolygons(data = vor_agg_spdf_crop,
stroke = FALSE, fillOpacity = 0.8, smoothFactor = 0,
color = ~palet_func(Tot_Population_CEN_2010)
) %>%
# finishing sprinkles
addLegend("bottomleft",
pal=palet_func,
values = ~Tot_Population_CEN_2010,
title = "Census 2010 Pop",
opacity = 1
) %>%
# http://leaflet-extras.github.io/leaflet-providers/preview/
addProviderTiles("Stamen.Toner")
# addProviderTiles("OpenTopoMap")
# addProviderTiles("NASAGIBS.ViirsEarthAtNight2012") # zoom out
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment