Skip to content

Instantly share code, notes, and snippets.

View simonkassel's full-sized avatar

Simon Kassel simonkassel

View GitHub Profile
# Spatial Test Set for Logistic Regression
# Author: Simon Kassel
tool_exec <- function(in_params, out_params){
# Install and load necessary packages
arc.progress_label("Loading packages...")
for (p in c("caret", "sp", "plyr", "maptools", "ggplot2", "dplyr")) {
if (!requireNamespace(p, quietly = TRUE))
install.packages(p)
suppressMessages(library(p, character.only = TRUE))
# R MAPPING TUTORIAL ------------------------------------------------------
# Simon Kassel & Ken Steif
# GLOBAL OPTIONS ----------------------------------------------------------
# Session-wide settings for the tutorial, set in a single call:
#   scipen           - a large penalty against scientific notation so that
#                      numbers print in fixed notation; passed as an integer
#                      rather than a string so no implicit coercion is needed.
#   stringsAsFactors - keep character columns as character when data frames
#                      are created.
options(scipen = 999L, stringsAsFactors = FALSE)
# PACKAGES ----------------------------------------------------------------
for (p in c("ggplot2", "ggmap", "RColorBrewer", "maptools",
#Let's look at the distribution of home values
# Histogram of sale prices (`SalePrice`) for every record in `sf`.
# The bar fill is set directly from `palette_1_colors`, and the manual fill
# scale uses that same palette object so both references resolve to one name.
# Axis labels are set once, in labs(), together with the title, subtitle and
# caption. Both axes use comma-formatted tick labels.
home_value_hist <- ggplot(sf, aes(SalePrice)) +
  geom_histogram(fill = palette_1_colors) +
  scale_fill_manual(values = palette_1_colors) +
  plotTheme() +
  labs(
    x = "Sale Price($)",
    y = "Count",
    title = "Distribution of San Francisco home prices",
    subtitle = "Nominal prices (2009 - 2015)",
    caption = "Source: San Francisco Office of the Assessor-Recorder\n@KenSteif & @SimonKassel"
  ) +
  scale_x_continuous(labels = comma) +
  scale_y_continuous(labels = comma)
# There appear to be some outliers, so keep only the sales whose price is
# below the mean plus 2.5 standard deviations. which() drops rows where the
# comparison is NA.
sf <- sf[which(with(sf, SalePrice < mean(SalePrice) + (2.5 * sd(SalePrice)))), ]
#violin plots
home_value_violin <- ggplot(sf, aes(x=SaleYr, y=SalePrice, fill=SaleYr)) + geom_violin(color = "grey50") +
xlab("Sale Price($)") + ylab("Count") +
scale_fill_manual(values=palette_7_colors) +
stat_summary(fun.y=mean, geom="point", size=2, colour="white") +
plotTheme() + theme(legend.position="none") +
scale_y_continuous(labels = comma) +
# Create a bounding box from the `bbox` slot of `neighb`
bbox <- neighb@bbox

# Add a margin between the extent of our data and the edge of the basemap:
# 0.01 on the left/right and 0.005 on the bottom/top.
sf_bbox <- c(
  left   = bbox[1, 1] - 0.01,
  bottom = bbox[2, 1] - 0.005,
  right  = bbox[1, 2] + 0.01,
  top    = bbox[2, 2] + 0.005
)

# General basemap covering the padded bounding box
basemap <- get_stamenmap(
  bbox = sf_bbox,
  maptype = "toner-lite",
  zoom = 13
)
# Let's map the sale prices per year
prices_mapped_by_year <- ggmap(basemap) +
geom_point(data = sf, aes(x = long, y = lat, color = SalePrice),
size = .25, alpha = 0.6) +
facet_wrap(~SaleYr, scales = "fixed", ncol = 4) +
coord_map() +
mapTheme() + theme(legend.position = c(.85, .25)) +
scale_color_gradientn("Sale Price",
colors = palette_8_colors,
labels = scales::dollar_format(prefix = "$")) +
# That's a lot of information. Let's subset to get just two years. We'll stack them this time and increase the point size
prices_mapped_2009_2015 <- ggmap(basemap) +
geom_point(data = subset(sf, sf$SaleYr == 2015 | sf$SaleYr == 2009), aes(x = long, y = lat, color = SalePrice),
size = 1, alpha = 0.75) +
facet_wrap(~SaleYr, scales = "fixed", ncol = 1) +
coord_map() +
mapTheme() +
scale_color_gradientn("Sale Price",
colors = palette_8_colors,
labels = scales::dollar_format(prefix = "$")) +
# Let's look at just one neighborhood: The Mission District
# Keep only the sales recorded for the "Inner Mission" neighborhood
# (rows with a missing Neighborhood are excluded).
missionSales <- sf[sf$Neighborhood %in% "Inner Mission", ]

# A new basemap at the appropriate scale, centered on the median
# longitude/latitude of the Inner Mission sales.
centroid_lon <- median(missionSales$long)
centroid_lat <- median(missionSales$lat)
missionBasemap <- get_map(
  location = c(lon = centroid_lon, lat = centroid_lat),
  zoom = 15,
  maptype = "toner-lite",
  source = "stamen"
)
# Transform the data from the property level to the neighborhood level:
# one row per Neighborhood / SaleYr combination with the median price, the
# number of sales, the standard deviation of price, and the median
# minus/plus one standard deviation.
sf.summarized <- ddply(
  sf,
  .(Neighborhood, SaleYr),
  summarise,
  medianPrice = median(SalePrice),
  saleCount = length(SaleYr),
  sdPrice = sd(SalePrice),
  minusSd = medianPrice - sdPrice,
  plusSD = medianPrice + sdPrice,
  .progress = "text"
)

# Preview the first 10 rows of the resulting dataset
head(sf.summarized, n = 10)
# Plot neighborhood median home value over time
neighb_map <- ggmap(basemap) +
geom_polygon(data = sf.summarized_tidy,
aes(x = long, y = lat, group = group, fill = medianPrice),
colour = "white", alpha = 0.75, size = 0.25) +
scale_fill_gradientn("Neighborhood \nMedian \nSale Price",
colors = palette_8_colors,
labels = scales::dollar_format(prefix = "$")) +
mapTheme() + theme(legend.position = c(.85, .25)) + coord_map() +
facet_wrap(~SaleYr, nrow = 2) +