This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Spatial Test Set for Logistic Regression | |
# Author: Simon Kassel | |
tool_exec <- function(in_params, out_params){ | |
# Install and load necessary packages | |
arc.progress_label("Loading packages...") | |
for (p in c("caret", "sp", "plyr", "maptools", "ggplot2", "dplyr")) { | |
if (!requireNamespace(p, quietly = TRUE)) | |
install.packages(p) | |
suppressMessages(library(p, character.only = TRUE)) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# R MAPPING TUTORIAL ------------------------------------------------------ | |
# Simon Kassel & Ken Steif | |
# GLOBAL OPTIONS ----------------------------------------------------------
# scipen is a numeric penalty applied when R chooses between fixed and
# scientific notation, so it is given as a number rather than a string; a
# large value keeps printed numbers in fixed notation.
# stringsAsFactors = FALSE is set alongside it in the same call.
options(scipen = 999, stringsAsFactors = FALSE)
# PACKAGES ---------------------------------------------------------------- | |
for (p in c("ggplot2", "ggmap", "RColorBrewer", "maptools", |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Let's look at the distribution of home values.
# The same palette_1_colors object is used for the constant bar fill and for
# the manual fill scale, so both references must use the same spelling.
# Axis labels are set once in labs(); separate xlab()/ylab() calls with the
# same text would be redundant.
home_value_hist <- ggplot(sf, aes(SalePrice)) +
  geom_histogram(fill = palette_1_colors) +
  scale_fill_manual(values = palette_1_colors) +
  plotTheme() +
  labs(
    x = "Sale Price($)",
    y = "Count",
    title = "Distribution of San Francisco home prices",
    subtitle = "Nominal prices (2009 - 2015)",
    caption = "Source: San Francisco Office of the Assessor-Recorder\n@KenSteif & @SimonKassel"
  ) +
  # Format both axes' tick labels with thousands separators
  scale_x_continuous(labels = comma) +
  scale_y_continuous(labels = comma)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Seems like there are some outliers, so let's remove any sale priced more
# than 2.5 standard deviations above the mean.
# na.rm = TRUE keeps a missing SalePrice from turning the cutoff into NA,
# which would make which() return nothing and drop every row. Rows whose
# SalePrice is missing are still excluded, because which() skips NA results.
sf <- sf[
  which(
    sf$SalePrice <
      mean(sf$SalePrice, na.rm = TRUE) + 2.5 * sd(sf$SalePrice, na.rm = TRUE)
  ),
]
#violin plots | |
home_value_violin <- ggplot(sf, aes(x=SaleYr, y=SalePrice, fill=SaleYr)) + geom_violin(color = "grey50") + | |
xlab("Sale Price($)") + ylab("Count") + | |
scale_fill_manual(values=palette_7_colors) + | |
stat_summary(fun.y=mean, geom="point", size=2, colour="white") + | |
plotTheme() + theme(legend.position="none") + | |
scale_y_continuous(labels = comma) + |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create a bounding box from the extent stored in neighb's bbox slot
bbox <- neighb@bbox
# Add a margin between the extent of our data and the edge of the basemap:
# .01 on the left/right sides and .005 on the bottom/top
sf_bbox <- c(left = bbox[1, 1], bottom = bbox[2, 1],
             right = bbox[1, 2], top = bbox[2, 2]) +
  c(-.01, -.005, .01, .005)
# General basemap covering the padded extent
# NOTE(review): Stamen-hosted tiles have reportedly moved to Stadia Maps;
# confirm get_stamenmap() still works with the installed ggmap version.
basemap <- get_stamenmap(bbox = sf_bbox, zoom = 13, maptype = "toner-lite")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Let's map the sale prices per year | |
prices_mapped_by_year <- ggmap(basemap) + | |
geom_point(data = sf, aes(x = long, y = lat, color = SalePrice), | |
size = .25, alpha = 0.6) + | |
facet_wrap(~SaleYr, scales = "fixed", ncol = 4) + | |
coord_map() + | |
mapTheme() + theme(legend.position = c(.85, .25)) + | |
scale_color_gradientn("Sale Price", | |
colors = palette_8_colors, | |
labels = scales::dollar_format(prefix = "$")) + |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# That's a lot of information. Let's subset to just two years. We'll stack them this time and increase the point size.
prices_mapped_2009_2015 <- ggmap(basemap) + | |
geom_point(data = subset(sf, sf$SaleYr == 2015 | sf$SaleYr == 2009), aes(x = long, y = lat, color = SalePrice), | |
size = 1, alpha = 0.75) + | |
facet_wrap(~SaleYr, scales = "fixed", ncol = 1) + | |
coord_map() + | |
mapTheme() + | |
scale_color_gradientn("Sale Price", | |
colors = palette_8_colors, | |
labels = scales::dollar_format(prefix = "$")) + |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Let's look at just one neighborhood: the Mission District
# Keep only the sales whose Neighborhood is "Inner Mission"
missionSales <- sf[sf$Neighborhood %in% "Inner Mission", ]
# We'll need a new basemap at the appropriate scale, centered on the median
# longitude/latitude of those sales
centroid_lon <- median(missionSales$long)
centroid_lat <- median(missionSales$lat)
# NOTE(review): source = "stamen" relies on Stamen-hosted tiles; confirm this
# source is still available in the installed ggmap version.
missionBasemap <- get_map(
  location = c(lon = centroid_lon, lat = centroid_lat),
  source = "stamen",
  maptype = "toner-lite",
  zoom = 15
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Aggregate from the property level to one row per Neighborhood / SaleYr
# combination. The summary columns are evaluated in order, so minusSd and
# plusSD can refer to medianPrice and sdPrice computed just above them.
sf.summarized <- ddply(
  .data = sf,
  .variables = c("Neighborhood", "SaleYr"),
  .fun = summarise,
  medianPrice = median(SalePrice),
  saleCount = length(SaleYr),
  sdPrice = sd(SalePrice),
  minusSd = medianPrice - sdPrice,
  plusSD = medianPrice + sdPrice,
  .progress = "text"
)
# Preview the first 10 rows of the summarized dataset
head(sf.summarized, n = 10)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Plot neighborhood median home value over time | |
neighb_map <- ggmap(basemap) + | |
geom_polygon(data = sf.summarized_tidy, | |
aes(x = long, y = lat, group = group, fill = medianPrice), | |
colour = "white", alpha = 0.75, size = 0.25) + | |
scale_fill_gradientn("Neighborhood \nMedian \nSale Price", | |
colors = palette_8_colors, | |
labels = scales::dollar_format(prefix = "$")) + | |
mapTheme() + theme(legend.position = c(.85, .25)) + coord_map() + | |
facet_wrap(~SaleYr, nrow = 2) + |
OlderNewer