Created
August 24, 2017 04:38
-
-
Save aaronschiff/a016ff945864f7dd90d60d39c2342200 to your computer and use it in GitHub Desktop.
Simple graphs and maps of Christchurch property sales data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Map Christchurch property data | |
# ----------------------------------------------------------------------------- | |
# Setup | |
# NOTE: the workspace is deliberately not wiped with rm(list = ls()). That call
# only removes user objects from the global environment (attached packages and
# options stay as they are), so it does not give a clean slate and it destroys
# the workspace of anyone who sources this file. Run the script in a fresh R
# session instead.
library(magrittr)
library(lubridate)
library(tidyverse)
library(scales)
library(sf)
library(viridis)
# Presumably defines clean_theme(), which the ggplot graphs below call
source("clean-ggplot-theme.R")
# Every figure is written to outputs/; make sure the directory exists so the
# png() calls further down do not fail on a fresh checkout
dir.create("outputs", showWarnings = FALSE, recursive = TRUE)
# ----------------------------------------------------------------------------- | |
# ----------------------------------------------------------------------------- | |
# Process data | |
# Load the combined sales file, derive the calendar year in which each price
# was agreed, and keep only sales agreed from 2009 to 2016 inclusive
dat <- read_csv("dat-chc-combined.csv") %>%
  mutate(year_price_agreed = year(date_price_agreed)) %>%
  filter(year_price_agreed > 2008, year_price_agreed < 2017)

# One row per year: number of geocoded sales (both coordinates present),
# number of all sales, and the geocoded share of sales
annual_info <- dat %>%
  group_by(year_price_agreed) %>%
  summarise(geocoded = sum(as.numeric(!(is.na(latitude) | is.na(longitude)))),
            total = n()) %>%
  mutate(rate = geocoded / total) %>%
  arrange(year_price_agreed)
# ----------------------------------------------------------------------------- | |
# ----------------------------------------------------------------------------- | |
# Graphs | |
# Scatter of every sale: date the price was agreed against gross sale price.
# Points are translucent so that dense regions show up darker.
all_sales_dots <- ggplot(dat,
                         aes(x = date_price_agreed, y = gross_sale_price)) +
  geom_point(colour = rgb(0, 0, 0, 0.1),
             fill = rgb(0, 0, 0, 0.05),
             size = 1) +
  scale_x_date(breaks = date_breaks("1 year"),
               labels = date_format("%Y")) +
  # Sales outside the 0 to 3,000,000 limits are not drawn
  scale_y_continuous(limits = c(0, 3e6),
                     breaks = seq(0, 3e6, by = 5e5),
                     labels = comma) +
  labs(x = "", y = "", title = "Gross selling price") +
  clean_theme(base_size = 24)

# Write the chart to a 1600 x 1000 pixel PNG
png("outputs/all-sales-dots.png", width = 1600, height = 1000)
print(all_sales_dots)
dev.off()
# Same scatter of sale price against date as above, on a log10 price axis and
# with a smoothed trend line (no confidence band) drawn over the points.
# The x/y mapping is declared once in ggplot() and shared by the point and
# smooth layers.
all_sales_dots_log <- ggplot(dat,
                             aes(x = date_price_agreed, y = gross_sale_price)) +
  geom_point(colour = rgb(0, 0, 0, 0.1),
             fill = rgb(0, 0, 0, 0.05),
             size = 1) +
  geom_smooth(se = FALSE, colour = "yellow") +
  # NOTE(review): 0 has no finite position on a log10 axis (log10(0) is -Inf).
  # This rule presumably serves as a baseline along the bottom edge of the
  # panel; confirm it renders as intended.
  geom_hline(yintercept = 0) +
  scale_x_date(breaks = date_breaks("1 year"),
               labels = date_format("%Y")) +
  # The break at 0 has been removed: it cannot be placed on a log scale
  scale_y_log10(breaks = c(1, 100000, 250000, 500000,
                           1000000, 2000000, 5000000, 10000000),
                labels = comma) +
  labs(x = "", y = "", title = "Gross selling price (log scale)") +
  clean_theme(base_size = 24)

# Write the chart to a 1600 x 1000 pixel PNG
png("outputs/all-sales-dots-log.png", width = 1600, height = 1000)
print(all_sales_dots_log)
dev.off()
# Bar chart of the number of sales per calendar month. Each sale is assigned
# to the first day of the month in which its price was agreed.
monthly_sales_count <- dat %>%
  mutate(date_month = floor_date(date_price_agreed, unit = "month")) %>%
  count(date_month, name = "sales_count") %>%
  ggplot(aes(x = date_month, y = sales_count)) +
  # Bar width is in x-axis units (days)
  geom_col(width = 20) +
  scale_x_date(breaks = date_breaks("1 year"),
               labels = date_format("%Y")) +
  scale_y_continuous(breaks = seq(0, 1750, by = 250),
                     labels = comma) +
  labs(x = "", y = "", title = "Monthly sales count") +
  clean_theme(base_size = 24)

# Write the chart to a 1600 x 1000 pixel PNG
png("outputs/monthly-sales-count.png", width = 1600, height = 1000)
print(monthly_sales_count)
dev.off()
# ----------------------------------------------------------------------------- | |
# ----------------------------------------------------------------------------- | |
# Mapping of geocoded sales | |
# Randomly drop geocoded sales so that every year ends up with the same
# effective geocoding rate (the lowest rate observed in any year). Without
# this, years with better geocoding would look denser on the maps.
# The rows dropped are random; call set.seed() beforehand for reproducible maps.
annual_info <- annual_info %>%
  mutate(num_rows_to_remove = round((rate - min(rate)) * total))

# Only sales with both coordinates can be mapped
dat_adjusted <- dat %>%
  filter(!is.na(latitude), !is.na(longitude))

# Iterate over the years actually present in annual_info, so a year with no
# sales cannot produce a zero-length condition in the if() below
for (i in seq_len(nrow(annual_info))) {
  n_remove <- annual_info$num_rows_to_remove[i]
  if (n_remove > 0) {
    year_rows <- which(dat_adjusted$year_price_agreed ==
                         annual_info$year_price_agreed[i])
    # Sample positions within year_rows rather than calling sample(year_rows, n):
    # when year_rows holds a single index k, sample(k, n) draws from 1:k and
    # would remove rows belonging to other years
    dropped <- year_rows[sample.int(length(year_rows), n_remove)]
    dat_adjusted <- dat_adjusted[-dropped, ]
  }
}
# Background layers for the maps, each read from its own shapefile:
# coastline/island polygons, river polygons, lake polygons and road centrelines
coastline <- st_read(
  dsn = file.path("nz-coastlines-and-islands-polygons-topo-150k",
                  "nz-coastlines-and-islands-polygons-topo-150k.shp")
)
rivers <- st_read(
  dsn = file.path("nz-river-polygons-topo-150k",
                  "nz-river-polygons-topo-150k.shp")
)
lakes <- st_read(
  dsn = file.path("nz-lake-polygons-topo-150k",
                  "nz-lake-polygons-topo-150k.shp")
)
roads <- st_read(
  dsn = file.path("lds-nz-road-centerlines-topo-150k-SHP",
                  "nz-road-centerlines-topo-150k.shp")
)
# Colour setup
# Price bands for the map legend: 0-250k, 250k-500k, 500k-1m, 1m-5m, over 5m
colour_breaks <- c(0, 250000, 500000, 1000000, 5000000, Inf)
# One plasma colour per band (the darkest quarter of the palette is skipped)
colour_palette <- plasma(length(colour_breaks) - 1, begin = 0.25)
# Band index of each sale. Bands are closed on the right; include.lowest = TRUE
# also puts a price of exactly 0 in the first band, matching the "$0 to $250k"
# legend entry (without it such sales get an NA colour and are not drawn).
# Negative or missing prices still map to NA.
price_band <- cut(dat_adjusted$gross_sale_price,
                  breaks = colour_breaks,
                  include.lowest = TRUE,
                  labels = FALSE)
dat_adjusted$colour <- colour_palette[price_band]

# Fixed colours for the map layers and legend text
land_colour <- "black"
water_colour <- rgb(220/255, 220/255, 220/255)
roads_colour <- rgb(140/255, 140/255, 140/255)
text_colour <- "black"
# Small multiples dot map by year: eight maps stacked vertically (one per year,
# 2009 to 2016) in a single tall PNG, each 1600 x 1600 pixels
png("outputs/map-combined.png", width = 1600, height = 12800)
par(mfrow = c(8, 1),
    bg = water_colour,
    family = "Helvetica")
for (y in 2009:2016) {
  map_dat <- dat_adjusted %>%
    filter(year_price_agreed == y)

  # Empty canvas fixing the map extent. ann = FALSE suppresses the default
  # axis titles, which would otherwise print as stray "0" labels.
  # NOTE(review): no asp = 1 is set, so x and y scales follow the panel shape;
  # confirm whether a fixed aspect ratio is wanted.
  plot(0, 0,
       type = "n",
       ylim = c(5167500, 5192500),
       xlim = c(1560000, 1585000),
       axes = FALSE,
       ann = FALSE)

  # Layers are drawn bottom to top: land, sales, rivers, roads.
  # st_geometry() passes only the shapes, so sf does no attribute handling
  # and emits no "ignoring all but the first attribute" warning.
  plot(st_geometry(coastline), col = land_colour, border = NA, add = TRUE)
  # NOTE(review): the points share the coordinate system of xlim/ylim above, so
  # `longitude`/`latitude` are assumed to hold projected coordinates; confirm.
  points(map_dat$longitude, map_dat$latitude,
         pch = 19,
         cex = 1.2,
         col = map_dat$colour)
  plot(st_geometry(rivers),
       col = water_colour, border = water_colour, add = TRUE)
  #plot(st_geometry(lakes), col = water_colour, border = water_colour, add = TRUE)
  plot(st_geometry(roads), col = roads_colour, add = TRUE)

  # Legend titled with the year; entries follow the order of colour_palette
  legend(x = 1579000,
         y = 5193000,
         legend = c("$0 to $250k",
                    "$250k to $500k",
                    "$500k to $1m",
                    "$1m to $5m",
                    "Over $5m"),
         title = as.character(y),
         fill = colour_palette,
         border = NA,
         text.col = text_colour,
         cex = 4,
         xjust = 0,
         bty = "n")
}
dev.off()
# ----------------------------------------------------------------------------- |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment