Skip to content

Instantly share code, notes, and snippets.

@aaronschiff
Created August 24, 2017 04:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aaronschiff/a016ff945864f7dd90d60d39c2342200 to your computer and use it in GitHub Desktop.
Save aaronschiff/a016ff945864f7dd90d60d39c2342200 to your computer and use it in GitHub Desktop.
Simple graphs and maps of Christchurch property sales data
# Map Christchurch property data
# -----------------------------------------------------------------------------
# Setup
# Clear the workspace: removes every object that ls() lists in the current
# environment (names starting with "." are not listed, and attached packages
# stay attached).
# NOTE(review): this also wipes anything the caller had defined before
# running the script — confirm that is intended.
rm(list = ls())
# magrittr: provides the %<>% assignment pipe used further down
library(magrittr)
# lubridate: year(), month() and ymd() date helpers
library(lubridate)
# tidyverse: read_csv(), the dplyr verbs and ggplot2
library(tidyverse)
# scales: date_breaks(), date_format() and comma axis helpers
library(scales)
# sf: st_read() for the shapefile map layers
library(sf)
# viridis: plasma() palette for the map price bands
library(viridis)
# Presumably defines clean_theme(), which the graphs below call — confirm
# against clean-ggplot-theme.R.
source("clean-ggplot-theme.R")
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Process data
# Read data
# Load the combined sales file, derive the year in which each price was
# agreed, and keep only sales agreed from 2009 through 2016 (everything
# before 2009 and after 2016 is thrown away).
dat <- read_csv("dat-chc-combined.csv") %>%
  mutate(year_price_agreed = year(date_price_agreed)) %>%
  filter(year_price_agreed >= 2009 & year_price_agreed <= 2016)

# Calculate geocoding rate and total sales by year
# A sale counts as geocoded when both latitude and longitude are present.
annual_info <- dat %>%
  group_by(year_price_agreed) %>%
  summarise(
    geocoded = sum(as.numeric(!is.na(latitude) & !is.na(longitude))),
    total = n()
  ) %>%
  mutate(rate = geocoded / total) %>%
  arrange(year_price_agreed)
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Graphs
# Dot plot of every sale: agreed date on x, gross sale price on y, drawn with
# mostly transparent black points. The y axis runs from 0 to 3,000,000 in
# steps of 500,000.
all_sales_dots <- ggplot(
  dat,
  aes(x = date_price_agreed, y = gross_sale_price)
) +
  geom_point(
    size = 1,
    colour = rgb(0, 0, 0, 0.1),
    fill = rgb(0, 0, 0, 0.05)
  ) +
  scale_x_date(
    breaks = date_breaks("1 year"),
    labels = date_format("%Y")
  ) +
  scale_y_continuous(
    limits = c(0, 3e6),
    breaks = seq(0, 3e6, by = 5e5),
    labels = comma
  ) +
  xlab("") +
  ylab("") +
  ggtitle("Gross selling price") +
  clean_theme(base_size = 24)

# Write the chart to a 1600 x 1000 pixel PNG
png("outputs/all-sales-dots.png", width = 1600, height = 1000)
print(all_sales_dots)
dev.off()
# Same dot plot on a log10 price axis, with a yellow smoothed trend line
# (no confidence band) drawn over the points.
# NOTE(review): 0 has no finite position on a log10 axis, so how the 0 break
# and the horizontal line at 0 are rendered is up to ggplot2 — confirm the
# output looks as intended.
all_sales_dots_log <- ggplot(
  dat,
  aes(x = date_price_agreed, y = gross_sale_price)
) +
  geom_point(
    size = 1,
    colour = rgb(0, 0, 0, 0.1),
    fill = rgb(0, 0, 0, 0.05)
  ) +
  geom_smooth(se = FALSE, colour = "yellow") +
  geom_hline(yintercept = 0) +
  scale_x_date(
    breaks = date_breaks("1 year"),
    labels = date_format("%Y")
  ) +
  scale_y_log10(
    breaks = c(0, 1, 1e5, 2.5e5, 5e5, 1e6, 2e6, 5e6, 1e7),
    labels = comma
  ) +
  xlab("") +
  ylab("") +
  ggtitle("Gross selling price (log scale)") +
  clean_theme(base_size = 24)

# Write the chart to a 1600 x 1000 pixel PNG
png("outputs/all-sales-dots-log.png", width = 1600, height = 1000)
print(all_sales_dots_log)
dev.off()
# Bar chart of the number of sales per calendar month. Each sale is assigned
# to the first day of the month in which its price was agreed, then sales are
# counted per month.
monthly_sales_count <- ggplot(
  dat %>%
    mutate(
      date_month = ymd(paste(year(date_price_agreed),
                             month(date_price_agreed),
                             "1",
                             sep = "-"))
    ) %>%
    group_by(date_month) %>%
    summarise(sales_count = n()),
  aes(x = date_month, y = sales_count)
) +
  # Heights are the pre-computed counts; width is in days on the date axis
  geom_bar(stat = "identity", width = 20) +
  scale_x_date(
    breaks = date_breaks("1 year"),
    labels = date_format("%Y")
  ) +
  scale_y_continuous(
    breaks = seq(0, 1750, by = 250),
    labels = comma
  ) +
  xlab("") +
  ylab("") +
  ggtitle("Monthly sales count") +
  clean_theme(base_size = 24)

# Write the chart to a 1600 x 1000 pixel PNG
png("outputs/monthly-sales-count.png", width = 1600, height = 1000)
print(monthly_sales_count)
dev.off()
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Mapping of geocoded sales
# Randomly drop data between 2009 and 2016 to compensate for geocoding rate variation
# For each year, the number of geocoded sales to discard is the gap between
# that year's geocoding rate and the lowest yearly rate, multiplied by the
# year's total sales (rounded). After the drop every year keeps roughly
# min(rate) * total geocoded sales.
annual_info %<>%
  mutate(num_rows_to_remove = round((rate - min(rate)) * total))

# Only sales with both coordinates can be mapped
dat_adjusted <- dat %>%
  filter(!is.na(latitude), !is.na(longitude))

# Choose the rows to discard, one year at a time, in the order the years
# appear in annual_info.
# - Iterating over annual_info's own rows means a year with no sales is simply
#   absent instead of producing a zero-length `if` condition.
# - Positions are drawn with sample.int() and then mapped back to row numbers.
#   sample(x, n) treats a length-one numeric x as "sample from 1:x", which
#   would pick an arbitrary row whenever a year has exactly one geocoded sale.
# The draw is random; call set.seed() before this section if the maps need to
# be reproducible.
rows_to_drop <- unlist(lapply(seq_len(nrow(annual_info)), function(i) {
  n_remove <- annual_info$num_rows_to_remove[i]
  if (n_remove <= 0) {
    return(integer(0))
  }
  year_rows <- which(
    dat_adjusted$year_price_agreed == annual_info$year_price_agreed[i]
  )
  year_rows[sample.int(length(year_rows), n_remove)]
}))

# Negative indexing with an empty vector would select zero rows, so only
# subset when there is something to remove
if (length(rows_to_drop) > 0) {
  dat_adjusted <- dat_adjusted[-rows_to_drop, ]
}
# Background setup
# Read the shapefile layers drawn behind and over the sale points.
# `lakes` is loaded here, but the only call that draws it in the map loop is
# commented out.
coastline <- st_read(
  dsn = "nz-coastlines-and-islands-polygons-topo-150k/nz-coastlines-and-islands-polygons-topo-150k.shp"
)
rivers <- st_read(
  dsn = "nz-river-polygons-topo-150k/nz-river-polygons-topo-150k.shp"
)
lakes <- st_read(
  dsn = "nz-lake-polygons-topo-150k/nz-lake-polygons-topo-150k.shp"
)
roads <- st_read(
  dsn = "lds-nz-road-centerlines-topo-150k-SHP/nz-road-centerlines-topo-150k.shp"
)
# Colour setup
# Price band edges; there is one palette colour per band (five bands).
colour_breaks <- c(0, 250000, 500000, 1000000, 5000000, Inf)
colour_palette <- plasma(length(colour_breaks) - 1, begin = 0.25)

# Assign each sale the colour of its price band. Bands are closed on the
# right, e.g. (250000, 500000]. include.lowest = TRUE also closes the first
# band on the left, so a sale price of exactly 0 falls in "$0 to $250k"
# instead of getting an NA colour and silently not being drawn.
# labels = FALSE makes cut() return the integer band index directly.
# Prices that are NA or negative still get an NA colour.
price_band <- cut(dat_adjusted$gross_sale_price,
                  breaks = colour_breaks,
                  include.lowest = TRUE,
                  labels = FALSE)
dat_adjusted$colour <- colour_palette[price_band]

# Fixed colours for the map layers and legend text
land_colour <- "black"
water_colour <- rgb(220, 220, 220, maxColorValue = 255)
roads_colour <- rgb(140, 140, 140, maxColorValue = 255)
text_colour <- "black"
# Small multiples dot map by year
# One 1600 x 1600 pixel panel per year, stacked vertically in a single PNG.
# The panel count and image height are both derived from map_years so they
# cannot drift apart.
map_years <- 2009:2016
png("outputs/map-combined.png",
    width = 1600,
    height = 1600 * length(map_years))
par(mfrow = c(length(map_years), 1),
    bg = water_colour,
    family = "Helvetica")
for (y in map_years) {
  map_dat <- dat_adjusted %>%
    filter(year_price_agreed == y)

  # Empty canvas fixing the map window. ann = FALSE suppresses the default
  # axis titles, which would otherwise print as "0" and "0" (the deparsed
  # plot(0, 0) arguments) on every panel.
  # NOTE(review): the window is in metre-scale coordinates, so the
  # longitude/latitude columns are presumably projected values in the same
  # system as the shapefiles — confirm.
  plot(0, 0,
       type = "n",
       ylim = c(5167500, 5192500),
       xlim = c(1560000, 1585000),
       axes = FALSE,
       ann = FALSE)

  # Layer order: land first, then the sale points, then rivers and roads on
  # top of the points. Only the geometries are drawn (st_geometry), so sf's
  # attribute-colouring path and its "ignoring all but the first attribute"
  # warning are avoided.
  plot(st_geometry(coastline), col = land_colour, border = NA, add = TRUE)
  points(map_dat$longitude, map_dat$latitude,
         pch = 19,
         cex = 1.2,
         col = map_dat$colour)
  plot(st_geometry(rivers),
       col = water_colour,
       border = water_colour,
       add = TRUE)
  #plot(lakes, col = water_colour, border = water_colour, add = TRUE)
  plot(st_geometry(roads), col = roads_colour, add = TRUE)

  # Legend titled with the panel's year; swatches follow colour_palette order
  legend(x = 1579000,
         y = 5193000,
         legend = c("$0 to $250k",
                    "$250k to $500k",
                    "$500k to $1m",
                    "$1m to $5m",
                    "Over $5m"),
         title = as.character(y),
         fill = colour_palette,
         border = NA,
         text.col = text_colour,
         cex = 4,
         xjust = 0,
         bty = "n")
}
dev.off()
# -----------------------------------------------------------------------------
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment