Skip to content

Instantly share code, notes, and snippets.

@geotheory
Last active August 29, 2015 14:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save geotheory/9b03e9c5d56595e7e8a1 to your computer and use it in GitHub Desktop.
Save geotheory/9b03e9c5d56595e7e8a1 to your computer and use it in GitHub Desktop.
This script demonstrates how to process the 2011 UK Census shapefiles and data to produce the graphics used in http://geotheory.co.uk/blog/2014/06/23/gender-in-urban-workplaces/
# Load dependencies ----
# library() errors immediately if a package is missing, unlike require(),
# which only returns FALSE and lets the script fail later with a more
# confusing message.
library(maptools)
library(fields)
library(ggplot2)
library(stringr)

# British National Grid projection (EPSG:27700), used for both shapefiles
bng <- "+init=epsg:27700"

# Working directory: all downloads go into a 'wz' subfolder
workdir <- '[change to working directory path]'
setwd(workdir)
R.utils::mkdirs('wz')
workdir <- paste0(workdir, '/wz')
setwd(workdir)
# Download & unzip the Workplace Zone centroid and boundary shapefiles ----
# (the geoportal sometimes blocks scripted downloads, so may need doing manually)
wz_centroids_url <- 'https://geoportal.statistics.gov.uk/Docs/Boundaries/Workplace_zones_(E+W)_2011_Population_Weighted_Centroids.zip'
wz_boundaries_url <- 'https://geoportal.statistics.gov.uk/Docs/Boundaries/Workplace_zones_(E+W)_2011_Boundaries_(Generalised_Clipped).zip'
# method="auto" lets R pick an https-capable method (libcurl/wininet);
# the original method="internal" cannot handle https URLs on most platforms
download.file(wz_centroids_url, paste0(workdir, "/workplace_zones_centroids.zip"), method="auto", mode="wb")
download.file(wz_boundaries_url, paste0(workdir, "/workplace_zones_boundaries.zip"), method="auto", mode="wb")
unzip('workplace_zones_centroids.zip'); unzip('workplace_zones_boundaries.zip')
unlink(c('workplace_zones_centroids.zip','workplace_zones_boundaries.zip')) # delete zips
# Read in and tidy up the shapefiles ----
# NOTE(review): the CRS is supplied explicitly via proj4string — presumably
# because readShape* does not pick it up from the .prj file; also maptools
# has since been retired in favour of sf, so confirm it still installs.
wz = readShapePoints("WZ_2011_EW_PWC.shp", proj4string=CRS(bng)) # population-weighted centroids
wz = wz[order(wz$WZ11CD),] # reorder by zone code
names(wz) = 'code' # single attribute column: the workplace-zone code
head(wz@data)
wzb = readShapePoly("WZ_2011_EW_BGC.shp", proj4string=CRS(bng)) # generalised clipped boundaries
wzb = wzb[order(wzb$WZ11CD),] # reorder to match wz
dim(wzb) # data table dimensions
head(wzb@data)
all(wzb$WZ11CD == wz$code) # check codes match other data (should print TRUE)
wzb = wzb[,c(1,3)] # remove non-required data columns; keep code and name
names(wzb) = c('code','name')
head(wzb@data)
# Census data ----
# download file 'census workplace zones 2011' from
# http://www.nomisweb.co.uk/census/2011/wp1101ew and copy the extracted
# 'bulk.csv' into the working directory.
# FALSE spelled out (not the reassignable shorthand F) keeps character
# columns as strings
datain <- read.csv('bulk.csv', stringsAsFactors=FALSE)
View(datain)
names(datain)[1:6] <- c('year','geog','code','total','men','women')
datain[1:6,3:6] # the columns we will use
datain <- datain[order(datain$code),] # reorder so zone codes align with the shapefile

# check data and shapefile zones match up (should print TRUE)
all(datain$code == wz$code)

# extract population-weighted centroid coordinates from the wz spatial object
wz_centroids <- data.frame(coordinates(wz))
names(wz_centroids) <- c('e','n') # eastings and northings (metres, BNG)
head(wz_centroids)

# working data: zone code, population counts, and centroid coordinates
d <- cbind(datain[,3:6], wz_centroids)
# Aggregate workplace populations onto a 1 km grid ----
# round(x, -3) snaps coordinates to the nearest 1000 m, i.e. 1 km resolution
d$e_round <- round(d$e, -3)
d$n_round <- round(d$n, -3)
d_agg <- aggregate(total ~ e_round + n_round, data = d, FUN = sum)
# sort descending so the densest km squares come first
# (TRUE spelled out rather than the reassignable shorthand T)
d_agg <- d_agg[order(d_agg$total, decreasing = TRUE), ]
dim(d_agg); head(d_agg)
# Greedy algorithm: select the 28 highest-density km squares such that no
# selected square lies within a 20 km radius of a denser, already-selected
# one. d_agg is already sorted by density (descending), so scanning rows in
# order visits candidates densest-first.
topcities = d_agg[1,] # first entry is top of results
k=1 # row cursor into d_agg; incremented to 2 before first use
for(i in 2:28){
while(TRUE){
k = k+1
p = d_agg[k,] # next candidate row (densest remaining square)
tst = FALSE # becomes TRUE if the candidate is within radius of an existing entry
# iterate through results already compiled
for(j in 1:nrow(topcities)){
# NOTE(review): c() on two one-row data frames yields a list, so m is a
# list-matrix; dist() appears to cope here, but unlist()/as.numeric()
# before matrix() would be safer — confirm
m = t(matrix(c(p[,1:2], topcities[j,1:2]),2)) # matrix of 2 point coordinates
dis = as.numeric(dist(m)) # Euclidean distance between the two squares
if(dis < 20000) tst = TRUE # distance threshold in metres (20 km)
}
if(tst) next else { # too close to an existing city: try the next candidate
topcities[i,] = d_agg[k,] # new entry has been found
break # fully breaks while loop, moving on to city i+1
}
}
}
# sanity plot: squares scaled by workplace population
symbols(topcities[, 1:2], squares = sqrt(topcities$total), bg = '#ff000050', asp = TRUE)
names(topcities) <- c('e', 'n', 'pop')

# Label each top-city square with the name of the nearest workplace zone ----
# (some names were subsequently re-named manually)
for (i in seq_len(nrow(topcities))) {
  # distances from this km square to every zone centroid
  dists <- t(rdist(topcities[i, 1:2], wz_centroids))[, 1]
  # which.min() finds the nearest centroid directly. The original used
  # grep(min(dists), dists), which converts the number to text and treats it
  # as a regular expression: '.' matches any character and substring matches
  # can hit the wrong element (or several), making the lookup unreliable.
  topcities$name[i] <- as.character(wzb$name[which.min(dists)])
}

# let's see where the cities sit in relation to the full dataset
par(mai = c(0, 0, 0, 0)) # remove plot margins
plot(d$e, d$n, pch = '.', asp = TRUE)
points(topcities$e, topcities$n, col = 'red', pch = 16)
text(topcities[, 1:2], labels = topcities$name, col = 'blue', cex = .7)
# Quantile categories for colour scaling later on ----
N <- 7 # number of colour classes
d$femprop <- 100 * d$women / d$total # % of workplace population that is female

# N+1 break points give N quantile bins
brks <- quantile(d$femprop, probs = seq(0, 1, length.out = N + 1))
# cut once and reuse; the original called cut() twice with identical
# arguments (once for codes, once for interval labels)
qtls <- cut(d$femprop, brks, include.lowest = TRUE)
qtl_labs <- levels(qtls)
# interval label per row, then ordered high-to-low so the legend reads
# from most- to least-female
d$women_labs <- qtl_labs[as.integer(qtls)]
d$women_labs <- ordered(d$women_labs, levels = rev(qtl_labs))
d_uk <- d # save the full England & Wales dataset before filtering

# Assign each zone to the nearest top city within a 3.5 km box, else "" ----
# vapply over row indices replaces the original apply(d, 1, ...), which
# coerced the whole mixed-type data frame to a character matrix and relied
# on magic column positions (row[5], row[6]) for the coordinates. The cheap
# box test runs first; the exact Euclidean distance is only computed for
# zones that pass it.
d$city <- vapply(seq_len(nrow(d)), function(i) {
  distx <- abs(d$e[i] - topcities$e)
  disty <- abs(d$n[i] - topcities$n)
  if (any(distx <= 3500 & disty <= 3500)) {
    dists <- sqrt(distx^2 + disty^2)
    topcities$name[which.min(dists)] # nearest qualifying city
  } else {
    "" # not near any top city
  }
}, character(1))

# keep only zones attached to a city and re-plot
d <- d[d$city != '', ]
plot(d$e, d$n, pch = '.', asp = TRUE)
# Custom minimal theme: no axis text or ticks, square panels ----
# built idiomatically with theme() rather than mutating the theme object's
# fields directly (the original also assigned axis.text and axis.ticks
# twice each — redundant duplicate lines)
customtheme <- theme_bw() +
  theme(axis.text = element_blank(),
        axis.ticks = element_blank(),
        aspect.ratio = 1)

# colour ramp: navy -> grey -> pink across N+1 steps
cols <- colorRampPalette(c("navy", "grey70", "pink"))(N + 1)
# run the plot: one free-scaled panel per city, points coloured by quantile bin
# NOTE(review): scale_colour_manual(values=...) expects one colour per factor
# level, but cols[as.numeric(d$women_labs)] supplies one colour per ROW —
# verify the level-to-colour mapping comes out as intended (values = cols
# may be what was meant)
ggplot(d, aes(e, n, col=factor(women_labs))) + geom_point(size=1.5) +
scale_colour_manual(values=cols[as.numeric(d$women_labs)]) +
facet_wrap(~ city, scales='free', nrow=4) + customtheme
# Distance-decay plots (demo only: the 3.5 km selection box is square, not
# circular, so the method is not strictly correct at the edges).
# NOTE(review): apply() coerces each row to a character vector; row[11] is
# d$city and row[5:6] are the e/n coordinates — fragile if column order
# changes upstream, so verify positions before reuse.
d$dist = apply(d, 1, function(row) rdist(topcities[topcities$name == as.character(row[11]), 1:2], t(as.numeric(row[5:6])))[1,1])
# loess-smoothed trend of female share vs distance from the city centre
ggplot(d, aes(dist, femprop)) + geom_smooth(method="loess")
# the same trend, one free-y panel per city
ggplot(d, aes(dist, femprop)) + geom_smooth(method="loess") + facet_wrap(~ city, scales='free_y')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment