Skip to content

Instantly share code, notes, and snippets.

@davidaarmstrong
Created November 13, 2019 20:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save davidaarmstrong/db06462803bbb5b56f9465624c355d7b to your computer and use it in GitHub Desktop.
Save davidaarmstrong/db06462803bbb5b56f9465624c355d7b to your computer and use it in GitHub Desktop.
This code maps Canada Census Subdivisions from the 2016 Census onto Federal Electoral Districts from the 2013 representation order.
library(dplyr)
library(sf)
library(rgeos)
library(rgdal)
## Download: https://www12.statcan.gc.ca/census-recensement/2011/geo/bound-limit/bound-limit-2016-eng.cfm
## choose ArcGIS (.shp) and Federal Electoral Districts (2013 Representation Order)
## read in electoral district boundaries.
eb <- read_sf("FED_CA_2_2_ENG.shp")
## Download the digital map boundaries for the 2016 Census Subdivisions from here: https://www12.statcan.gc.ca/census-recensement/2011/geo/bound-limit/bound-limit-2016-eng.cfm
## Choose digital boundary files for census subdivisions
cb <- read_sf("lcsd000b16a_e/lcsd000b16a_e.shp")
## Download the Census Data
## I used this API: https://github.com/mountainMath/cancensus
## Look at the Readme page for the GitHub Repository to see how to obtain and incorporate the API Key.
library(cancensus)
## this is an example set of vectors
v <- c("v_CA16_5054", "v_CA16_5060", "v_CA16_5078", "v_CA16_5081", "v_CA16_5090", "v_CA16_3957", "v_CA16_401", "v_CA16_407", "v_CA16_2207")
## list the regions and filter them to the CSDs
regs <- list_census_regions("CA16") %>% filter(level == "CSD")
## initialize a list
cd <- list()
## Force R to download 100 regions at a time
s <- seq(1,5201,by=100)
s[1] <- 0
s[length(s)] <- nrow(regs)
for(i in 2:length(s)){
cd[[i]] <- get_census('CA16', regions = list(CSD=regs$region[(s[(i-1)]+1):s[i]]), level="CSD", vectors=v)
}
## Put all of the regions together
cd <- do.call(rbind, cd)
## Join the downloaded census data to the data in the census geographic data
## first, remove columns that will be duplicates
cd <- cd[,-(8:10)]
names(cd)[1] <- "CSDUID"
## merge datasets
cb <- left_join(cb, cd)
## Initialize a matrix to hold the mapping results
mat <- array(0, dim = c(nrow(eb), nrow(cb)))
## create an observation counter in each dataset
eb$obs <- 1:nrow(eb)
cb$obs <- 1:nrow(cb)
## convert both objects to spatial objects
eb <- as(eb, "Spatial")
cb <- as(cb, "Spatial")
## save the data from both objects
ebd <- eb@data
cbd <- cb@data
## Simplify and buffer to clear up problems
## with rGEOS errors
eb <- gSimplify(eb, tol=0.00001)
eb <- gBuffer(eb, byid=TRUE, width=0)
cb <- gSimplify(cb, tol=0.00001)
cb <- gBuffer(cb, byid=TRUE, width=0)
## NOTE: the loop below would take about 20 hours to run
## on a single process.
## loop over electoral boundaries
for(i in 1:length(eb)){
## loop over census subdivision boundaries
for(j in 1:length(cb)){
## subset boundaries so that each contains a single polygon set
tmp.eb <- eb[i]
tmp.csd <- cb[j]
## Find the intersection of the two polygons
inter <- gIntersection(tmp.eb, tmp.csd)
## If the intersection isn't null, create find the area of the intersectiono
if(!is.null(inter)){
r <- try(gArea(inter))
if(class(r) != "try-error"){
## if the area function didn't fail, put the result in the appropriate
## cell in the matrix
mat[i,j] <- r
}
}
}
# print a counter for electoral boundaries (347 in total)
cat(i, ", ", sep="")
}
## Next, calculate the area of the original census divisions
csd.areas <- gArea(cb, byid=TRUE)
## Find the proportion of overlap from each census subdivision in each electoral district
mat2 <- t(apply(mat, 1, function(x)x/csd.areas))
## take the census boundary data from above and use
## only those columns that represent the vectors downloaded
## from the census API
cd.mat <- as.matrix(cbd[,21:33])
## change missing data to 0. There were 14
## subdivisions that were in the map boundary file
## that were not in the data downloaded from the census.
## These are the only missing observations
cd.mat[which(is.na(cd.mat), arr.ind=TRUE)] <- 0
## Reapportion the data
FED.data <- mat2 %*% cd.mat
## Make the object a data frame and set the variable names
FED.data <- as.data.frame(FED.data)
names(FED.data) <- names(cbd)[21:33]
## Connect the new census-based FED data to the original data from the
## geographic boundary file.
FED_census <- cbind(ebd, FED.data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment