Created
November 13, 2019 20:38
-
-
Save davidaarmstrong/db06462803bbb5b56f9465624c355d7b to your computer and use it in GitHub Desktop.
This code maps Canada Census Subdivisions from the 2016 Census onto Federal Electoral Districts from the 2013 representation order.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(dplyr) | |
library(sf) | |
library(rgeos) | |
library(rgdal) | |
## Download: https://www12.statcan.gc.ca/census-recensement/2011/geo/bound-limit/bound-limit-2016-eng.cfm | |
## choose ArcGIS (.shp) and Federal Electoral Districts (2013 Representation Order) | |
## read in electoral district boundaries. | |
eb <- read_sf("FED_CA_2_2_ENG.shp") | |
## Download the digital map boundaries for the 2016 Census Subdivisions from here: https://www12.statcan.gc.ca/census-recensement/2011/geo/bound-limit/bound-limit-2016-eng.cfm | |
## Choose digital boundary files for census subdivisions | |
cb <- read_sf("lcsd000b16a_e/lcsd000b16a_e.shp") | |
## Download the Census Data | |
## I used this API: https://github.com/mountainMath/cancensus | |
## Look at the Readme page for the GitHub Repository to see how to obtain and incorporate the API Key. | |
library(cancensus) | |
## this is an example set of vectors | |
v <- c("v_CA16_5054", "v_CA16_5060", "v_CA16_5078", "v_CA16_5081", "v_CA16_5090", "v_CA16_3957", "v_CA16_401", "v_CA16_407", "v_CA16_2207") | |
## list the regions and filter them to the CSDs | |
regs <- list_census_regions("CA16") %>% filter(level == "CSD") | |
## initialize a list | |
cd <- list() | |
## Force R to download 100 regions at a time | |
s <- seq(1,5201,by=100) | |
s[1] <- 0 | |
s[length(s)] <- nrow(regs) | |
for(i in 2:length(s)){ | |
cd[[i]] <- get_census('CA16', regions = list(CSD=regs$region[(s[(i-1)]+1):s[i]]), level="CSD", vectors=v) | |
} | |
## Put all of the regions together | |
cd <- do.call(rbind, cd) | |
## Join the downloaded census data to the data in the census geographic data | |
## first, remove columns that will be duplicates | |
cd <- cd[,-(8:10)] | |
names(cd)[1] <- "CSDUID" | |
## merge datasets | |
cb <- left_join(cb, cd) | |
## Initialize a matrix to hold the mapping results | |
mat <- array(0, dim = c(nrow(eb), nrow(cb))) | |
## create an observation counter in each dataset | |
eb$obs <- 1:nrow(eb) | |
cb$obs <- 1:nrow(cb) | |
## convert both objects to spatial objects | |
eb <- as(eb, "Spatial") | |
cb <- as(cb, "Spatial") | |
## save the data from both objects | |
ebd <- eb@data | |
cbd <- cb@data | |
## Simplify and buffer to clear up problems | |
## with rGEOS errors | |
eb <- gSimplify(eb, tol=0.00001) | |
eb <- gBuffer(eb, byid=TRUE, width=0) | |
cb <- gSimplify(cb, tol=0.00001) | |
cb <- gBuffer(cb, byid=TRUE, width=0) | |
## NOTE: the loop below would take about 20 hours to run | |
## on a single process. | |
## loop over electoral boundaries | |
for(i in 1:length(eb)){ | |
## loop over census subdivision boundaries | |
for(j in 1:length(cb)){ | |
## subset boundaries so that each contains a single polygon set | |
tmp.eb <- eb[i] | |
tmp.csd <- cb[j] | |
## Find the intersection of the two polygons | |
inter <- gIntersection(tmp.eb, tmp.csd) | |
## If the intersection isn't null, create find the area of the intersectiono | |
if(!is.null(inter)){ | |
r <- try(gArea(inter)) | |
if(class(r) != "try-error"){ | |
## if the area function didn't fail, put the result in the appropriate | |
## cell in the matrix | |
mat[i,j] <- r | |
} | |
} | |
} | |
# print a counter for electoral boundaries (347 in total) | |
cat(i, ", ", sep="") | |
} | |
## Next, calculate the area of the original census divisions | |
csd.areas <- gArea(cb, byid=TRUE) | |
## Find the proportion of overlap from each census subdivision in each electoral district | |
mat2 <- t(apply(mat, 1, function(x)x/csd.areas)) | |
## take the census boundary data from above and use | |
## only those columns that represent the vectors downloaded | |
## from the census API | |
cd.mat <- as.matrix(cbd[,21:33]) | |
## change missing data to 0. There were 14 | |
## subdivisions that were in the map boundary file | |
## that were not in the data downloaded from the census. | |
## These are the only missing observations | |
cd.mat[which(is.na(cd.mat), arr.ind=TRUE)] <- 0 | |
## Reapportion the data | |
FED.data <- mat2 %*% cd.mat | |
## Make the object a data frame and set the variable names | |
FED.data <- as.data.frame(FED.data) | |
names(FED.data) <- names(cbd)[21:33] | |
## Connect the new census-based FED data to the original data from the | |
## geographic boundary file. | |
FED_census <- cbind(ebd, FED.data) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment