Skip to content

Instantly share code, notes, and snippets.

@jamestrimble
Last active January 17, 2017 19:03
Show Gist options
  • Star 6 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save jamestrimble/6917136 to your computer and use it in GitHub Desktop.
Save jamestrimble/6917136 to your computer and use it in GitHub Desktop.
An R script to prepare the data for ukdataexplorer.com/census/ Based on work by Alex Singleton, http://www.alex-singleton.com/2011-census-open-atlas-project/
# Script to create data for http://ukdataexplorer.com/census/
# Author James Trimble, james.trimble at yahoo.co.uk
#
# This code is based on Alex Singleton's work:
# http://rpubs.com/alexsingleton/openatlas
# http://www.alex-singleton.com/2011-census-open-atlas-project/
#
# The script takes around an hour to run. It could be made faster!
# The script requires a lookup table and shapefile from
# https://geoportal.statistics.gov.uk/geoportal/catalog/search/browse/browse.page
# The shapefile of wards neds to be converted to lat-long coordinates,
# for example using the four commented lines of code below.
# See https://stat.ethz.ch/pipermail/r-sig-geo/2013-February/017656.html
# ogrListLayers('WD_DEC_2011_EW_BGC.shp')
# map <- readOGR('WD_DEC_2011_EW_BGC.shp', "WD_DEC_2011_EW_BGC")
# map <- spTransform(map,CRS("+init=epsg:4326"))
# writeOGR(map, 'wards.shp', layer="wards", driver="ESRI Shapefile")
library(rgdal)
library(classInt)
library(plyr)
working_dir <- 'working_directory'
setwd(working_dir)
# Create a list of tables
table_list <- c("KS101EW", "KS102EW", "KS103EW", "KS104EW", "KS105EW", "KS106EW",
"KS107EW", "KS201EW", "KS202EW", "KS204EW", "KS205EW", "KS206EW", "KS207WA",
"KS208WA", "KS209EW", "KS301EW", "KS401EW", "KS402EW", "KS403EW", "KS404EW",
"KS405EW", "KS501EW", "KS601EW", "KS602EW", "KS603EW", "KS604EW", "KS605EW",
"KS606EW", "KS607EW", "KS608EW", "KS609EW", "KS610EW", "KS611EW", "KS612EW",
"KS613EW")
table_list <- table_list[substr(table_list,6,7)!="WA"]
# Create a lookup-array of statistical populations (the denominator for percentages)
stat_populations <- c()
for (n in table_list) {
fn <- paste(n, 'META0.CSV', sep='')
d <- read.csv(fn)
stat_populations[n] <- as.character(d$StatisticalPopulation[1])
}
table_list <- tolower(paste(table_list, "_2011_ward", sep = ""))
# Download Files
for (n in 1:length(table_list)) {
file <- as.character(table_list[n])
temp <- tempfile(fileext = ".zip")
download.file(paste("http://www.nomisweb.co.uk/output/census/2011/", file,
".zip", sep = ""), temp)
unzip(temp, junkpaths = TRUE)
unlink(temp)
csv_file <- paste(toupper(sub("_2011_ward", "", file)), "DATA.CSV", sep = "")
assign(file, read.csv(csv_file))
}
Variable_Desc <- NULL
for (n in 1:length(table_list)) {
file <- as.character(table_list[n])
csv_file <- paste(toupper(sub("_2011_ward", "", file)), "DESC0.CSV", sep = "")
temp <- read.csv(csv_file)
Variable_Desc <- rbind(Variable_Desc, temp)
remove(temp)
}
# Create lookup
Key_Statistics_2011 <- as.data.frame(ks101ew_2011_ward[, 1])
colnames(Key_Statistics_2011) <- "GeographyCode"
# Merge Loop
for (n in 1:length(table_list)) {
Key_Statistics_2011 <- data.frame(Key_Statistics_2011, get(as.character(table_list[n]))[match(get(as.character(table_list[n]))[,
"GeographyCode"], get(as.character(table_list[n]))[, "GeographyCode"]),
])
Key_Statistics_2011$GeographyCode.1 <- NULL
}
# Create key stats table List
a <- c("KS101EW", "KS102EW", "KS103EW", "KS104EW", "KS105EW", "KS106EW", "KS107EW",
"KS201EW", "KS202EW", "KS204EW", "KS205EW", "KS206EW", "KS207WA", "KS208WA",
"KS209EW", "KS301EW", "KS401EW", "KS402EW", "KS403EW", "KS404EW", "KS405EW",
"KS501EW", "KS601EW", "KS602EW", "KS603EW", "KS604EW", "KS605EW", "KS606EW",
"KS607EW", "KS608EW", "KS609EW", "KS610EW", "KS611EW", "KS612EW", "KS613EW")
table_lookup <- c("Usual resident population", "Age structure", "Marital and civil partnership status",
"Living arrangements", "Household composition",
"Adults not in employment and dependent children and persons with long-term health problem or disability for all households",
"Lone parent households with dependent children", "Ethnic group", "National identity",
"Country of birth", "Passports held", "Household language", "Welsh language skills",
"Welsh language profile", "Religion", "Health and provision of unpaid care",
"Dwellings, household spaces and accommodation type", "Tenure", "Rooms, bedrooms and central heating",
"Car or van availability", "Communal establishment residents", "Qualifications and students",
"Economic activity", "Economic activity - Males", "Economic activity - Females",
"Hours worked", "Industry", "Industry - Males", "Industry - Females", "Occupation",
"Occupation - Males", "Occupation - Females", "NS-SeC", "NS-SeC - Males",
"NS-SeC - Females")
names(table_lookup) <- a
# download from
# https://geoportal.statistics.gov.uk/geoportal/catalog/search/browse/browse.page
ward_reg_lkup <- read.csv('lookup/CTRY11_GOR10_CTY11_LAD11_WD11_UK_LU.csv')
create_files_for_region <- function(region_name, ward_list_for_region) {
short_region_name <- tolower(gsub(' ', '', region_name))
dir_name <- paste('region_output/', short_region_name, sep='')
dir.create(dir_name)
data_dir_name <- paste(dir_name, '/data', sep='')
dir.create(data_dir_name)
data_for_region <- Key_Statistics_2011[Key_Statistics_2011$GeographyCode %in% ward_list_for_region,]
Variable_Desc <- Variable_Desc[Variable_Desc$ColumnVariableMeasurementUnit!="Count", ]
json_str <- '[\n'
sep_str <- ''
for (i in 1:nrow(Variable_Desc)) {
col_code <- as.character(Variable_Desc[i, "ColumnVariableCode"])
if ( length(unique(as.numeric(as.character(data_for_region[, col_code])))) < 3 ) {
next # skip this variable if there are less than three different values
}
brks <- classIntervals(as.numeric(as.character(data_for_region[, col_code])), n = 6, style = "fisher",
unique = TRUE)$brks
var_json_str <- paste(
'',
"{\"code\": \"",
Variable_Desc[i, "ColumnVariableCode"],
"\",\n \"table\": \"",
table_lookup[substr(Variable_Desc[i, "ColumnVariableCode"],0,7)],
"\",\n \"table_denominator\": \"",
stat_populations[substr(Variable_Desc[i, "ColumnVariableCode"],0,7)],
"\",\n \"unit\": \"",
Variable_Desc[i, "ColumnVariableMeasurementUnit"],
"\",\n \"desc\": \"",
Variable_Desc[i, "ColumnVariableDescription"],
"\",\n \"breaks\": [",
paste(brks, collapse=","),
"]\n}",
sep='')
json_str <- paste(json_str, sep_str, var_json_str, sep='')
sep_str <- ',\n'
data_to_save <- data_for_region[, c(1, which(names(data_for_region)==col_code))]
data_json <- '{\n'
sep_str2 <- ''
for (j in 1:nrow(data_to_save)) {
data_val <- data_to_save[j, 2]
if (data_val != '..') {
data_json <- paste(
data_json,
sep_str2,
' "',
data_to_save[j, 1],
'": ',
data_val,
'\n',
sep = '')
sep_str2 <- ','
}
}
data_json = paste(data_json, '}', sep='')
fileConn<-file(paste(data_dir_name, '/', col_code, '.json', sep=''))
writeLines(data_json, fileConn)
close(fileConn)
}
json_str <- paste(json_str, ']', sep='')
fileConn<-file(paste(data_dir_name, '/', 'info.json', sep=''))
writeLines(json_str, fileConn)
close(fileConn)
map <- map[map@data$WD11CD %in% ward_list_for_region, ]
writeOGR(map, paste(data_dir_name, "/wards.json", sep=''), layer=paste("wards1", short_region_name, sep=''), driver="GeoJSON")
}
map <- readOGR('geo/wards/wards.shp', 'wards')
englandRegionNames <- levels(ward_reg_lkup$GOR10NM)
englandRegionNames <- englandRegionNames[englandRegionNames != '']
for (region_name in englandRegionNames) {
wards_in_region <- ward_reg_lkup[ward_reg_lkup$GOR10NM==region_name, ]$WD11CD
create_files_for_region(region_name, wards_in_region)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment