Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
An R script to prepare the data for ukdataexplorer.com/census/ Based on work by Alex Singleton, http://www.alex-singleton.com/2011-census-open-atlas-project/
# Script to create data for http://ukdataexplorer.com/census/
# Author James Trimble, james.trimble at yahoo.co.uk
#
# This code is based on Alex Singleton's work:
# http://rpubs.com/alexsingleton/openatlas
# http://www.alex-singleton.com/2011-census-open-atlas-project/
#
# The script takes around an hour to run. It could be made faster!
# The script requires a lookup table and shapefile from
# https://geoportal.statistics.gov.uk/geoportal/catalog/search/browse/browse.page
# The shapefile of wards neds to be converted to lat-long coordinates,
# for example using the four commented lines of code below.
# See https://stat.ethz.ch/pipermail/r-sig-geo/2013-February/017656.html
# ogrListLayers('WD_DEC_2011_EW_BGC.shp')
# map <- readOGR('WD_DEC_2011_EW_BGC.shp', "WD_DEC_2011_EW_BGC")
# map <- spTransform(map,CRS("+init=epsg:4326"))
# writeOGR(map, 'wards.shp', layer="wards", driver="ESRI Shapefile")
library(rgdal)
library(classInt)
library(plyr)
working_dir <- 'working_directory'
setwd(working_dir)
# Create a list of tables
table_list <- c("KS101EW", "KS102EW", "KS103EW", "KS104EW", "KS105EW", "KS106EW",
"KS107EW", "KS201EW", "KS202EW", "KS204EW", "KS205EW", "KS206EW", "KS207WA",
"KS208WA", "KS209EW", "KS301EW", "KS401EW", "KS402EW", "KS403EW", "KS404EW",
"KS405EW", "KS501EW", "KS601EW", "KS602EW", "KS603EW", "KS604EW", "KS605EW",
"KS606EW", "KS607EW", "KS608EW", "KS609EW", "KS610EW", "KS611EW", "KS612EW",
"KS613EW")
table_list <- table_list[substr(table_list,6,7)!="WA"]
# Create a lookup-array of statistical populations (the denominator for percentages)
stat_populations <- c()
for (n in table_list) {
fn <- paste(n, 'META0.CSV', sep='')
d <- read.csv(fn)
stat_populations[n] <- as.character(d$StatisticalPopulation[1])
}
table_list <- tolower(paste(table_list, "_2011_ward", sep = ""))
# Download Files
for (n in 1:length(table_list)) {
file <- as.character(table_list[n])
temp <- tempfile(fileext = ".zip")
download.file(paste("http://www.nomisweb.co.uk/output/census/2011/", file,
".zip", sep = ""), temp)
unzip(temp, junkpaths = TRUE)
unlink(temp)
csv_file <- paste(toupper(sub("_2011_ward", "", file)), "DATA.CSV", sep = "")
assign(file, read.csv(csv_file))
}
Variable_Desc <- NULL
for (n in 1:length(table_list)) {
file <- as.character(table_list[n])
csv_file <- paste(toupper(sub("_2011_ward", "", file)), "DESC0.CSV", sep = "")
temp <- read.csv(csv_file)
Variable_Desc <- rbind(Variable_Desc, temp)
remove(temp)
}
# Create lookup
Key_Statistics_2011 <- as.data.frame(ks101ew_2011_ward[, 1])
colnames(Key_Statistics_2011) <- "GeographyCode"
# Merge Loop
for (n in 1:length(table_list)) {
Key_Statistics_2011 <- data.frame(Key_Statistics_2011, get(as.character(table_list[n]))[match(get(as.character(table_list[n]))[,
"GeographyCode"], get(as.character(table_list[n]))[, "GeographyCode"]),
])
Key_Statistics_2011$GeographyCode.1 <- NULL
}
# Create key stats table List
a <- c("KS101EW", "KS102EW", "KS103EW", "KS104EW", "KS105EW", "KS106EW", "KS107EW",
"KS201EW", "KS202EW", "KS204EW", "KS205EW", "KS206EW", "KS207WA", "KS208WA",
"KS209EW", "KS301EW", "KS401EW", "KS402EW", "KS403EW", "KS404EW", "KS405EW",
"KS501EW", "KS601EW", "KS602EW", "KS603EW", "KS604EW", "KS605EW", "KS606EW",
"KS607EW", "KS608EW", "KS609EW", "KS610EW", "KS611EW", "KS612EW", "KS613EW")
table_lookup <- c("Usual resident population", "Age structure", "Marital and civil partnership status",
"Living arrangements", "Household composition",
"Adults not in employment and dependent children and persons with long-term health problem or disability for all households",
"Lone parent households with dependent children", "Ethnic group", "National identity",
"Country of birth", "Passports held", "Household language", "Welsh language skills",
"Welsh language profile", "Religion", "Health and provision of unpaid care",
"Dwellings, household spaces and accommodation type", "Tenure", "Rooms, bedrooms and central heating",
"Car or van availability", "Communal establishment residents", "Qualifications and students",
"Economic activity", "Economic activity - Males", "Economic activity - Females",
"Hours worked", "Industry", "Industry - Males", "Industry - Females", "Occupation",
"Occupation - Males", "Occupation - Females", "NS-SeC", "NS-SeC - Males",
"NS-SeC - Females")
names(table_lookup) <- a
# download from
# https://geoportal.statistics.gov.uk/geoportal/catalog/search/browse/browse.page
ward_reg_lkup <- read.csv('lookup/CTRY11_GOR10_CTY11_LAD11_WD11_UK_LU.csv')
create_files_for_region <- function(region_name, ward_list_for_region) {
short_region_name <- tolower(gsub(' ', '', region_name))
dir_name <- paste('region_output/', short_region_name, sep='')
dir.create(dir_name)
data_dir_name <- paste(dir_name, '/data', sep='')
dir.create(data_dir_name)
data_for_region <- Key_Statistics_2011[Key_Statistics_2011$GeographyCode %in% ward_list_for_region,]
Variable_Desc <- Variable_Desc[Variable_Desc$ColumnVariableMeasurementUnit!="Count", ]
json_str <- '[\n'
sep_str <- ''
for (i in 1:nrow(Variable_Desc)) {
col_code <- as.character(Variable_Desc[i, "ColumnVariableCode"])
if ( length(unique(as.numeric(as.character(data_for_region[, col_code])))) < 3 ) {
next # skip this variable if there are less than three different values
}
brks <- classIntervals(as.numeric(as.character(data_for_region[, col_code])), n = 6, style = "fisher",
unique = TRUE)$brks
var_json_str <- paste(
'',
"{\"code\": \"",
Variable_Desc[i, "ColumnVariableCode"],
"\",\n \"table\": \"",
table_lookup[substr(Variable_Desc[i, "ColumnVariableCode"],0,7)],
"\",\n \"table_denominator\": \"",
stat_populations[substr(Variable_Desc[i, "ColumnVariableCode"],0,7)],
"\",\n \"unit\": \"",
Variable_Desc[i, "ColumnVariableMeasurementUnit"],
"\",\n \"desc\": \"",
Variable_Desc[i, "ColumnVariableDescription"],
"\",\n \"breaks\": [",
paste(brks, collapse=","),
"]\n}",
sep='')
json_str <- paste(json_str, sep_str, var_json_str, sep='')
sep_str <- ',\n'
data_to_save <- data_for_region[, c(1, which(names(data_for_region)==col_code))]
data_json <- '{\n'
sep_str2 <- ''
for (j in 1:nrow(data_to_save)) {
data_val <- data_to_save[j, 2]
if (data_val != '..') {
data_json <- paste(
data_json,
sep_str2,
' "',
data_to_save[j, 1],
'": ',
data_val,
'\n',
sep = '')
sep_str2 <- ','
}
}
data_json = paste(data_json, '}', sep='')
fileConn<-file(paste(data_dir_name, '/', col_code, '.json', sep=''))
writeLines(data_json, fileConn)
close(fileConn)
}
json_str <- paste(json_str, ']', sep='')
fileConn<-file(paste(data_dir_name, '/', 'info.json', sep=''))
writeLines(json_str, fileConn)
close(fileConn)
map <- map[map@data$WD11CD %in% ward_list_for_region, ]
writeOGR(map, paste(data_dir_name, "/wards.json", sep=''), layer=paste("wards1", short_region_name, sep=''), driver="GeoJSON")
}
map <- readOGR('geo/wards/wards.shp', 'wards')
englandRegionNames <- levels(ward_reg_lkup$GOR10NM)
englandRegionNames <- englandRegionNames[englandRegionNames != '']
for (region_name in englandRegionNames) {
wards_in_region <- ward_reg_lkup[ward_reg_lkup$GOR10NM==region_name, ]$WD11CD
create_files_for_region(region_name, wards_in_region)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment