jamestrimble/census_data_for_online_map.r

## census_data_for_online_map.r
# Script to create data for http://ukdataexplorer.com/census/
# Author James Trimble, james.trimble at yahoo.co.uk
#
# This code is based on Alex Singleton's work:
# http://rpubs.com/alexsingleton/openatlas
# http://www.alex-singleton.com/2011-census-open-atlas-project/
#
# The script takes around an hour to run. It could be made faster!
# The script requires a lookup table and shapefile from
# https://geoportal.statistics.gov.uk/geoportal/catalog/search/browse/browse.page

# The shapefile of wards neds to be converted to lat-long coordinates,
# for example using the four commented lines of code below.
# See https://stat.ethz.ch/pipermail/r-sig-geo/2013-February/017656.html
# ogrListLayers('WD_DEC_2011_EW_BGC.shp')
# map <- readOGR('WD_DEC_2011_EW_BGC.shp', "WD_DEC_2011_EW_BGC")
# map <- spTransform(map,CRS("+init=epsg:4326"))
# writeOGR(map, 'wards.shp', layer="wards", driver="ESRI Shapefile")


library(rgdal)
library(classInt)
library(plyr)

working_dir <- 'working_directory'
setwd(working_dir)

# Create a list of tables
table_list <- c("KS101EW", "KS102EW", "KS103EW", "KS104EW", "KS105EW", "KS106EW",
    "KS107EW", "KS201EW", "KS202EW", "KS204EW", "KS205EW", "KS206EW", "KS207WA",
    "KS208WA", "KS209EW", "KS301EW", "KS401EW", "KS402EW", "KS403EW", "KS404EW",
    "KS405EW", "KS501EW", "KS601EW", "KS602EW", "KS603EW", "KS604EW", "KS605EW",
    "KS606EW", "KS607EW", "KS608EW", "KS609EW", "KS610EW", "KS611EW", "KS612EW",
    "KS613EW")
table_list <- table_list[substr(table_list,6,7)!="WA"]

# Create a lookup-array of statistical populations (the denominator for percentages)
stat_populations <- c()
for (n in table_list) {
  fn <- paste(n, 'META0.CSV', sep='')
  d <- read.csv(fn)
  stat_populations[n] <- as.character(d$StatisticalPopulation[1])
}

table_list <- tolower(paste(table_list, "_2011_ward", sep = ""))

# Download Files
for (n in 1:length(table_list)) {
    file <- as.character(table_list[n])
    temp <- tempfile(fileext = ".zip")
    download.file(paste("http://www.nomisweb.co.uk/output/census/2011/", file,
        ".zip", sep = ""), temp)
    unzip(temp, junkpaths = TRUE)
    unlink(temp)
    csv_file <- paste(toupper(sub("_2011_ward", "", file)), "DATA.CSV", sep = "")
    assign(file, read.csv(csv_file))
}

Variable_Desc <- NULL
for (n in 1:length(table_list)) {
    file <- as.character(table_list[n])
    csv_file <- paste(toupper(sub("_2011_ward", "", file)), "DESC0.CSV", sep = "")
    temp <- read.csv(csv_file)
    Variable_Desc <- rbind(Variable_Desc, temp)
    remove(temp)
}

# Create lookup
Key_Statistics_2011 <- as.data.frame(ks101ew_2011_ward[, 1])
colnames(Key_Statistics_2011) <- "GeographyCode"

# Merge Loop
for (n in 1:length(table_list)) {
    Key_Statistics_2011 <- data.frame(Key_Statistics_2011, get(as.character(table_list[n]))[match(get(as.character(table_list[n]))[,
        "GeographyCode"], get(as.character(table_list[n]))[, "GeographyCode"]),
        ])
    Key_Statistics_2011$GeographyCode.1 <- NULL
}

# Create key stats table List
a <- c("KS101EW", "KS102EW", "KS103EW", "KS104EW", "KS105EW", "KS106EW", "KS107EW",
    "KS201EW", "KS202EW", "KS204EW", "KS205EW", "KS206EW", "KS207WA", "KS208WA",
    "KS209EW", "KS301EW", "KS401EW", "KS402EW", "KS403EW", "KS404EW", "KS405EW",
    "KS501EW", "KS601EW", "KS602EW", "KS603EW", "KS604EW", "KS605EW", "KS606EW",
    "KS607EW", "KS608EW", "KS609EW", "KS610EW", "KS611EW", "KS612EW", "KS613EW")
table_lookup <- c("Usual resident population", "Age structure", "Marital and civil partnership status",
    "Living arrangements", "Household composition",
    "Adults not in employment and dependent children and persons with long-term health problem or disability for all households",
    "Lone parent households with dependent children", "Ethnic group", "National identity",
    "Country of birth", "Passports held", "Household language", "Welsh language skills",
    "Welsh language profile", "Religion", "Health and provision of unpaid care",
    "Dwellings, household spaces and accommodation type", "Tenure", "Rooms, bedrooms and central heating",
    "Car or van availability", "Communal establishment residents", "Qualifications and students",
    "Economic activity", "Economic activity - Males", "Economic activity - Females",
    "Hours worked", "Industry", "Industry - Males", "Industry - Females", "Occupation",
    "Occupation - Males", "Occupation - Females", "NS-SeC", "NS-SeC - Males",
    "NS-SeC - Females")
names(table_lookup) <- a

# download from
# https://geoportal.statistics.gov.uk/geoportal/catalog/search/browse/browse.page
ward_reg_lkup <- read.csv('lookup/CTRY11_GOR10_CTY11_LAD11_WD11_UK_LU.csv')

create_files_for_region <- function(region_name, ward_list_for_region) {
      short_region_name <- tolower(gsub(' ', '', region_name))
      dir_name <- paste('region_output/', short_region_name, sep='')
      dir.create(dir_name)
      data_dir_name <- paste(dir_name, '/data', sep='')
      dir.create(data_dir_name)

    data_for_region <- Key_Statistics_2011[Key_Statistics_2011$GeographyCode %in% ward_list_for_region,]

	Variable_Desc <- Variable_Desc[Variable_Desc$ColumnVariableMeasurementUnit!="Count", ]

	json_str <- '[\n'
	sep_str <- ''
	for (i in 1:nrow(Variable_Desc)) {
	   col_code <- as.character(Variable_Desc[i, "ColumnVariableCode"])

	   if ( length(unique(as.numeric(as.character(data_for_region[, col_code])))) < 3 ) {
		 next  # skip this variable if there are less than three different values
	   }
	   brks <- classIntervals(as.numeric(as.character(data_for_region[, col_code])), n = 6, style = "fisher",
					unique = TRUE)$brks
	   var_json_str <- paste(
				 '',
				 "{\"code\": \"",
				 Variable_Desc[i, "ColumnVariableCode"],
				 "\",\n \"table\": \"",
				 table_lookup[substr(Variable_Desc[i, "ColumnVariableCode"],0,7)],
				 "\",\n \"table_denominator\": \"",
				 stat_populations[substr(Variable_Desc[i, "ColumnVariableCode"],0,7)],
				 "\",\n \"unit\": \"",
				 Variable_Desc[i, "ColumnVariableMeasurementUnit"],
				 "\",\n \"desc\": \"",
				 Variable_Desc[i, "ColumnVariableDescription"],
				 "\",\n \"breaks\": [",
				 paste(brks, collapse=","),
				 "]\n}",
				 sep='')
	   json_str <- paste(json_str, sep_str, var_json_str, sep='')
	   sep_str <- ',\n'

	   data_to_save <- data_for_region[, c(1, which(names(data_for_region)==col_code))]

	   data_json <- '{\n'
	   sep_str2 <- ''
	   for (j in 1:nrow(data_to_save)) {
		 data_val <- data_to_save[j, 2]
		 if (data_val != '..') {
		   data_json <- paste(
			  data_json,
			  sep_str2,
			  ' "',
			  data_to_save[j, 1],
			  '": ',
			  data_val,
			  '\n',
			  sep = '')
		   sep_str2 <- ','
		 }
	   }
	   data_json = paste(data_json, '}', sep='')
	   fileConn<-file(paste(data_dir_name, '/', col_code, '.json', sep=''))
	   writeLines(data_json, fileConn)
	   close(fileConn)
	}
	json_str <- paste(json_str, ']', sep='')
	fileConn<-file(paste(data_dir_name, '/', 'info.json', sep=''))
	writeLines(json_str, fileConn)
	close(fileConn)

	map <- map[map@data$WD11CD %in% ward_list_for_region, ]
	writeOGR(map, paste(data_dir_name, "/wards.json", sep=''), layer=paste("wards1", short_region_name, sep=''), driver="GeoJSON")
}

map <- readOGR('geo/wards/wards.shp', 'wards')
englandRegionNames <- levels(ward_reg_lkup$GOR10NM)
englandRegionNames <- englandRegionNames[englandRegionNames != '']

for (region_name in englandRegionNames) {
    wards_in_region <- ward_reg_lkup[ward_reg_lkup$GOR10NM==region_name, ]$WD11CD
    create_files_for_region(region_name, wards_in_region)
}
	# Script to create data for http://ukdataexplorer.com/census/
	# Author James Trimble, james.trimble at yahoo.co.uk
	#
	# This code is based on Alex Singleton's work:
	# http://rpubs.com/alexsingleton/openatlas
	# http://www.alex-singleton.com/2011-census-open-atlas-project/
	#
	# The script takes around an hour to run. It could be made faster!
	# The script requires a lookup table and shapefile from
	# https://geoportal.statistics.gov.uk/geoportal/catalog/search/browse/browse.page

	# The shapefile of wards neds to be converted to lat-long coordinates,
	# for example using the four commented lines of code below.
	# See https://stat.ethz.ch/pipermail/r-sig-geo/2013-February/017656.html
	# ogrListLayers('WD_DEC_2011_EW_BGC.shp')
	# map <- readOGR('WD_DEC_2011_EW_BGC.shp', "WD_DEC_2011_EW_BGC")
	# map <- spTransform(map,CRS("+init=epsg:4326"))
	# writeOGR(map, 'wards.shp', layer="wards", driver="ESRI Shapefile")


	library(rgdal)
	library(classInt)
	library(plyr)

	working_dir <- 'working_directory'
	setwd(working_dir)

	# Create a list of tables
	table_list <- c("KS101EW", "KS102EW", "KS103EW", "KS104EW", "KS105EW", "KS106EW",
	"KS107EW", "KS201EW", "KS202EW", "KS204EW", "KS205EW", "KS206EW", "KS207WA",
	"KS208WA", "KS209EW", "KS301EW", "KS401EW", "KS402EW", "KS403EW", "KS404EW",
	"KS405EW", "KS501EW", "KS601EW", "KS602EW", "KS603EW", "KS604EW", "KS605EW",
	"KS606EW", "KS607EW", "KS608EW", "KS609EW", "KS610EW", "KS611EW", "KS612EW",
	"KS613EW")
	table_list <- table_list[substr(table_list,6,7)!="WA"]

	# Create a lookup-array of statistical populations (the denominator for percentages)
	stat_populations <- c()
	for (n in table_list) {
	fn <- paste(n, 'META0.CSV', sep='')
	d <- read.csv(fn)
	stat_populations[n] <- as.character(d$StatisticalPopulation[1])
	}

	table_list <- tolower(paste(table_list, "_2011_ward", sep = ""))

	# Download Files
	for (n in 1:length(table_list)) {
	file <- as.character(table_list[n])
	temp <- tempfile(fileext = ".zip")
	download.file(paste("http://www.nomisweb.co.uk/output/census/2011/", file,
	".zip", sep = ""), temp)
	unzip(temp, junkpaths = TRUE)
	unlink(temp)
	csv_file <- paste(toupper(sub("_2011_ward", "", file)), "DATA.CSV", sep = "")
	assign(file, read.csv(csv_file))
	}

	Variable_Desc <- NULL
	for (n in 1:length(table_list)) {
	file <- as.character(table_list[n])
	csv_file <- paste(toupper(sub("_2011_ward", "", file)), "DESC0.CSV", sep = "")
	temp <- read.csv(csv_file)
	Variable_Desc <- rbind(Variable_Desc, temp)
	remove(temp)
	}

	# Create lookup
	Key_Statistics_2011 <- as.data.frame(ks101ew_2011_ward[, 1])
	colnames(Key_Statistics_2011) <- "GeographyCode"

	# Merge Loop
	for (n in 1:length(table_list)) {
	Key_Statistics_2011 <- data.frame(Key_Statistics_2011, get(as.character(table_list[n]))[match(get(as.character(table_list[n]))[,
	"GeographyCode"], get(as.character(table_list[n]))[, "GeographyCode"]),
	])
	Key_Statistics_2011$GeographyCode.1 <- NULL
	}

	# Create key stats table List
	a <- c("KS101EW", "KS102EW", "KS103EW", "KS104EW", "KS105EW", "KS106EW", "KS107EW",
	"KS201EW", "KS202EW", "KS204EW", "KS205EW", "KS206EW", "KS207WA", "KS208WA",
	"KS209EW", "KS301EW", "KS401EW", "KS402EW", "KS403EW", "KS404EW", "KS405EW",
	"KS501EW", "KS601EW", "KS602EW", "KS603EW", "KS604EW", "KS605EW", "KS606EW",
	"KS607EW", "KS608EW", "KS609EW", "KS610EW", "KS611EW", "KS612EW", "KS613EW")
	table_lookup <- c("Usual resident population", "Age structure", "Marital and civil partnership status",
	"Living arrangements", "Household composition",
	"Adults not in employment and dependent children and persons with long-term health problem or disability for all households",
	"Lone parent households with dependent children", "Ethnic group", "National identity",
	"Country of birth", "Passports held", "Household language", "Welsh language skills",
	"Welsh language profile", "Religion", "Health and provision of unpaid care",
	"Dwellings, household spaces and accommodation type", "Tenure", "Rooms, bedrooms and central heating",
	"Car or van availability", "Communal establishment residents", "Qualifications and students",
	"Economic activity", "Economic activity - Males", "Economic activity - Females",
	"Hours worked", "Industry", "Industry - Males", "Industry - Females", "Occupation",
	"Occupation - Males", "Occupation - Females", "NS-SeC", "NS-SeC - Males",
	"NS-SeC - Females")
	names(table_lookup) <- a

	# download from
	# https://geoportal.statistics.gov.uk/geoportal/catalog/search/browse/browse.page
	ward_reg_lkup <- read.csv('lookup/CTRY11_GOR10_CTY11_LAD11_WD11_UK_LU.csv')

	create_files_for_region <- function(region_name, ward_list_for_region) {
	short_region_name <- tolower(gsub(' ', '', region_name))
	dir_name <- paste('region_output/', short_region_name, sep='')
	dir.create(dir_name)
	data_dir_name <- paste(dir_name, '/data', sep='')
	dir.create(data_dir_name)

	data_for_region <- Key_Statistics_2011[Key_Statistics_2011$GeographyCode %in% ward_list_for_region,]

	Variable_Desc <- Variable_Desc[Variable_Desc$ColumnVariableMeasurementUnit!="Count", ]

	json_str <- '[\n'
	sep_str <- ''
	for (i in 1:nrow(Variable_Desc)) {
	col_code <- as.character(Variable_Desc[i, "ColumnVariableCode"])

	if ( length(unique(as.numeric(as.character(data_for_region[, col_code])))) < 3 ) {
	next # skip this variable if there are less than three different values
	}
	brks <- classIntervals(as.numeric(as.character(data_for_region[, col_code])), n = 6, style = "fisher",
	unique = TRUE)$brks
	var_json_str <- paste(
	'',
	"{\"code\": \"",
	Variable_Desc[i, "ColumnVariableCode"],
	"\",\n \"table\": \"",
	table_lookup[substr(Variable_Desc[i, "ColumnVariableCode"],0,7)],
	"\",\n \"table_denominator\": \"",
	stat_populations[substr(Variable_Desc[i, "ColumnVariableCode"],0,7)],
	"\",\n \"unit\": \"",
	Variable_Desc[i, "ColumnVariableMeasurementUnit"],
	"\",\n \"desc\": \"",
	Variable_Desc[i, "ColumnVariableDescription"],
	"\",\n \"breaks\": [",
	paste(brks, collapse=","),
	"]\n}",
	sep='')
	json_str <- paste(json_str, sep_str, var_json_str, sep='')
	sep_str <- ',\n'

	data_to_save <- data_for_region[, c(1, which(names(data_for_region)==col_code))]

	data_json <- '{\n'
	sep_str2 <- ''
	for (j in 1:nrow(data_to_save)) {
	data_val <- data_to_save[j, 2]
	if (data_val != '..') {
	data_json <- paste(
	data_json,
	sep_str2,
	' "',
	data_to_save[j, 1],
	'": ',
	data_val,
	'\n',
	sep = '')
	sep_str2 <- ','
	}
	}
	data_json = paste(data_json, '}', sep='')
	fileConn<-file(paste(data_dir_name, '/', col_code, '.json', sep=''))
	writeLines(data_json, fileConn)
	close(fileConn)
	}
	json_str <- paste(json_str, ']', sep='')
	fileConn<-file(paste(data_dir_name, '/', 'info.json', sep=''))
	writeLines(json_str, fileConn)
	close(fileConn)

	map <- map[map@data$WD11CD %in% ward_list_for_region, ]
	writeOGR(map, paste(data_dir_name, "/wards.json", sep=''), layer=paste("wards1", short_region_name, sep=''), driver="GeoJSON")
	}

	map <- readOGR('geo/wards/wards.shp', 'wards')
	englandRegionNames <- levels(ward_reg_lkup$GOR10NM)
	englandRegionNames <- englandRegionNames[englandRegionNames != '']

	for (region_name in englandRegionNames) {
	wards_in_region <- ward_reg_lkup[ward_reg_lkup$GOR10NM==region_name, ]$WD11CD
	create_files_for_region(region_name, wards_in_region)
	}