Skip to content

Instantly share code, notes, and snippets.

View giacecco's full-sized avatar

Gianfranco Cecconi giacecco

View GitHub Profile
# load ONS' pcd11_oa11_lsoa11_msoa11_lad11_ew_lu dataset and get the list of output areas for the
# relevant postcodes
onspcenum_oas <- unique((read.csv(pipe("unzip -qc \"/Users/giacecco/Data archive/ons/pcd11_oa11_lsoa11_msoa11_lad11_ew_lu/150725/Postcodes_(Enumeration)_(2011)_to_output_areas_(2011)_to_lower_layer_SOA_(2011)_to_middle_layer_SOA_(2011)_to_local_authority_districts_(2011)_E+W_lookup.zip\" *.csv"), na.strings = c(""), stringsAsFactors = F) %>% filter(PCD7 %in% union(lrpp_postcodes$pcd, oson_postcodes$pcd)))$OA11CD)
# load ONS' NSAL for the relevant output areas
onsnsal_property_no <- collect(tbl(src_postgres("olaf"), sql(paste0("SELECT COUNT(uprn) AS no_of_properties FROM ons_nsal WHERE oa11 IN (", paste0("'", onspcenum_oas, "'", collapse=", "), ")", collapse = ""))))$no_of_properties
@giacecco
giacecco / 1.R
Last active August 29, 2015 14:25
require(dplyr)
require(RPostgreSQL)
target_town <- "SOUTHAMPTON"
# get all postcodes for the target town from LRPP, remembering that the town in LRPP refers to the
# "post town", hence include smaller populated places (hamlets etc.) that are described as
# independent entities in OSON
lrpp_postcodes <- collect(tbl(src_postgres("olaf"), sql(paste0("SELECT DISTINCT pcd FROM lr_pp WHERE town = '", target_town , "'", collapse = ""))))
# calculate how many postcodes per OA and add to above table
postcodes_per_oa <- properties_per_pcd %>% group_by(OA11CD) %>% summarise(postcodes_no = n())
properties_per_pcd <- left_join(properties_per_pcd, postcodes_per_oa, by = c("OA11CD" = "OA11CD"))
# calculate the number of properties per postcode as an even distribution vs the OA
properties_per_pcd$pcd_properties_no <- properties_per_pcd$oa_properties_no / properties_per_pcd$postcodes_no
# how many addresses in my postcode?
properties_per_pcd[properties_per_pcd$PCD7 == "HP4 3JD", c("PCD7", "pcd_properties_no")]
@giacecco
giacecco / 1.R
Last active August 29, 2015 14:25
require(dplyr)
require(RPostgreSQL)
# load the pcd11_oa11_lsoa11_msoa11_lad11_ew_lu dataset
pcenum <- read.csv(pipe("unzip -qc \"/Users/giacecco/Data archive/ons/pcd11_oa11_lsoa11_msoa11_lad11_ew_lu/150725/Postcodes_(Enumeration)_(2011)_to_output_areas_(2011)_to_lower_layer_SOA_(2011)_to_middle_layer_SOA_(2011)_to_local_authority_districts_(2011)_E+W_lookup.zip\" *.csv"), na.strings = c(""), stringsAsFactors = F)
# use NSAL calculate how many properties (UPRNs) per Output Area
properties_per_oa <- collect(tbl(src_postgres("olaf"), sql("SELECT oa11, COUNT(uprn) AS oa_properties_no FROM ons_nsal WHERE ctry ~ '^E|^W' GROUP BY oa11")))
# join the two datasets
SELECT COUNT(pcd) FROM ons_pd WHERE doterm IS NULL;
# what _populated places_ rather than Berkhamsted does OSON list for the same postcodes?
missing_berkhamsted_postcodes_in_oson <- setdiff(target_town_lr_postcodes, target_town_postcodes)
alien_postcodes <- collect(os_on_postcodes %>% filter(postcode %in% missing_berkhamsted_postcodes_in_oson))
unique(alien_postcodes$populated_place)
# how many Berkhamsted postcodes from LRPP are not Berkhamsted postcodes in OSON?
sum(!target_town_lr_postcodes %in% target_town_postcodes)
@giacecco
giacecco / 4.R
Last active August 29, 2015 14:25
# are the LRPP postcode current? how many of them I can't find in OSON _at all_, whatever the
# populated place they're associated to?
sum(!target_town_lr_postcodes %in% collect(os_on_postcodes)$postcode)
sum(!target_town_lr_postcodes %in% collect(os_on_postcodes)$postcode) / length(target_town_lr_postcodes)
# sanity check vs the entries in Land Registry's Price Paid
lr_pp <- tbl(src_postgres("olaf"), sql("SELECT DISTINCT town, pcd FROM lr_pp"))
target_town_lr_postcodes <- collect(lr_pp %>% filter(town == target_town))$pcd
# display how many we got
length(target_town_lr_postcodes)
# display the average number of addresses expected in each postcode
total_addresses_in_town / length(target_town_postcodes)