This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# load ONS' pcd11_oa11_lsoa11_msoa11_lad11_ew_lu dataset and get the list of output areas for the | |
# relevant postcodes | |
onspcenum_oas <- unique((read.csv(pipe("unzip -qc \"/Users/giacecco/Data archive/ons/pcd11_oa11_lsoa11_msoa11_lad11_ew_lu/150725/Postcodes_(Enumeration)_(2011)_to_output_areas_(2011)_to_lower_layer_SOA_(2011)_to_middle_layer_SOA_(2011)_to_local_authority_districts_(2011)_E+W_lookup.zip\" *.csv"), na.strings = c(""), stringsAsFactors = F) %>% filter(PCD7 %in% union(lrpp_postcodes$pcd, oson_postcodes$pcd)))$OA11CD) | |
# load ONS' NSAL for the relevant output areas | |
onsnsal_property_no <- collect(tbl(src_postgres("olaf"), sql(paste0("SELECT COUNT(uprn) AS no_of_properties FROM ons_nsal WHERE oa11 IN (", paste0("'", onspcenum_oas, "'", collapse=", "), ")", collapse = ""))))$no_of_properties |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require(dplyr) | |
require(RPostgreSQL) | |
target_town <- "SOUTHAMPTON" | |
# get all postcodes for the target town from LRPP, remembering that the town in LRPP refers to the | |
# "post town", hence include smaller populated places (hamlets etc.) that are described as | |
# independent entities in OSON | |
lrpp_postcodes <- collect(tbl(src_postgres("olaf"), sql(paste0("SELECT DISTINCT pcd FROM lr_pp WHERE town = '", target_town , "'", collapse = "")))) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# calculate how many postcodes per OA and add to above table | |
postcodes_per_oa <- properties_per_pcd %>% group_by(OA11CD) %>% summarise(postcodes_no = n()) | |
properties_per_pcd <- left_join(properties_per_pcd, postcodes_per_oa, by = c("OA11CD" = "OA11CD")) | |
# calculate the number of properties per postcode as an even distribution vs the OA | |
properties_per_pcd$pcd_properties_no <- properties_per_pcd$oa_properties_no / properties_per_pcd$postcodes_no | |
# how many addresses in my postcode? | |
properties_per_pcd[properties_per_pcd$PCD7 == "HP4 3JD", c("PCD7", "pcd_properties_no")] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require(dplyr) | |
require(RPostgreSQL) | |
# load the pcd11_oa11_lsoa11_msoa11_lad11_ew_lu dataset | |
pcenum <- read.csv(pipe("unzip -qc \"/Users/giacecco/Data archive/ons/pcd11_oa11_lsoa11_msoa11_lad11_ew_lu/150725/Postcodes_(Enumeration)_(2011)_to_output_areas_(2011)_to_lower_layer_SOA_(2011)_to_middle_layer_SOA_(2011)_to_local_authority_districts_(2011)_E+W_lookup.zip\" *.csv"), na.strings = c(""), stringsAsFactors = F) | |
# use NSAL calculate how many properties (UPRNs) per Output Area | |
properties_per_oa <- collect(tbl(src_postgres("olaf"), sql("SELECT oa11, COUNT(uprn) AS oa_properties_no FROM ons_nsal WHERE ctry ~ '^E|^W' GROUP BY oa11"))) | |
# join the two datasets |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT COUNT(pcd) FROM ons_pd WHERE doterm IS NULL; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# what _populated places_ rather than Berkhamsted does OSON list for the same postcodes? | |
missing_berkhamsted_postcodes_in_oson <- setdiff(target_town_lr_postcodes, target_town_postcodes) | |
alien_postcodes <- collect(os_on_postcodes %>% filter(postcode %in% missing_berkhamsted_postcodes_in_oson)) | |
unique(alien_postcodes$populated_place) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# how many Berkhamsted postcodes from LRPP are not Berkhamsted postcodes in OSON? | |
sum(!target_town_lr_postcodes %in% target_town_postcodes) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# are the LRPP postcode current? how many of them I can't find in OSON _at all_, whatever the | |
# populated place they're associated to? | |
sum(!target_town_lr_postcodes %in% collect(os_on_postcodes)$postcode) | |
sum(!target_town_lr_postcodes %in% collect(os_on_postcodes)$postcode) / length(target_town_lr_postcodes) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# sanity check vs the entries in Land Registry's Price Paid | |
lr_pp <- tbl(src_postgres("olaf"), sql("SELECT DISTINCT town, pcd FROM lr_pp")) | |
target_town_lr_postcodes <- collect(lr_pp %>% filter(town == target_town))$pcd | |
# display how many we got | |
length(target_town_lr_postcodes) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# display the average number of addresses expected in each postcode | |
total_addresses_in_town / length(target_town_postcodes) |