Skip to content

Instantly share code, notes, and snippets.

View giacecco's full-sized avatar

Gianfranco Cecconi giacecco

View GitHub Profile
@giacecco
giacecco / 1.R
Last active August 29, 2015 14:25
require(dplyr)
require(RPostgreSQL)
target_town <- "BERKHAMSTED"
# I fetch all postcodes and populated_places from OS' Open Names
os_on_postcodes <- tbl(src_postgres("olaf"), sql("SELECT name1, populated_place FROM os_open_names WHERE local_type = 'Postcode' AND populated_place != ''"))
# I filter by target town
# TODO: what to do about those parts of town like Northchurch to Berkhamsted that appear as
# now, how many addresses and what % of the total can I get from LRPP over the estimated total?
lr_pp_with_house_names <- tbl(src_postgres("olaf"), sql("SELECT DISTINCT street, town, paon, saon FROM lr_pp WHERE street IS NOT NULL"))
no_addresses_I_know_from_lrpp <- collect(lr_pp_with_house_names %>% filter(town == target_town) %>% summarise(no_of_addresses = n()))$no_of_addresses
perc_I_know_from_lrpp = no_addresses_I_know_from_lrpp / total_addresses_in_town
# and display what I am missing
total_addresses_in_town - no_addresses_I_know_from_lrpp
1 - perc_I_know_from_lrpp
# read Ordnance Survey's Open Names to get all streets belonging to a 'populated place'
os_on <- tbl(src_postgres("olaf"), sql("SELECT name1, populated_place FROM os_open_names_wog WHERE populated_place != '' AND local_type IN ('Section Of Named Road', 'Named Road', 'Numbered Road', 'Section Of Numbered Road')"))
# filter by target town
target_town_streets <- os_on %>% filter(toupper(populated_place) == toupper(target_town))
# total estimate of no. of streets in the target town
no_of_streets <- collect(target_town_streets %>% summarise(tot = n()))$tot
# display the total estimate of no. of addresses in the target town
@giacecco
giacecco / 2.R
Last active August 29, 2015 14:25
# calculated a weighted average of the max aon vs its frequency
temp <- max_aon_by_street %>% group_by(max_aon) %>% summarise(frequency = n())
representative_max_aons <- weighted.mean(temp$max_aon, temp$frequency)
# ... and display
representative_max_aons
@giacecco
giacecco / 1.R
Last active August 29, 2015 14:25
require(dplyr)
require(RPostgreSQL)
target_town <- "BERKHAMSTED"
# read from Land Registry's Price Paid (LRPP) all addresses whose street names are not NULL and
# PAON or SAON are numeric; if both are, take the PAON only
# the DISTINCT below is important, otherwise down this script the AONs of properties that have
# been sold many times will weight more than the others
lr_pp <- tbl(src_postgres("olaf"), sql("SELECT DISTINCT street, town, aon FROM ((SELECT street, town, CAST(SUBSTRING(paon, '^([0-9]+)') AS NUMERIC) AS aon FROM lr_pp WHERE street IS NOT NULL AND paon ~ '^[0-9]+') UNION (SELECT street, town, CAST(SUBSTRING(saon, '^([0-9]+)') AS NUMERIC) AS aon FROM lr_pp WHERE street IS NOT NULL AND paon !~ '^[0-9]+' AND saon ~ '^[0-9]+')) AS a"))
psql:fix_pdc.sql:34: ERROR: function fix_pdc() does not exist
LINE 1: SELECT fix_pdc();
^
HINT: No function matches the given name and argument types. You might need to add explicit type casts.
DROP TABLE IF EXISTS output;
CREATE TABLE output (no_of_records INTEGER);
SELECT parsel(
'olaf',
'ons_pd',
'id',
'select count(a.id) as no_of_records;',
'output',
'a',
2);
DROP TABLE IF EXISTS pcd_fix;
CREATE TABLE pcd_fix AS
SELECT old_pcd, new_pcd, distance_meters, distance_degrees
FROM
(SELECT pcd AS old_pcd, geom
FROM ons_pd
WHERE doterm IS NOT NULL) AS a,
LATERAL (SELECT pcd AS new_pcd, ST_Distance_Sphere(a.geom, geom) AS distance_meters, ST_Distance(a.geom, geom) AS distance_degrees
SELECT COUNT(*)
FROM lr_9501_1504
WHERE pcd IN (SELECT pcd FROM temp_retired_postcodes);
SELECT COUNT(pcd) FROM ons_pd
WHERE doterm IS NOT NULL AND COUNTRY IN ('e', 'w');