Gianfranco Cecconi giacecco

## 1.R
require(dplyr)
require(RPostgreSQL)

target_town <- "BERKHAMSTED"

# I fetch all postcodes and populated_places from OS' Open Names
os_on_postcodes <- tbl(src_postgres("olaf"), sql("SELECT name1, populated_place FROM os_open_names WHERE local_type = 'Postcode' AND populated_place != ''"))

# I filter by target town
# TODO: what to do about those parts of town like Northchurch to Berkhamsted that appear as

## 4.R
# now, how many addresses and what % of the total can I get from LRPP over the estimated total?
lr_pp_with_house_names <- tbl(src_postgres("olaf"), sql("SELECT DISTINCT street, town, paon, saon FROM lr_pp WHERE street IS NOT NULL"))
no_addresses_I_know_from_lrpp <- collect(lr_pp_with_house_names %>% filter(town == target_town) %>% summarise(no_of_addresses = n()))$no_of_addresses
perc_I_know_from_lrpp = no_addresses_I_know_from_lrpp / total_addresses_in_town
# and display what I am missing
total_addresses_in_town - no_addresses_I_know_from_lrpp
1 - perc_I_know_from_lrpp

## 3.R
# read Ordnance Survey's Open Names to get all streets belonging to a 'populated place'
os_on <- tbl(src_postgres("olaf"), sql("SELECT name1, populated_place FROM os_open_names_wog WHERE populated_place != '' AND local_type IN ('Section Of Named Road', 'Named Road', 'Numbered Road', 'Section Of Numbered Road')"))

# filter by target town
target_town_streets <- os_on %>% filter(toupper(populated_place) == toupper(target_town))

# total estimate of no. of streets in the target town
no_of_streets <- collect(target_town_streets %>% summarise(tot = n()))$tot

# display the total estimate of no. of addresses in the target town

## 2.R
# calculated a weighted average of the max aon vs its frequency
temp <- max_aon_by_street %>% group_by(max_aon) %>% summarise(frequency = n())
representative_max_aons <- weighted.mean(temp$max_aon, temp$frequency)
# ... and display
representative_max_aons

## 1.R
require(dplyr)
require(RPostgreSQL)

target_town <- "BERKHAMSTED"

# read from Land Registry's Price Paid (LRPP) all addresses whose street names are not NULL and
# PAON or SAON are numeric; if both are, take the PAON only
# the DISTINCT below is important, otherwise down this script the AONs of properties that have
# been sold many times will weight more than the others
lr_pp <- tbl(src_postgres("olaf"), sql("SELECT DISTINCT street, town, aon FROM ((SELECT street, town, CAST(SUBSTRING(paon, '^([0-9]+)') AS NUMERIC) AS aon FROM lr_pp WHERE street IS NOT NULL AND paon ~ '^[0-9]+') UNION (SELECT street, town, CAST(SUBSTRING(saon, '^([0-9]+)') AS NUMERIC) AS aon FROM lr_pp WHERE street IS NOT NULL AND paon !~ '^[0-9]+' AND saon ~ '^[0-9]+')) AS a"))

## error message
psql:fix_pdc.sql:34: ERROR:  function fix_pdc() does not exist
LINE 1: SELECT fix_pdc();
               ^
HINT:  No function matches the given name and argument types. You might need to add explicit type casts.

## gist:fbca4b24274696ea054b
DROP TABLE IF EXISTS output;
CREATE TABLE output (no_of_records INTEGER);
SELECT parsel(
	'olaf',
	'ons_pd',
	'id',
	'select count(a.id) as no_of_records;',
	'output',
	'a',
	2);

## gist:05d1917f386ee8965b75
DROP TABLE IF EXISTS pcd_fix;
CREATE TABLE pcd_fix AS
	SELECT old_pcd, new_pcd, distance_meters, distance_degrees
	FROM

		(SELECT pcd AS old_pcd, geom
		FROM ons_pd
		WHERE doterm IS NOT NULL) AS a,

		LATERAL (SELECT pcd AS new_pcd, ST_Distance_Sphere(a.geom, geom) AS distance_meters, ST_Distance(a.geom, geom) AS distance_degrees

## gist:56379f5aa4e5e374c629
SELECT COUNT(*)
FROM lr_9501_1504
WHERE pcd IN (SELECT pcd FROM temp_retired_postcodes);

## gist:f2fcec35ff310afde93f
SELECT COUNT(pcd) FROM ons_pd
WHERE doterm IS NOT NULL AND COUNTRY IN ('e', 'w');
	require(dplyr)
	require(RPostgreSQL)

	target_town <- "BERKHAMSTED"

	# I fetch all postcodes and populated_places from OS' Open Names
	os_on_postcodes <- tbl(src_postgres("olaf"), sql("SELECT name1, populated_place FROM os_open_names WHERE local_type = 'Postcode' AND populated_place != ''"))

	# I filter by target town
	# TODO: what to do about those parts of town like Northchurch to Berkhamsted that appear as
	# now, how many addresses and what % of the total can I get from LRPP over the estimated total?
	lr_pp_with_house_names <- tbl(src_postgres("olaf"), sql("SELECT DISTINCT street, town, paon, saon FROM lr_pp WHERE street IS NOT NULL"))
	no_addresses_I_know_from_lrpp <- collect(lr_pp_with_house_names %>% filter(town == target_town) %>% summarise(no_of_addresses = n()))$no_of_addresses
	perc_I_know_from_lrpp = no_addresses_I_know_from_lrpp / total_addresses_in_town
	# and display what I am missing
	total_addresses_in_town - no_addresses_I_know_from_lrpp
	1 - perc_I_know_from_lrpp
	# read Ordnance Survey's Open Names to get all streets belonging to a 'populated place'
	os_on <- tbl(src_postgres("olaf"), sql("SELECT name1, populated_place FROM os_open_names_wog WHERE populated_place != '' AND local_type IN ('Section Of Named Road', 'Named Road', 'Numbered Road', 'Section Of Numbered Road')"))

	# filter by target town
	target_town_streets <- os_on %>% filter(toupper(populated_place) == toupper(target_town))

	# total estimate of no. of streets in the target town
	no_of_streets <- collect(target_town_streets %>% summarise(tot = n()))$tot

	# display the total estimate of no. of addresses in the target town
	# calculated a weighted average of the max aon vs its frequency
	temp <- max_aon_by_street %>% group_by(max_aon) %>% summarise(frequency = n())
	representative_max_aons <- weighted.mean(temp$max_aon, temp$frequency)
	# ... and display
	representative_max_aons
	psql:fix_pdc.sql:34: ERROR: function fix_pdc() does not exist
	LINE 1: SELECT fix_pdc();
	^
	HINT: No function matches the given name and argument types. You might need to add explicit type casts.
	DROP TABLE IF EXISTS output;
	CREATE TABLE output (no_of_records INTEGER);
	SELECT parsel(
	'olaf',
	'ons_pd',
	'id',
	'select count(a.id) as no_of_records;',
	'output',
	'a',
	2);
	DROP TABLE IF EXISTS pcd_fix;
	CREATE TABLE pcd_fix AS
	SELECT old_pcd, new_pcd, distance_meters, distance_degrees
	FROM

	(SELECT pcd AS old_pcd, geom
	FROM ons_pd
	WHERE doterm IS NOT NULL) AS a,

	LATERAL (SELECT pcd AS new_pcd, ST_Distance_Sphere(a.geom, geom) AS distance_meters, ST_Distance(a.geom, geom) AS distance_degrees
	SELECT COUNT(*)
	FROM lr_9501_1504
	WHERE pcd IN (SELECT pcd FROM temp_retired_postcodes);
	SELECT COUNT(pcd) FROM ons_pd
	WHERE doterm IS NOT NULL AND COUNTRY IN ('e', 'w');