Skip to content

Instantly share code, notes, and snippets.

@timstallmann
Created July 3, 2018 14:22
Show Gist options
  • Save timstallmann/e6b3c360fd89122b9dc26c13c841b7e3 to your computer and use it in GitHub Desktop.
Save timstallmann/e6b3c360fd89122b9dc26c13c841b7e3 to your computer and use it in GitHub Desktop.
DataWorksNC: PostGIS Proximity Analysis Examples
-- Create some indices in state plane on the census geometry
CREATE INDEX idx_bg_geom_state_plane on geom.blockgroup USING gist (ST_Transform(geom, 102719));
CREATE INDEX idx_tract_geom_state_plane on geom.tract USING gist (ST_Transform(geom, 102719));
-- Import data from staging into final tables.
-- Create table of dwelling units per blockgroup / per tract
CREATE MATERIALIZED VIEW nbhd_correlates.dwelling_units__blockgroup__2017 AS
SELECT bg.gid as gid, bg.geom as geom, bg.fips as fips, sum(du.sum_du) as dwelling_units FROM geom.blockgroup bg JOIN
staging.parcels_2018_dwellingunits du ON st_intersects(st_transform(bg.geom, 102719), du.geom)
group by bg.gid, bg.fips, bg.geom;
-- Basic format of the query
SELECT bg.geom as geom, bg.fips as fips, sum(du.sum_du) as r_prox_clinic FROM geom.blockgroup bg JOIN
(SELECT DISTINCT ON (parcels.gid) parcels.gid, parcels.sum_du as sum_du, parcels.geom as geom FROM staging.parcels_2018_dwellingunits parcels JOIN staging.clinics_532018 clinics
ON st_dwithin(parcels.geom, clinics.geom, 1320)) du
ON ST_Intersects(ST_Transform(bg.geom,102719), du.geom) group by bg.fips, bg.geom;
-- Alternate form which is more easily reworked as function
SELECT bg.geom as geom, bg.fips as fips, (SELECT sum(du.sum_du) FROM (
SELECT DISTINCT ON (parcels.gid) parcels.gid, parcels.sum_du as sum_du, parcels.geom as geom FROM staging.parcels_2018_dwellingunits parcels, staging.pharmacies_2018 pharmacies
WHERE ST_Intersects(ST_Transform(bg.geom,102719), parcels.geom) AND ST_Intersects(ST_Transform(bg.geom,102719), st_buffer(pharmacies.geom, 1320)) and st_dwithin(parcels.geom, pharmacies.geom, 1320)) du) as prox_pharmacy FROM geom.blockgroup bg;
-- Copy dwelling units into their own table
DROP TABLE IF EXISTS geom.parcels;
CREATE TABLE geom.parcels (
gid SERIAL PRIMARY KEY,
geom geometry(geometry, 102719),
data_year integer,
parcel_id varchar(25),
pin varchar(20),
dwelling_units bigint
);
INSERT INTO geom.parcels(geom, data_year, parcel_id, pin, dwelling_units)
SELECT geom, 2018, parcel_id, pin, sum_du FROM staging.parcels_2018_dwellingunits;
CREATE INDEX idx_parcels_geom on geom.parcels USING gist(geom);
COMMENT ON TABLE geom.parcels IS 'Durham parcels dataset. Does not contain full attribute information, that information will be stored elsewhere.';
COMMENT ON COLUMN geom.parcels.dwelling_units IS 'Number of dwelling units on this parcel.';
-- Functional version of query which can be reused
CREATE OR REPLACE FUNCTION nbhd_correlates.dwelling_units__proximity_count(poly geometry, points_tbl regclass, dist numeric, year bigint DEFAULT 2018, OUT result numeric)
RETURNS numeric AS $$
BEGIN
EXECUTE 'SELECT sum(du.dwelling_units) FROM ' ||
'(SELECT DISTINCT ON (parcels.gid) parcels.gid, parcels.dwelling_units as dwelling_units, parcels.geom as geom ' ||
'FROM geom.parcels parcels, ' ||
points_tbl || ' points WHERE points.data_year=$3 AND parcels.data_year=$3 ' ||
'AND ST_Intersects(ST_Transform($1, 102719), parcels.geom) ' ||
'AND ST_Intersects(ST_Transform($1, 102719), ST_Buffer(points.geom, $2)) ' ||
'AND st_dwithin(parcels.geom, points.geom, $2)) du ' ||
''
INTO result USING poly, dist, year;
end;
$$ LANGUAGE plpgsql;
-- Functional version of query which gives total number of dwelling units
CREATE OR REPLACE FUNCTION nbhd_correlates.dwelling_units__count(poly geometry, year bigint DEFAULT 2018, OUT result numeric)
RETURNS numeric AS $$
BEGIN
EXECUTE 'SELECT sum(du.dwelling_units) FROM ' ||
'(SELECT * ' ||
'FROM geom.parcels parcels ' ||
'WHERE ST_Intersects(ST_Transform($1, 102719), parcels.geom) ' ||
'AND parcels.data_year=$2 ) du '
INTO result USING poly, year;
end;
$$ LANGUAGE plpgsql;
CREATE SCHEMA point_features;
GRANT ALL ON SCHEMA point_features TO staff;
GRANT USAGE ON SCHEMA point_features TO public;
GRANT ALL ON ALL TABLES IN SCHEMA point_features TO staff;
GRANT SELECT on ALL TABLES in SCHEMA point_features to public;
ALTER DEFAULT PRIVILEGES IN SCHEMA point_features GRANT ALL ON TABLES TO staff;
ALTER DEFAULT PRIVILEGES IN SCHEMA point_features GRANT SELECT ON TABLES TO public;
ALTER DEFAULT PRIVILEGES FOR ROLE tim IN SCHEMA point_features GRANT SELECT ON TABLES TO PUBLIC;
ALTER DEFAULT PRIVILEGES FOR ROLE john IN SCHEMA point_features GRANT SELECT ON TABLES TO PUBLIC;
-- Add point features from John's data
DROP TABLE IF EXISTS point_features.clinics;
CREATE TABLE point_features.clinics (
gid SERIAL PRIMARY KEY,
geom geometry(geometry, 102719),
data_year integer,
name varchar(254),
type varchar(254),
street_address varchar(254),
street_address_2 varchar(254),
city varchar(254),
state varchar(254),
zip bigint,
phone varchar(254),
affiliation varchar(254)
);
INSERT INTO point_features.clinics(geom, data_year, name, type, street_address, street_address_2, city, state, zip, phone, affiliation)
SELECT
geom, '2018', name, type_, streetadd, office, city, state, zip, phone, affiliatio
FROM staging.clinics_532018;
CREATE INDEX idx_clinics_geom ON point_features.clinics USING gist(geom);
COMMENT ON TABLE point_features.clinics IS 'Locations of clinics and urgent care facilities in Durham County, as provided by ';
COMMENT ON COLUMN point_features.clinics.type IS 'Health system which this facility is associated with, or Independent for unaffiliated clinics.';
DROP TABLE IF EXISTS point_features.pharmacies;
CREATE TABLE point_features.pharmacies (
gid SERIAL PRIMARY KEY,
geom geometry(geometry, 102719),
data_year integer,
name varchar(254),
street_address varchar(254),
city varchar(254),
state varchar(2),
zip bigint,
contact_name varchar(254),
contact_phone varchar(15),
contact_title varchar(254),
notes varchar(254),
naics_code numeric,
naics_description varchar(254),
sic_code numeric,
sic_description varchar(254)
);
INSERT INTO point_features.pharmacies(geom, data_year, name, street_address, city, state, zip, contact_name, contact_phone, contact_title, notes, naics_code, naics_description, sic_code, sic_description)
SELECT geom, 2018, coname as name, coaddr_1 as street_address, cocity_1 as city, 'NC', cozip_1 as zip, contact_na as contact_name, contact_ph as contact_phone, title_desc as contact_title, notes1 as notes, naics_code, naics_desc, sic_code, sic_desc
FROM staging.pharmacies_2018;
CREATE INDEX idx_pharmacies_geom ON point_features.pharmacies USING gist(geom);
COMMENT ON TABLE point_features.pharmacies IS 'Location of pharmacies in Durham County, as provided by ____';
DROP TABLE IF EXISTS point_features.bus_stops;
CREATE TABLE point_features.bus_stops (
gid SERIAL PRIMARY KEY,
geom geometry(geometry, 102719),
data_year integer,
name varchar(254),
onstreet varchar(254),
atstreet varchar(254),
city varchar(254),
state varchar(10),
zip varchar(15),
system varchar(100),
bus_line_id bigint,
bus_line_number varchar(25),
bus_line_name varchar(254),
has_bench boolean,
has_shelter boolean,
has_lighting boolean,
has_garbage_can boolean,
has_telephone boolean,
has_bicycle_rack boolean
);
INSERT INTO point_features.bus_stops(geom, data_year, name, onstreet, atstreet, city, state, zip, system, bus_line_id, bus_line_number, bus_line_name, has_bench, has_shelter, has_lighting, has_garbage_can, has_telephone, has_bicycle_rack)
SELECT geom, 2018, stopname as name, onstreet, atstreet, city, state, zipcode as zip, 'GoDurham'::varchar(100) as system, lineid as bus_line_id, lineabbr as bus_line_number, linename as bus_line_name,
bench::boolean as has_bench, shelter::boolean as has_shelter, lighting::boolean as has_lighting, garbage::boolean as has_garbage_can,
telephone::boolean as has_telephone, bicycle::boolean as has_bicycle_rack
FROM staging.godurham_stops20180201;
CREATE INDEX idx_bus_stops_geom ON point_features.bus_stops USING gist(geom);
INSERT INTO point_features.bus_stops(data_year, geom, name, onstreet, atstreet, city, state, zip, system, bus_line_id, bus_line_number, bus_line_name, has_bench, has_shelter, has_lighting, has_garbage_can, has_telephone, has_bicycle_rack)
SELECT 2018, geom, stopname, onstreet, atstreet, city, state, zipcode, 'GoTriangle', lineid, lineabbr, linename, bench::boolean, shelter::boolean, lighting::boolean, garbage::boolean, telephone::boolean, bicycle::boolean FROM staging.gotriangle_stops20180201;
-- Add function to substitute zero for null values
CREATE OR REPLACE FUNCTION numval_or_zero(v_input numeric)
RETURNS numeric AS $$
BEGIN
IF v_input IS NULL THEN
RETURN 0;
ELSE
RETURN v_input;
END IF;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
DROP MATERIALIZED VIEW IF EXISTS nbhd_correlates.healthcare_access__tract__2018;
CREATE MATERIALIZED VIEW nbhd_correlates.healthcare_access__tract__2018 AS
SELECT gid, geom, fips, numval_or_zero(clinic_proximity)/nullif(du,0) as Clinic_Proximity, numval_or_zero(bus_stop_proximity)/nullif(du,0) as Bus_Stop_Proximity, numval_or_zero(pharmacy_proximity)/nullif(du, 0) AS Pharmacy_Proximity FROM
(SELECT gid, geom, fips,
nbhd_correlates.dwelling_units__count(geom) as du,
nbhd_correlates.dwelling_units__proximity_count(geom, 'point_features.clinics', 1320) as clinic_proximity,
nbhd_correlates.dwelling_units__proximity_count(geom, 'point_features.bus_stops', 1320) as bus_stop_proximity,
nbhd_correlates.dwelling_units__proximity_count(geom, 'point_features.pharmacies', 1320) as pharmacy_proximity
FROM geom.tract) AS c;
DROP TABLE IF EXISTS point_features.epa_registered_air_emitters;
CREATE TABLE point_features.epa_registered_air_emitters (
gid SERIAL PRIMARY KEY,
geom geometry(geometry, 102719),
data_year integer,
frs_id numeric,
name varchar(254),
street_address varchar(254),
city varchar(254),
state varchar(2),
zip varchar(15),
site_type varchar(50),
epa_programs varchar(254),
epa_interests varchar(254),
naics_codes varchar(254),
naics_descriptions varchar(254),
sic_codes varchar(254),
sic_descriptions varchar(254),
date_created date,
date_updated date
);
INSERT INTO point_features.epa_registered_air_emitters(geom, data_year, frs_id, name, street_address, city, state, zip, site_type, epa_programs, epa_interests, naics_codes, naics_descriptions, sic_codes, sic_descriptions, date_created, date_updated)
SELECT geom, 2018, registry_i, primary_na, location_a, city_name, state_code, postal_cod, site_type_, pgm_sys_ac, interest_t, naics_code, naics_co_1, sic_codes, sic_code_d, create_dat, update_dat
FROM staging.durhamcounty_air_epa_2018 WHERE site_type_ <> 'MONITORING STATION';
CREATE INDEX idx_epa_registered_air_emitters_geom ON point_features.epa_registered_air_emitters USING gist(geom);
COMMENT ON TABLE point_features.epa_registered_air_emitters IS 'FRS-registered facilities reporting air emissions according to EPA dataset';
COMMENT ON COLUMN point_features.epa_registered_air_emitters.date_created IS 'created_date from EPA source data';
COMMENT ON COLUMN point_features.epa_registered_air_emitters.date_updated IS 'updated_date from EPA source data';
DROP MATERIALIZED VIEW IF EXISTS nbhd_correlates.environmental_stress__tract__2018;
CREATE MATERIALIZED VIEW nbhd_correlates.environmental_stress__tract__2018 AS
SELECT gid, geom, fips, numval_or_zero(air_proximity)/nullif(du,0) as Air_Polluter_Proximity FROM
(SELECT gid, geom, fips,
nbhd_correlates.dwelling_units__count(geom) as du,
nbhd_correlates.dwelling_units__proximity_count(geom, 'point_features.epa_registered_air_emitters', 2640) as air_proximity
FROM geom.tract) AS c;
CREATE VIEW staging.air_polluter_proximity AS SELECT gid, geom, fips,
nbhd_correlates.dwelling_units__count(geom) as du,
nbhd_correlates.dwelling_units__proximity_count(geom, 'point_features.epa_registered_air_emitters', 2640) as air_proximity
FROM geom.tract;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment