Skip to content

Instantly share code, notes, and snippets.

@pramsey
Last active June 16, 2020 06:03
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save pramsey/f2493c045c22964b10c1e17d3a652ddb to your computer and use it in GitHub Desktop.
Save pramsey/f2493c045c22964b10c1e17d3a652ddb to your computer and use it in GitHub Desktop.
PGConf.eu Keynote Notes

Spatial SQL and PostGIS

This set of instructions goes with this presentation deck.

Installing PostGIS

Community Source Code

https://postgis.net/

PGDG Yum Repository

https://yum.postgresql.org/

yum install postgis25_11

Ubuntu GIS

https://wiki.ubuntu.com/UbuntuGIS

sudo add-apt-repository ppa:ubuntugis/ubuntugis-unstable
sudo apt-get install postgis

GeoCoding

Set up our working database for this talk:

CREATE DATABASE pgconf;
\c pgconf
CREATE EXTENSION postgis;
CREATE LANGUAGE plpythonu;

Create example table:

CREATE TABLE customers (
  name TEXT,
  address TEXT,
  geom GEOMETRY(POINT, 4326)
);

INSERT INTO customers (name, address) VALUES 
  ('Paul', '144 Simcoe Street, Victoria, BC, Canada'),
  ('Hyatt', '655 Burrard St, Vancouver, BC'),
  ('Parc 55', '55 Cyril Magnin St, San Francisco, CA, USA');

Create example geocoding function:

CREATE OR REPLACE FUNCTION geocode(address text)
RETURNS text AS
$$
    from geopy.geocoders import Nominatim
    geolocator = Nominatim(user_agent="plpythonu")
    location = geolocator.geocode(address)
    wkt = 'SRID=4326;POINT(%g %g)' % ((location.longitude, location.latitude))
    return wkt 
$$
LANGUAGE plpythonu VOLATILE
COST 1000;

Try it out:

SELECT ST_AsGeoJson(geocode('144 Simcoe Street, Victoria, BC'));
  
SELECT geocode(address), address 
FROM customers
WHERE name = 'Paul';

UPDATE customers 
SET geom = geocode(address);

Language abstractions over geocoding APIs:

Loading GIS Data

Starbucks Global Locations

Create a table suitable to hold the data

DROP TABLE IF EXISTS starbucks CASCADE;
CREATE TABLE starbucks (
  store_id  integer PRIMARY KEY,
  city      text,
  name      text,
  country   text,
  lon       float8,
  lat       float8,
  geom      geometry(Point, 4326)
  );

Pipe the data directly from opendata.socrata.com to the COPY command:

-- Input data use mm/dd/yyyy format for dates
SET DATESTYLE = US;             

-- Input data are comma delimited
COPY starbucks (city, name, country, lon, lat, store_id) 
FROM PROGRAM 
  'curl http://s3.cleverelephant.ca/starbucks.csv.gz | gunzip'
WITH (
  FORMAT csv,
  HEADER true
);
                               
-- Add empty geometry column                                                          
ALTER TABLE starbucks 
  ADD COLUMN geom geometry(point, 4326);

UPDATE starbucks 
  SET geom = ST_SetSRID(ST_MakePoint(lon, lat), 4326);

CREATE INDEX starbucks_geom_idx 
  ON starbucks USING GIST (geom);

Natural Earth Populated Places and Admin

Load admin1 with shp2pgsql

Unzip the file and load using shp2pgsql:

shp2pgsql -s 4326 \
  -I -D \
  ne_10m_admin_1_states_provinces \
  admin1 \
  | psql pgconf

Load popplaces using ogr2ogr

Unzip the file and load using ogr2ogr:

ogr2ogr \
  -f "PostgreSQL" \
  -nlt POINT \
  -nln popplaces \
  -lco GEOMETRY_NAME=geom \
  PG:"dbname=pgconf host=localhost" \
  ne_10m_populated_places.shp

Simple Query

Who has the most Starbucks?

SELECT a.name, count(*)
FROM admin1 a
JOIN starbucks s
  ON ST_Intersects(a.geom, s.geom)
GROUP BY a.name
ORDER BY count DESC
LIMIT 10

Census Data

Viewing GIS Data

Viewers

Architectures

Analyzing GIS Data

Simple Spatial Queries

Distance of all Starbucks locations from Pike Place Public Market

SELECT a.*, 
  ST_Distance(a.geom, b.geom) 
    AS distance 
FROM starbucks a, starbucks b 
WHERE b.name = 'Pike Place'

Compare geometry and geography distance calculations

WITH pts AS (
  SELECT 'POINT(-122.3421 47.6101)' AS p,
         'POINT( 139.7609 35.7381)' AS q
) 
SELECT ST_Distance(p, q) AS d_geom,
       ST_Distance(
         p::geography, 
         q::geography) AS d_geog
FROM pts;

Add geography column to table

ALTER TABLE starbucks
  ADD COLUMN geog Geography(Point, 4326); 

UPDATE starbucks
  SET geog = geom::geography;

CREATE INDEX starbucks_geog_x
  ON starbucks USING GIST (geog);

Calculate distances with geography

SELECT a.*, 
  ST_Distance(a.geog, b.geog) 
    AS distance 
FROM starbucks a, starbucks b 
WHERE b.name = 'Pike Place'

Find nearest Starbucks query using geography

WITH pt AS (
  SELECT ST_SetSRID(
    ST_MakePoint(-122.3421, 47.6101), 
    4326) AS pt
)
SELECT name, 
       ST_Distance(pt, geog) 
FROM starbucks, pt
ORDER BY geog <-> pt
LIMIT 5

Find Starbucks within a radius

WITH pt AS (
  SELECT ST_SetSRID(
    ST_MakePoint(-122.3421, 47.6101), 
    4326) AS pt
)
SELECT name, 
       ST_Distance(pt, geog) 
FROM starbucks, pt
WHERE ST_DWithin(geog, pt, 1000);

Generate a buffer circle

For displaying a circle on the starbucks map, we generate the circle around Pike Place:

WITH pt AS (
  SELECT ST_SetSRID(
    ST_MakePoint(-122.3421, 47.6101), 
    4326) AS pt
)
SELECT ST_Buffer(pt::geography, 1000)
FROM pt

Sociodemographic Queries

Load Data

CREATE TABLE us_tracts_income (
    geom geometry(MultiPolygon,4326),
    gisjoin text,
    name10 character varying(7),
    households integer,
    households_over_100k integer,
    households_over_150k integer
);

COPY us_tracts_income (geom, gisjoin, name10, households, households_over_100k, households_over_150k) 
FROM PROGRAM 
  'curl http://s3.cleverelephant.ca/us_tracts_income.csv.bz2 | bunzip2'
WITH (
  FORMAT csv,
  HEADER false
);

CREATE INDEX us_tracts_income_geom_x ON us_tracts_income USING GIST (geom);

Nationwide Income Levels

SELECT 
  100.0 * Sum(households_over_100k) /
          Sum(households) 
  AS pct_over_100k 
FROM us_tracts_income;

Starbucks Income Levels

WITH starbucks_tracts AS (
  SELECT DISTINCT ON (gisjoin) t.*
  FROM us_tracts_income t
  JOIN starbucks s
  ON ST_Intersects(s.geom, t.geom)
)
SELECT 100.0 * Sum(households_over_100k) /
               Sum(households) AS pct_over_100k
FROM starbucks_tracts;

Multi-Starbucks Income Levels

WITH starbucks_multi AS (
  SELECT a.store_id, a.geom, count(*)
  FROM starbucks a
  JOIN starbucks b
    ON ST_DWithin(a.geog, b.geog, 500)
  WHERE a.country = 'US' AND b.country = 'US'
  GROUP BY a.store_id, a.geom
),
starbucks_tracts AS (
  SELECT DISTINCT ON (gisjoin) t.*
  FROM us_tracts_income t
  JOIN starbucks_multi s
  ON ST_Intersects(s.geom, t.geom)
)
SELECT 100.0 * Sum(households_over_100k) /
               Sum(households) AS pct_over_100k
FROM starbucks_tracts;

Group admin1 table to admin0

CREATE TABLE admin0 AS 
SELECT 
  ST_Union(geom) AS geom, 
  admin AS name
FROM admin1 
GROUP BY 2;

Generate International Border Lines

CREATE TABLE admin0_borders AS 
SELECT 
  ST_Intersection(a.geom, b.geom) 
FROM admin0 a 
JOIN admin0 b 
ON ST_Intersects(a.geom, b.geom) 
WHERE a.name > b.name;

Cluster Starbucks with DBScan

CREATE TABLE starbucks_clustered AS 
SELECT s.*, ST_ClusterDBScan(ST_Transform(geom,2163), 1, 100) OVER () 
FROM starbucks 
WHERE s.country = 'US';

Cluster Starbucks with DBScan

CREATE TABLE starbucks_clustered AS 
SELECT s.*, ST_ClusterKMeans(ST_Transform(geom,2163), 5) OVER () 
FROM starbucks 
WHERE s.country = 'US';

Demographic Clustering

CREATE TABLE us_tracts_demographic AS 
SELECT 
  100.0*u.households_over_100k/u.households AS pct_over_100k, 
  u.gisjoin, p.name,
  st_distance(p.geom::geography, u.geom::geography) AS distance 
FROM us_tracts_income u 
CROSS JOIN LATERAL (
  SELECT p.* from popplaces p where p.pop_max > 200000
  ORDER BY p.geom <-> u.geomlimit 1) AS p
WHERE u.households > 0;

CREATE TABLE us_tracts_demographic_clusters AS
WITH clusters AS (
  SELECT ST_clusterkmeans(ST_MakePoint(prop_over_100k, distance), 5) OVER () AS cluster, gisjoin
  FROM us_tracts_demographic
  )
SELECT u.geom, c.cluster
FROM us_tracts u 
JOIN clusters c USING (gisjoin);

Dividing a Polygon into Equal Parts

CREATE TABLE peru AS 
  SELECT *
  FROM admin0
  WHERE name = 'Peru'

CREATE TABLE peru_pts AS
  SELECT (ST_Dump(ST_GeneratePoints(geom, 2000))).geom AS geom
  FROM peru
  WHERE name = 'Peru'

CREATE TABLE peru_pts_clustered AS
  SELECT geom, ST_ClusterKMmeans(geom, 10) over () AS cluster
  FROM peru_pts;
  
CREATE TABLE peru_centers AS
  SELECT cluster, ST_Centroid(ST_collect(geom)) AS geom
  FROM peru_pts_clustered
  GROUP BY cluster;
  
CREATE TABLE peru_voronoi AS
  SELECT (ST_Dump(ST_VoronoiPolygons(ST_collect(geom)))).geom AS geom
  FROM peru_centers;    
  
CREATE TABLE peru_divided AS
  SELECT ST_Intersection(a.geom, b.geom) AS geom
  FROM peru a
  CROSS JOIN peru_voronoi b;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment