This set of instructions goes with this presentation deck.
- Tobler's First Law of Geography
- 2016 Precinct Level Presidential Election Results
- Canada Geographic Information System
- Learn about early GIS by watching "Data for Decision"
- geojson.io
yum install postgis25_11
https://wiki.ubuntu.com/UbuntuGIS
sudo add-apt-repository ppa:ubuntugis/ubuntugis-unstable
sudo apt-get install postgis
Set up our working database for this talk:
CREATE DATABASE pgconf;
\c pgconf
CREATE EXTENSION postgis;
CREATE LANGUAGE plpythonu;
Create example table:
CREATE TABLE customers (
name TEXT,
address TEXT,
geom GEOMETRY(POINT, 4326)
);
INSERT INTO customers (name, address) VALUES
('Paul', '144 Simcoe Street, Victoria, BC, Canada'),
('Hyatt', '655 Burrard St, Vancouver, BC'),
('Parc 55', '55 Cyril Magnin St, San Francisco, CA, USA');
Create example geocoding function:
CREATE OR REPLACE FUNCTION geocode(address text)
RETURNS text AS
$$
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="plpythonu")
location = geolocator.geocode(address)
wkt = 'SRID=4326;POINT(%g %g)' % ((location.longitude, location.latitude))
return wkt
$$
LANGUAGE plpythonu VOLATILE
COST 1000;
Try it out:
SELECT ST_AsGeoJson(geocode('144 Simcoe Street, Victoria, BC'));
SELECT geocode(address), address
FROM customers
WHERE name = 'Paul';
UPDATE customers
SET geom = geocode(address);
Language abstractions over geocoding APIs:
- Original data file: https://github.com/mmcloughlin/starbucks
- Converted to CSV: http://s3.cleverelephant.ca/starbucks.csv.gz
Create a table suitable to hold the data
DROP TABLE IF EXISTS starbucks CASCADE;
CREATE TABLE starbucks (
store_id integer PRIMARY KEY,
city text,
name text,
country text,
lon float8,
lat float8,
geom geometry(Point, 4326)
);
Pipe the data directly from opendata.socrata.com to the COPY command:
-- Input data use mm/dd/yyyy format for dates
SET DATESTYLE = US;
-- Input data are comma delimited
COPY starbucks (city, name, country, lon, lat, store_id)
FROM PROGRAM
'curl http://s3.cleverelephant.ca/starbucks.csv.gz | gunzip'
WITH (
FORMAT csv,
HEADER true
);
-- Add empty geometry column
ALTER TABLE starbucks
ADD COLUMN geom geometry(point, 4326);
UPDATE starbucks
SET geom = ST_SetSRID(ST_MakePoint(lon, lat), 4326);
CREATE INDEX starbucks_geom_idx
ON starbucks USING GIST (geom);
- Natural Earth 10m-admin-1-states-provinces
- Natural Earth 10m-populated-places
Unzip the file and load using shp2pgsql
:
shp2pgsql -s 4326 \
-I -D \
ne_10m_admin_1_states_provinces \
admin1 \
| psql pgconf
Unzip the file and load using ogr2ogr
:
ogr2ogr \
-f "PostgreSQL" \
-nlt POINT \
-nln popplaces \
-lco GEOMETRY_NAME=geom \
PG:"dbname=pgconf host=localhost" \
ne_10m_populated_places.shp
Who has the most Starbucks?
SELECT a.name, count(*)
FROM admin1 a
JOIN starbucks s
ON ST_Intersects(a.geom, s.geom)
GROUP BY a.name
ORDER BY count DESC
LIMIT 10
- Original data from: https://data2.nhgis.org/
- https://qgis.org
- https://github.com/NYCPlanning/labs-postgis-preview
- https://github.com/lukasmartinelli/postgis-editor
-
PostGIS formats
-
Client side web maps
-
Server side renderers
SELECT a.*,
ST_Distance(a.geom, b.geom)
AS distance
FROM starbucks a, starbucks b
WHERE b.name = 'Pike Place'
WITH pts AS (
SELECT 'POINT(-122.3421 47.6101)' AS p,
'POINT( 139.7609 35.7381)' AS q
)
SELECT ST_Distance(p, q) AS d_geom,
ST_Distance(
p::geography,
q::geography) AS d_geog
FROM pts;
ALTER TABLE starbucks
ADD COLUMN geog Geography(Point, 4326);
UPDATE starbucks
SET geog = geom::geography;
CREATE INDEX starbucks_geog_x
ON starbucks USING GIST (geog);
SELECT a.*,
ST_Distance(a.geog, b.geog)
AS distance
FROM starbucks a, starbucks b
WHERE b.name = 'Pike Place'
WITH pt AS (
SELECT ST_SetSRID(
ST_MakePoint(-122.3421, 47.6101),
4326) AS pt
)
SELECT name,
ST_Distance(pt, geog)
FROM starbucks, pt
ORDER BY geog <-> pt
LIMIT 5
WITH pt AS (
SELECT ST_SetSRID(
ST_MakePoint(-122.3421, 47.6101),
4326) AS pt
)
SELECT name,
ST_Distance(pt, geog)
FROM starbucks, pt
WHERE ST_DWithin(geog, pt, 1000);
For displaying a circle on the starbucks map, we generate the circle around Pike Place:
WITH pt AS (
SELECT ST_SetSRID(
ST_MakePoint(-122.3421, 47.6101),
4326) AS pt
)
SELECT ST_Buffer(pt::geography, 1000)
FROM pt
CREATE TABLE us_tracts_income (
geom geometry(MultiPolygon,4326),
gisjoin text,
name10 character varying(7),
households integer,
households_over_100k integer,
households_over_150k integer
);
COPY us_tracts_income (geom, gisjoin, name10, households, households_over_100k, households_over_150k)
FROM PROGRAM
'curl http://s3.cleverelephant.ca/us_tracts_income.csv.bz2 | bunzip2'
WITH (
FORMAT csv,
HEADER false
);
CREATE INDEX us_tracts_income_geom_x ON us_tracts_income USING GIST (geom);
SELECT
100.0 * Sum(households_over_100k) /
Sum(households)
AS pct_over_100k
FROM us_tracts_income;
WITH starbucks_tracts AS (
SELECT DISTINCT ON (gisjoin) t.*
FROM us_tracts_income t
JOIN starbucks s
ON ST_Intersects(s.geom, t.geom)
)
SELECT 100.0 * Sum(households_over_100k) /
Sum(households) AS pct_over_100k
FROM starbucks_tracts;
WITH starbucks_multi AS (
SELECT a.store_id, a.geom, count(*)
FROM starbucks a
JOIN starbucks b
ON ST_DWithin(a.geog, b.geog, 500)
WHERE a.country = 'US' AND b.country = 'US'
GROUP BY a.store_id, a.geom
),
starbucks_tracts AS (
SELECT DISTINCT ON (gisjoin) t.*
FROM us_tracts_income t
JOIN starbucks_multi s
ON ST_Intersects(s.geom, t.geom)
)
SELECT 100.0 * Sum(households_over_100k) /
Sum(households) AS pct_over_100k
FROM starbucks_tracts;
CREATE TABLE admin0 AS
SELECT
ST_Union(geom) AS geom,
admin AS name
FROM admin1
GROUP BY 2;
CREATE TABLE admin0_borders AS
SELECT
ST_Intersection(a.geom, b.geom)
FROM admin0 a
JOIN admin0 b
ON ST_Intersects(a.geom, b.geom)
WHERE a.name > b.name;
CREATE TABLE starbucks_clustered AS
SELECT s.*, ST_ClusterDBScan(ST_Transform(geom,2163), 1, 100) OVER ()
FROM starbucks
WHERE s.country = 'US';
CREATE TABLE starbucks_clustered AS
SELECT s.*, ST_ClusterKMeans(ST_Transform(geom,2163), 5) OVER ()
FROM starbucks
WHERE s.country = 'US';
CREATE TABLE us_tracts_demographic AS
SELECT
100.0*u.households_over_100k/u.households AS pct_over_100k,
u.gisjoin, p.name,
st_distance(p.geom::geography, u.geom::geography) AS distance
FROM us_tracts_income u
CROSS JOIN LATERAL (
SELECT p.* from popplaces p where p.pop_max > 200000
ORDER BY p.geom <-> u.geomlimit 1) AS p
WHERE u.households > 0;
CREATE TABLE us_tracts_demographic_clusters AS
WITH clusters AS (
SELECT ST_clusterkmeans(ST_MakePoint(prop_over_100k, distance), 5) OVER () AS cluster, gisjoin
FROM us_tracts_demographic
)
SELECT u.geom, c.cluster
FROM us_tracts u
JOIN clusters c USING (gisjoin);
CREATE TABLE peru AS
SELECT *
FROM admin0
WHERE name = 'Peru'
CREATE TABLE peru_pts AS
SELECT (ST_Dump(ST_GeneratePoints(geom, 2000))).geom AS geom
FROM peru
WHERE name = 'Peru'
CREATE TABLE peru_pts_clustered AS
SELECT geom, ST_ClusterKMmeans(geom, 10) over () AS cluster
FROM peru_pts;
CREATE TABLE peru_centers AS
SELECT cluster, ST_Centroid(ST_collect(geom)) AS geom
FROM peru_pts_clustered
GROUP BY cluster;
CREATE TABLE peru_voronoi AS
SELECT (ST_Dump(ST_VoronoiPolygons(ST_collect(geom)))).geom AS geom
FROM peru_centers;
CREATE TABLE peru_divided AS
SELECT ST_Intersection(a.geom, b.geom) AS geom
FROM peru a
CROSS JOIN peru_voronoi b;