-
-
Save jeremybmerrill/0837d7d7eb6609c16b6c to your computer and use it in GitHub Desktop.
How to create the data used in my Amtrak Northeast Corridor curves story: http://www.nytimes.com/interactive/2015/06/24/upshot/amtrak-crash-was-on-one-of-curviest-stretches.html?_r=0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
DBNAME="rail_lines2" # the name of the database you're using. you can name this whatever you want. | |
#### | |
# | |
# You need GNU make to run this; execute in a Terminal `make -f curviness.make`. | |
# Everything else should happen by itself, assuming you have postgresql and PostGIS installed. | |
# Installation instructions for those tools, for your platform (Mac, Windows, Linux) are available on the web. | |
# you also need `wget` installed, but you probably already do. | |
# | |
#### | |
all: curviest_segments_on_nec_with_overlapping_segments.csv overlapping_segments.csv nec_segments_shingled_tenths.geojson | |
echo "curviest_segments_on_nec_with_overlapping_segments.csv has the curviest segments, but many overlap. Deduping these with overlapping_segments.csv is left as an exercise for the reader" | |
dbtables/rail_lines: inputs/rail_lines/rail_lines.shp | dbtables | |
# load into the database the FRA's rail network shapefile. | |
# It's at about 1:100k resolution. Railroads are divided into segments, | |
# ranging in length from a few hundred feet to a few miles long. | |
# This division isn't meaningful, it's an artifact of the digitization | |
# process. They are divided into subdivisions and categorized by what | |
# type of traffic (Amtrak, commuter, other or freight only) passes over | |
# them. | |
psql -d $(DBNAME) -c "DROP TABLE IF EXISTS rail_lines" | |
shp2pgsql -s 4269 $< | psql -q -d $(DBNAME) | |
psql -d $(DBNAME) -c "alter table rail_lines add column curviness numeric;" | |
psql -d $(DBNAME) -c "update rail_lines set curviness = ST_Length(geom) / ST_Distance(ST_StartPoint(ST_GeometryN(geom, 1)), ST_EndPoint(ST_GeometryN(geom, 1))) where ST_Distance(ST_StartPoint(ST_GeometryN(geom, 1)), ST_EndPoint(ST_GeometryN(geom, 1))) > 0;" | |
# manually fixing segments that don't match: some of this will get covered by the long miscreant-fixing query below, but not all of it. | |
psql -d $(DBNAME) -c "update rail_lines set subdiv = 'METROPOLITAN' where gid in (46364, 46195, 47123, 49474, 46245, 46246, 154869, 46243, 46195, 46465, 46365, 46378, 46195, 46182, 46193, 46196, 80539, 46707, 104180);" | |
psql -d $(DBNAME) -c "update rail_lines set subdiv = 'MID-ATLANTIC' where gid in (103927, 137442, 148092 );" | |
# turns out there are two Metropolitan divisions -- they might change the name sometime soon, apparently. | |
psql -d $(DBNAME) -c "update rail_lines set subdiv = 'METROPOLITAN-NEC' where subdiv = 'METROPOLITAN' and stateab in ('NY', 'NJ', 'PA');" | |
# some segments have a null subdivision. we fix that here. (it's not meaningful) | |
psql -d $(DBNAME) -c "update rail_lines set subdiv = 'PHILLY-HARRISBURG-MAINLINE-JBM' where fraarcid in (236282, 236277, 159980,204776,204821,207719,207333,207335,207463,272633,272634,236366,236285,236324,236304,236305,236292,236300,236308,236309,236321,236361,236368,236370,236372,236373,236392,236393,236430,236415,236416,236424,236434,236435,236447,236457,236463,266913,252808,254946,264431,266938,266943,271141,272635,272636,272637,272638,273690,273691,274778,274779,274862,274863,274864);" | |
psql -d $(DBNAME) -c "update rail_lines set subdiv = 'EMPIRE-JBM' where fraarcid in (259498, 210996, 210997, 211003, 210964, 277974,277975, 210978, 102703,102700, 102711,150905,61342,167438,210967,210980,211004,278677,277976,277977,279569,279571);" | |
psql -d $(DBNAME) -c "update rail_lines set subdiv = 'NEW HAVEN LINE' where fraarcid in (278679,278680, 210995, 210999);" | |
# find all solitary nulls (surrounded by the same subdiv on both sides) | |
psql -d $(DBNAME) -c "$$FINDSOLITARYNULLS" | |
touch $@ | |
define FINDSOLITARYNULLS | |
update rail_lines this | |
set subdiv = nextarc.subdiv | |
from rail_lines nextarc, rail_lines prevarc | |
where ( (this.tofranode = nextarc.frfranode or this.tofranode = nextarc.tofranode) and | |
(this.frfranode = prevarc.frfranode or this.frfranode = prevarc.tofranode) | |
) and | |
this.subdiv is null and nextarc.subdiv is not null and prevarc.subdiv is not null and | |
nextarc.subdiv = prevarc.subdiv and | |
this.passngr like 'A%' and this.net = 'M' and | |
nextarc.passngr like 'A%' and nextarc.net = 'M' and | |
prevarc.passngr like 'A%' and prevarc.net = 'M'; | |
endef | |
export FINDSOLITARYNULLS | |
# joining the original segments into single LineStrings | |
dbtables/division_lines: dbtables/rail_lines | dbtables | |
psql -d "$(DBNAME)" -c "drop table if exists division_lines;" | |
psql -d "$(DBNAME)" -c "create table division_lines as select ST_Transform(ST_LineMerge(ST_Union(geom)), 2263) as geom, subdiv as name from rail_lines where passngr like 'A%' and net = 'M' group by subdiv;" | |
touch $@ | |
define SHINGLEBYTENTHS | |
create table shingled_tenths as | |
SELECT name, | |
-- take a substring if the length reamining in the segment is greater than 528 feet (1/10 of a mile) | |
-- otherwise take the remainder | |
ST_LineSubstring(geom, 528*n/length, | |
CASE | |
WHEN 528*(n+10) < length THEN 528*(n+10)/length | |
ELSE 1 | |
END) as geom | |
FROM | |
(SELECT name, | |
-- reverse the vertex order so that the upstream end of each | |
-- linestring will be the remainder | |
ST_LineMerge(ST_Reverse(geom)) AS geom, | |
ST_Length(geom) As length | |
FROM division_lines ts | |
) t | |
CROSS JOIN generate_series(0,10000) n # 10000 is arbitrary, but must be greater than the number of segments you're getting | |
WHERE n*528/length < 1; | |
endef | |
export SHINGLEBYTENTHS | |
# "shingle" each line into mile-long segments, overlapping, starting every tenth of a mile | |
dbtables/shingled_tenths: dbtables/division_lines | dbtables | |
psql -d "$(DBNAME)" -c "DROP TABLE IF EXISTS $(shell basename $@)" | |
psql -d "$(DBNAME)" -c "$$SHINGLEBYTENTHS" | |
psql -d "$(DBNAME)" -c "alter table $(shell basename $@) add column length numeric; " | |
psql -d "$(DBNAME)" -c "update $(shell basename $@) set length = ST_Length(geom);" | |
psql -d "$(DBNAME)" -c "alter table $(shell basename $@) add column id serial; " | |
psql -d "$(DBNAME)" -c "alter table $(shell basename $@) add primary key(id);" | |
psql -d "$(DBNAME)" -c "alter table $(shell basename $@) add column curviness numeric;" | |
psql -d "$(DBNAME)" -c "update $(shell basename $@) set curviness = ST_Length(geom) / ST_Distance(ST_StartPoint(geom), ST_EndPoint(geom)) where ST_Distance(ST_StartPoint(geom), ST_EndPoint(geom)) > 0;" | |
touch $@ | |
dbtables/curviest_segments_in_nec_with_overlapping_segments: dbtables/shingled_tenths dbtables/amtrk_sta | |
psql -d "$(DBNAME)" -c "DROP TABLE IF EXISTS $(shell basename $@)" | |
psql -d "$(DBNAME)" -c "$$CURVIESTSEGMENTS" | |
touch $@ | |
curviest_segments_on_nec_with_overlapping_segments.csv: dbtables/curviest_segments_in_nec_with_overlapping_segments | |
psql -d "$(DBNAME)" -c "\COPY $(shell basename $<) to '$@' with csv header;" | |
define CURVIESTSEGMENTS | |
create table curviest_segments_in_nec_with_overlapping_segments as | |
(select name, id, curviness | |
from shingled_tenths | |
where name in ('METROPOLITAN-NEC','MID-ATLANTIC','NEW HAVEN LINE', 'NEW YORK TERMINAL', 'NEW ENGLAND') | |
order by curviness desc limit 300 | |
) EXCEPT | |
(select name, id, curviness | |
from shingled_tenths | |
inner join amtrk_sta | |
on ST_Intersects(shingled_tenths.geom, ST_Buffer(ST_Transform(amtrk_sta.geom, 2263), 10560)) | |
where name in ('METROPOLITAN-NEC','MID-ATLANTIC','NEW HAVEN LINE', 'NEW YORK TERMINAL', 'NEW ENGLAND') and | |
amtrk_sta.state in ('DC', 'MD', 'NJ', 'DE', 'PA', 'NY', 'CT', 'RI', 'MA') | |
-- uncomment this to exclude stations skipped by some Northeast regionals | |
-- and amtrk_sta.stncode not in ('ABE', 'NRK', 'PHN', 'NBK', 'PJC', 'CWH', | |
-- 'NRO', 'BRP', 'OSB', 'MYS', 'WLY', 'KIN') | |
order by curviness desc limit 300) | |
order by curviness desc limit 60; | |
endef | |
export CURVIESTSEGMENTS | |
# finds overlapping segments in the top handful, so you can eliminate them. | |
dbtables/overlapping: dbtables/shingled_tenths | dbtables | |
psql -d "$(DBNAME)" -c "DROP TABLE IF EXISTS $(shell basename $@)" | |
psql -d "$(DBNAME)" -c "$$OVERLAPPING" | |
touch $@ | |
define OVERLAPPING | |
create table overlapping as | |
with top_ten as ( | |
(select name, id, curviness, geom | |
from shingled_tenths | |
where name in ('METROPOLITAN-NEC','MID-ATLANTIC','NEW HAVEN LINE', 'NEW YORK TERMINAL', 'NEW ENGLAND') | |
order by curviness desc limit 300 | |
) EXCEPT | |
(select name, id, curviness, shingled_tenths.geom | |
from shingled_tenths | |
inner join amtrk_sta | |
on ST_Intersects(shingled_tenths.geom, ST_Buffer(ST_Transform(amtrk_sta.geom, 2263), 10560)) | |
where name in ('METROPOLITAN-NEC','MID-ATLANTIC','NEW HAVEN LINE', 'NEW YORK TERMINAL', 'NEW ENGLAND') and | |
amtrk_sta.state in ('DC', 'MD', 'NJ', 'DE', 'PA', 'NY', 'CT', 'RI', 'MA') | |
-- and amtrk_sta.stncode not in ('ABE', 'NRK', 'PHN', 'NBK', 'PJC', 'CWH',-- exclude stations skipped by some Northeast regionals | |
-- 'NRO', 'BRP', 'OSB', 'MYS', 'WLY', 'KIN') | |
order by curviness desc limit 300) | |
order by curviness desc limit 60 | |
) | |
select | |
distinct(least(tt1.id, tt2.id) || ',' || greatest(tt1.id, tt2.id)) as overlaps | |
from top_ten tt1, top_ten tt2 | |
where ST_Intersects(tt1.geom, tt2.geom) and tt1.id != tt2.id; | |
endef | |
export OVERLAPPING | |
overlapping_segments.csv: dbtables/overlapping | |
psql -d "$(DBNAME)" -c "\COPY $(shell basename $<) to '$@' with csv header;" | |
nec_segments_shingled_tenths.geojson: dbtables/shingled_tenths | |
rm -f $@ | |
ogr2ogr -f "GeoJSON" $@ PG:"dbname=$(DBNAME)" $(shell basename $<) -where "name in ('METROPOLITAN-NEC','MID-ATLANTIC','NEW HAVEN LINE', 'NEW YORK TERMINAL', 'NEW ENGLAND')" | |
# below here is just boring plumbing. :) | |
inputs: | |
mkdir $@ | |
dbtables/amtrk_sta: inputs/amtrk_sta/amtrk_sta.shp | dbtables | |
psql -d $(DBNAME) -c "DROP TABLE IF EXISTS amtrk_sta" | |
shp2pgsql -s 4269 $< | psql -q -d $(DBNAME) | |
touch $@ | |
inputs/amtrk_sta/amtrk_sta.shp: | inputs | |
wget http://www.rita.dot.gov/bts/sites/rita.dot.gov.bts/files/publications/national_transportation_atlas_database/2014/zip/amtrk_sta.zip | |
unzip amtrk_sta.zip -d amtrk_sta/ | |
rm amtrk_sta.zip | |
mv amtrk_sta inputs/ | |
touch inputs/amtrk_sta/* | |
inputs/rail_lines/rail_lines.shp: | inputs | |
wget http://www.rita.dot.gov/bts/sites/rita.dot.gov.bts/files/publications/national_transportation_atlas_database/2014/zip/rail_lines.zip | |
unzip rail_lines.zip -d rail_lines/ | |
rm rail_lines.zip | |
mv rail_lines inputs/ | |
touch inputs/rail_lines/* | |
dbtables: | |
createdb "$(DBNAME)" | |
mkdir $@ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hey! FYI your base maps aren't loading on the live page :)