Skip to content

Instantly share code, notes, and snippets.

@jeremybmerrill
Last active November 14, 2018 23:26
Show Gist options
  • Star 8 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save jeremybmerrill/0837d7d7eb6609c16b6c to your computer and use it in GitHub Desktop.
Save jeremybmerrill/0837d7d7eb6609c16b6c to your computer and use it in GitHub Desktop.
How to create the data used in my Amtrak Northeast Corridor curves story: http://www.nytimes.com/interactive/2015/06/24/upshot/amtrak-crash-was-on-one-of-curviest-stretches.html?_r=0
DBNAME="rail_lines2" # the name of the database you're using. you can name this whatever you want.
####
#
# You need GNU make to run this; execute in a Terminal `make -f curviness.make`.
# Everything else should happen by itself, assuming you have postgresql and PostGIS installed.
# Installation instructions for those tools, for your platform (Mac, Windows, Linux) are available on the web.
# you also need `wget` installed, but you probably already do.
#
####
all: curviest_segments_on_nec_with_overlapping_segments.csv overlapping_segments.csv nec_segments_shingled_tenths.geojson
echo "curviest_segments_on_nec_with_overlapping_segments.csv has the curviest segments, but many overlap. Deduping these with overlapping_segments.csv is left as an exercise for the reader"
dbtables/rail_lines: inputs/rail_lines/rail_lines.shp | dbtables
# load into the database the FRA's rail network shapefile.
# It's at about 1:100k resolution. Railroads are divided into segments,
# ranging in length from a few hundred feet to a few miles long.
# This division isn't meaningful, it's an artifact of the digitization
# process. They are divided into subdivisions and categorized by what
# type of traffic (Amtrak, commuter, other or freight only) passes over
# them.
psql -d $(DBNAME) -c "DROP TABLE IF EXISTS rail_lines"
shp2pgsql -s 4269 $< | psql -q -d $(DBNAME)
psql -d $(DBNAME) -c "alter table rail_lines add column curviness numeric;"
psql -d $(DBNAME) -c "update rail_lines set curviness = ST_Length(geom) / ST_Distance(ST_StartPoint(ST_GeometryN(geom, 1)), ST_EndPoint(ST_GeometryN(geom, 1))) where ST_Distance(ST_StartPoint(ST_GeometryN(geom, 1)), ST_EndPoint(ST_GeometryN(geom, 1))) > 0;"
# manually fixing segments that don't match: some of this will get covered by the long miscreant-fixing query below, but not all of it.
psql -d $(DBNAME) -c "update rail_lines set subdiv = 'METROPOLITAN' where gid in (46364, 46195, 47123, 49474, 46245, 46246, 154869, 46243, 46195, 46465, 46365, 46378, 46195, 46182, 46193, 46196, 80539, 46707, 104180);"
psql -d $(DBNAME) -c "update rail_lines set subdiv = 'MID-ATLANTIC' where gid in (103927, 137442, 148092 );"
# turns out there are two Metropolitan divisions -- they might change the name sometime soon, apparently.
psql -d $(DBNAME) -c "update rail_lines set subdiv = 'METROPOLITAN-NEC' where subdiv = 'METROPOLITAN' and stateab in ('NY', 'NJ', 'PA');"
# some segments have a null subdivision. we fix that here. (it's not meaningful)
psql -d $(DBNAME) -c "update rail_lines set subdiv = 'PHILLY-HARRISBURG-MAINLINE-JBM' where fraarcid in (236282, 236277, 159980,204776,204821,207719,207333,207335,207463,272633,272634,236366,236285,236324,236304,236305,236292,236300,236308,236309,236321,236361,236368,236370,236372,236373,236392,236393,236430,236415,236416,236424,236434,236435,236447,236457,236463,266913,252808,254946,264431,266938,266943,271141,272635,272636,272637,272638,273690,273691,274778,274779,274862,274863,274864);"
psql -d $(DBNAME) -c "update rail_lines set subdiv = 'EMPIRE-JBM' where fraarcid in (259498, 210996, 210997, 211003, 210964, 277974,277975, 210978, 102703,102700, 102711,150905,61342,167438,210967,210980,211004,278677,277976,277977,279569,279571);"
psql -d $(DBNAME) -c "update rail_lines set subdiv = 'NEW HAVEN LINE' where fraarcid in (278679,278680, 210995, 210999);"
# find all solitary nulls (surrounded by the same subdiv on both sides)
psql -d $(DBNAME) -c "$$FINDSOLITARYNULLS"
touch $@
define FINDSOLITARYNULLS
update rail_lines this
set subdiv = nextarc.subdiv
from rail_lines nextarc, rail_lines prevarc
where ( (this.tofranode = nextarc.frfranode or this.tofranode = nextarc.tofranode) and
(this.frfranode = prevarc.frfranode or this.frfranode = prevarc.tofranode)
) and
this.subdiv is null and nextarc.subdiv is not null and prevarc.subdiv is not null and
nextarc.subdiv = prevarc.subdiv and
this.passngr like 'A%' and this.net = 'M' and
nextarc.passngr like 'A%' and nextarc.net = 'M' and
prevarc.passngr like 'A%' and prevarc.net = 'M';
endef
export FINDSOLITARYNULLS
# joining the original segments into single LineStrings
dbtables/division_lines: dbtables/rail_lines | dbtables
psql -d "$(DBNAME)" -c "drop table if exists division_lines;"
psql -d "$(DBNAME)" -c "create table division_lines as select ST_Transform(ST_LineMerge(ST_Union(geom)), 2263) as geom, subdiv as name from rail_lines where passngr like 'A%' and net = 'M' group by subdiv;"
touch $@
define SHINGLEBYTENTHS
create table shingled_tenths as
SELECT name,
-- take a substring if the length reamining in the segment is greater than 528 feet (1/10 of a mile)
-- otherwise take the remainder
ST_LineSubstring(geom, 528*n/length,
CASE
WHEN 528*(n+10) < length THEN 528*(n+10)/length
ELSE 1
END) as geom
FROM
(SELECT name,
-- reverse the vertex order so that the upstream end of each
-- linestring will be the remainder
ST_LineMerge(ST_Reverse(geom)) AS geom,
ST_Length(geom) As length
FROM division_lines ts
) t
CROSS JOIN generate_series(0,10000) n # 10000 is arbitrary, but must be greater than the number of segments you're getting
WHERE n*528/length < 1;
endef
export SHINGLEBYTENTHS
# "shingle" each line into mile-long segments, overlapping, starting every tenth of a mile
dbtables/shingled_tenths: dbtables/division_lines | dbtables
psql -d "$(DBNAME)" -c "DROP TABLE IF EXISTS $(shell basename $@)"
psql -d "$(DBNAME)" -c "$$SHINGLEBYTENTHS"
psql -d "$(DBNAME)" -c "alter table $(shell basename $@) add column length numeric; "
psql -d "$(DBNAME)" -c "update $(shell basename $@) set length = ST_Length(geom);"
psql -d "$(DBNAME)" -c "alter table $(shell basename $@) add column id serial; "
psql -d "$(DBNAME)" -c "alter table $(shell basename $@) add primary key(id);"
psql -d "$(DBNAME)" -c "alter table $(shell basename $@) add column curviness numeric;"
psql -d "$(DBNAME)" -c "update $(shell basename $@) set curviness = ST_Length(geom) / ST_Distance(ST_StartPoint(geom), ST_EndPoint(geom)) where ST_Distance(ST_StartPoint(geom), ST_EndPoint(geom)) > 0;"
touch $@
dbtables/curviest_segments_in_nec_with_overlapping_segments: dbtables/shingled_tenths dbtables/amtrk_sta
psql -d "$(DBNAME)" -c "DROP TABLE IF EXISTS $(shell basename $@)"
psql -d "$(DBNAME)" -c "$$CURVIESTSEGMENTS"
touch $@
curviest_segments_on_nec_with_overlapping_segments.csv: dbtables/curviest_segments_in_nec_with_overlapping_segments
psql -d "$(DBNAME)" -c "\COPY $(shell basename $<) to '$@' with csv header;"
define CURVIESTSEGMENTS
create table curviest_segments_in_nec_with_overlapping_segments as
(select name, id, curviness
from shingled_tenths
where name in ('METROPOLITAN-NEC','MID-ATLANTIC','NEW HAVEN LINE', 'NEW YORK TERMINAL', 'NEW ENGLAND')
order by curviness desc limit 300
) EXCEPT
(select name, id, curviness
from shingled_tenths
inner join amtrk_sta
on ST_Intersects(shingled_tenths.geom, ST_Buffer(ST_Transform(amtrk_sta.geom, 2263), 10560))
where name in ('METROPOLITAN-NEC','MID-ATLANTIC','NEW HAVEN LINE', 'NEW YORK TERMINAL', 'NEW ENGLAND') and
amtrk_sta.state in ('DC', 'MD', 'NJ', 'DE', 'PA', 'NY', 'CT', 'RI', 'MA')
-- uncomment this to exclude stations skipped by some Northeast regionals
-- and amtrk_sta.stncode not in ('ABE', 'NRK', 'PHN', 'NBK', 'PJC', 'CWH',
-- 'NRO', 'BRP', 'OSB', 'MYS', 'WLY', 'KIN')
order by curviness desc limit 300)
order by curviness desc limit 60;
endef
export CURVIESTSEGMENTS
# finds overlapping segments in the top handful, so you can eliminate them.
dbtables/overlapping: dbtables/shingled_tenths | dbtables
psql -d "$(DBNAME)" -c "DROP TABLE IF EXISTS $(shell basename $@)"
psql -d "$(DBNAME)" -c "$$OVERLAPPING"
touch $@
define OVERLAPPING
create table overlapping as
with top_ten as (
(select name, id, curviness, geom
from shingled_tenths
where name in ('METROPOLITAN-NEC','MID-ATLANTIC','NEW HAVEN LINE', 'NEW YORK TERMINAL', 'NEW ENGLAND')
order by curviness desc limit 300
) EXCEPT
(select name, id, curviness, shingled_tenths.geom
from shingled_tenths
inner join amtrk_sta
on ST_Intersects(shingled_tenths.geom, ST_Buffer(ST_Transform(amtrk_sta.geom, 2263), 10560))
where name in ('METROPOLITAN-NEC','MID-ATLANTIC','NEW HAVEN LINE', 'NEW YORK TERMINAL', 'NEW ENGLAND') and
amtrk_sta.state in ('DC', 'MD', 'NJ', 'DE', 'PA', 'NY', 'CT', 'RI', 'MA')
-- and amtrk_sta.stncode not in ('ABE', 'NRK', 'PHN', 'NBK', 'PJC', 'CWH',-- exclude stations skipped by some Northeast regionals
-- 'NRO', 'BRP', 'OSB', 'MYS', 'WLY', 'KIN')
order by curviness desc limit 300)
order by curviness desc limit 60
)
select
distinct(least(tt1.id, tt2.id) || ',' || greatest(tt1.id, tt2.id)) as overlaps
from top_ten tt1, top_ten tt2
where ST_Intersects(tt1.geom, tt2.geom) and tt1.id != tt2.id;
endef
export OVERLAPPING
overlapping_segments.csv: dbtables/overlapping
psql -d "$(DBNAME)" -c "\COPY $(shell basename $<) to '$@' with csv header;"
nec_segments_shingled_tenths.geojson: dbtables/shingled_tenths
rm -f $@
ogr2ogr -f "GeoJSON" $@ PG:"dbname=$(DBNAME)" $(shell basename $<) -where "name in ('METROPOLITAN-NEC','MID-ATLANTIC','NEW HAVEN LINE', 'NEW YORK TERMINAL', 'NEW ENGLAND')"
# below here is just boring plumbing. :)
inputs:
mkdir $@
dbtables/amtrk_sta: inputs/amtrk_sta/amtrk_sta.shp | dbtables
psql -d $(DBNAME) -c "DROP TABLE IF EXISTS amtrk_sta"
shp2pgsql -s 4269 $< | psql -q -d $(DBNAME)
touch $@
inputs/amtrk_sta/amtrk_sta.shp: | inputs
wget http://www.rita.dot.gov/bts/sites/rita.dot.gov.bts/files/publications/national_transportation_atlas_database/2014/zip/amtrk_sta.zip
unzip amtrk_sta.zip -d amtrk_sta/
rm amtrk_sta.zip
mv amtrk_sta inputs/
touch inputs/amtrk_sta/*
inputs/rail_lines/rail_lines.shp: | inputs
wget http://www.rita.dot.gov/bts/sites/rita.dot.gov.bts/files/publications/national_transportation_atlas_database/2014/zip/rail_lines.zip
unzip rail_lines.zip -d rail_lines/
rm rail_lines.zip
mv rail_lines inputs/
touch inputs/rail_lines/*
dbtables:
createdb "$(DBNAME)"
mkdir $@
@emilymerwin
Copy link

Hey! FYI your base maps aren't loading on the live page :)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment