Skip to content

Instantly share code, notes, and snippets.

@hansthompson
Created March 9, 2016 18:45
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hansthompson/a3d2c710ac8e3584d58c to your computer and use it in GitHub Desktop.
Save hansthompson/a3d2c710ac8e3584d58c to your computer and use it in GitHub Desktop.
A script I use to import Mat-Su region gtfs into PostGIS
# NOTES BEFORE CONTINUING
# Problem 1. Time datatype for systems that go past midnight needs to have the right definition. So it won't work with trimet because it goes beyone midnight.
# Problem 2. Needs a service to find the correct projection to move from WGS84 to a local plane for accurate measurements in meters. Maybe I'm off here
# Problem 3. Since I set my machine up to need sudo commands for Postgres I don't know the best method as an admin to do database connections without sudo.
#system(paste("sudo -kS -u postgres psql -c 'CREATE DATABASE ", database_name, "'"), input=readline("Enter your password: "))
#Variables to set
database_name <- "nameOfDatabase"
zipfile <- "zipfile/path"
library(dplyr)
library(sp)
library(rgdal)
library(RPostgreSQL)
#define funtion for converting shapes.txt to sp object.
make_Lines <- function(df) {
df %>% select( shape_pt_lon, shape_pt_lat) %>% as.data.frame %>% Line %>% list %>% return
}
system(paste("sudo -kS -u postgres psql -c 'CREATE DATABASE ", database_name, "'"), input=readline("Enter your password: "))
#CREATE EXTENSION postgis; CREATE EXTENSION postgis_topology;
#paste0("PG:dbname=", database_name, " host=localhost user=postgres port=5432")
postgres_connection <- dbConnect(PostgreSQL(), host="localhost", user= "postgres", dbname=database_name)
#unzip it
dbSendQuery(postgres_connection,"CREATE EXTENSION postgis; CREATE EXTENSION postgis_topology;")
unzip(zipfile, exdir = "/tmp/gtfs2postgis")
#get all the files in the gtfs provided
filenames <- list.files(zipfile, pattern = ".csv")
full_path <- list.files(zipfile, full.names = TRUE, pattern = ".csv")
for(i in seq(filenames)) {
tabl <- read.csv(full_path[i])
table_name <- gsub(".csv", "", gsub(".txt", "", filenames[i]))
if(table_name != "shapes" & table_name != "stops") {
#paste("COPY", gsub(".txt", "", filenames[i]), "FROM", full_path[i], "DELIMITER ',' CSV;")
dbWriteTable(conn = postgres_connection, name = table_name,
value = tabl)
}
if(table_name == "stops") {
#get coordinates for sp object
coord <- tabl %>% select(stop_lon, stop_lat)
#get attributes for shapefile
dat <- tabl %>% select(-stop_lat, -stop_lon)
#create sp object with attributes
stops <- SpatialPointsDataFrame(coords = coord, data = dat,
proj4string = CRS("+proj=longlat +datum=WGS84"))
#write geojson
writeOGR(stops, '/tmp/stops_geojson', layer = 'stops', driver = 'GeoJSON')
#import geojson
system(paste('ogr2ogr -f "PostgreSQL" PG:"dbname=', database_name, ' host=localhost user=postgres port=5432" "/tmp/stops_geojson" -nln stops -overwrite'))
}
if(table_name == "shapes") {
#group shapes.txt by shape_id to help create sp object
shapes_grouped <- tabl %>% group_by(shape_id)
#create ids for shapefile
shapes_lines <- shapes_grouped %>% select( shape_pt_lat, shape_pt_lon) %>% do(res = make_Lines(.))
#find the attributes to include
shape_dat <- shapes_grouped %>% select(-shape_pt_lon, -shape_pt_lat, -shape_pt_sequence) %>%
group_by(shape_id) %>% filter(row_number(shape_id) == 1)
#create sp object
shapes <- SpatialLines(mapply(x = shapes_lines$res,
ids = seq(dim(shapes_lines)[1]),
FUN = function(x,ids) {Lines(x,ID=ids)}),
proj4string = CRS("+proj=longlat +datum=WGS84"))
#add data to sp object
shapes <- SpatialLinesDataFrame(shapes, data = data.frame(shape_id = shape_dat$shape_id))
#write geojson
writeOGR(shapes, '/tmp/shapes_geojson', layer = 'shapes', driver = 'GeoJSON')
#import geojson
system(paste('ogr2ogr -f "PostgreSQL" PG:"dbname=', database_name, 'host=localhost user=postgres port=5432" "/tmp/shapes_geojson" -nln shapes -overwrite'))
}
}
# Alter statements
# Add time as data type to stops_times.
alter_statement_sql <- "ALTER TABLE stop_times
ALTER COLUMN arrival_time TYPE time without time zone USING arrival_time::time without time zone,
ALTER COLUMN departure_time TYPE time without time zone USING departure_time::time without time zone,;"
#write to db.
#dbGetQuery(con, alter_statement_sql)
#dbDisconnect(postgres_connection)
#system("rm -rf /tmp/gtfs2postgis")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment