Skip to content

Instantly share code, notes, and snippets.

@kurtraschke kurtraschke/README.md
Last active Dec 14, 2015

Embed
What would you like to do?

This Gist contains a script and supporting files for building a OneBusAway GTFS bundle with data for all agencies in the Baltimore/Washington area that have publicly released their data.

Configuration parameters come from two sheets in a Google Docs document:

https://docs.google.com/spreadsheet/ccc?key=0AvrkbWHnoksNdGdYam4wX214SXpoRmdia0FEalZvUHc&usp=sharing

The dependency on Google Docs could easily be removed by deleting the CSV download step from download.sh and maintaining the two index files (config/feeds.csv and config/transforms.csv) locally instead.

<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:context="http://www.springframework.org/schema/context"
xsi:schemaLocation="http://www.springframework.org/schema/beans
http://www.springframework.org/schema/beans/spring-beans-2.5.xsd
http://www.springframework.org/schema/context
http://www.springframework.org/schema/context/spring-context-2.5.xsd">
<!--
  Property override applied on top of the default bean definitions.
  The key follows the "beanName.propertyName" form: it sets
  throwExceptionOnInvalidStopToShapeMappingException to false on the
  tripEntriesFactory bean. NOTE(review): presumably this lets the build
  continue past trips whose stops cannot be mapped onto their shape; confirm
  against the OneBusAway builder documentation.
-->
<bean class="org.onebusaway.container.spring.PropertyOverrideConfigurer">
  <property name="properties">
    <props>
      <prop key="tripEntriesFactory.throwExceptionOnInvalidStopToShapeMappingException">false</prop>
    </props>
  </property>
</bean>
<!--
  The set of GTFS feeds that go into the bundle. Each ref points at one of the
  GtfsBundle beans defined in this file; a feed is only included when it is
  referenced here.
-->
<bean id="gtfs-bundles" class="org.onebusaway.transit_data_federation.bundle.model.GtfsBundles">
  <property name="bundles">
    <list>
      <ref bean="A3080"/>
      <ref bean="A3068"/>
      <ref bean="A3034"/>
      <ref bean="A3051"/>
      <ref bean="A3073"/>
      <ref bean="A3030-1"/>
      <ref bean="A3030-2"/>
      <!-- A3072 is still defined in this file but is currently left out of the bundle. -->
      <!--<ref bean="A3072" />-->
    </list>
  </property>
</bean>
<!--
  Feed 3080, read directly from the downloaded archive (download.sh stores each
  download as work/inputs/<id>.zip). Agency id "1" is mapped to 3080.
-->
<bean id="A3080" class="org.onebusaway.transit_data_federation.bundle.model.GtfsBundle">
  <property name="path" value="work/inputs/3080.zip"/>
  <property name="defaultAgencyId" value="3080"/>
  <property name="agencyIdMappings">
    <map>
      <entry key="1" value="3080"/>
    </map>
  </property>
</bean>
<!--
  Feed 3068, read from a directory under work/outputs (where download.sh writes
  transformer output) rather than from the raw download. download.sh generates
  config/transforms/3068.json for this feed ("Fix Fairfax Connector").
  The empty agency id is mapped to 3068.
-->
<bean id="A3068" class="org.onebusaway.transit_data_federation.bundle.model.GtfsBundle">
  <property name="path" value="work/outputs/3068"/>
  <property name="defaultAgencyId" value="3068"/>
  <property name="agencyIdMappings">
    <map>
      <entry key="" value="3068"/>
    </map>
  </property>
</bean>
<!--
  Feed 3034, read directly from the downloaded archive in work/inputs.
  Agency id "1" is mapped to 3034.
-->
<bean id="A3034" class="org.onebusaway.transit_data_federation.bundle.model.GtfsBundle">
  <property name="path" value="work/inputs/3034.zip"/>
  <property name="defaultAgencyId" value="3034"/>
  <property name="agencyIdMappings">
    <map>
      <entry key="1" value="3034"/>
    </map>
  </property>
</bean>
<!--
  Feed 3051, read directly from the downloaded archive in work/inputs.
  Agency id "MCRO" is mapped to 3051.
-->
<bean id="A3051" class="org.onebusaway.transit_data_federation.bundle.model.GtfsBundle">
  <property name="path" value="work/inputs/3051.zip"/>
  <property name="defaultAgencyId" value="3051"/>
  <property name="agencyIdMappings">
    <map>
      <entry key="MCRO" value="3051"/>
    </map>
  </property>
</bean>
<!--
  Feed 3073, read directly from the downloaded archive in work/inputs.
  The empty agency id is mapped to 3073.
-->
<bean id="A3073" class="org.onebusaway.transit_data_federation.bundle.model.GtfsBundle">
  <property name="path" value="work/inputs/3073.zip"/>
  <property name="defaultAgencyId" value="3073"/>
  <property name="agencyIdMappings">
    <map>
      <entry key="" value="3073"/>
    </map>
  </property>
</bean>
<!--
  Feed 3030-1, read from a directory under work/outputs (where download.sh
  writes transformer output). Agency id "1" is mapped to 3030-1.
  NOTE(review): presumably one half of a single 3030 download that the
  transformer splits by agency; confirm against the transforms sheet.
-->
<bean id="A3030-1" class="org.onebusaway.transit_data_federation.bundle.model.GtfsBundle">
  <property name="path" value="work/outputs/3030-1"/>
  <property name="defaultAgencyId" value="3030-1"/>
  <property name="agencyIdMappings">
    <map>
      <entry key="1" value="3030-1"/>
    </map>
  </property>
</bean>
<!--
  Feed 3030-2, read from a directory under work/outputs (where download.sh
  writes transformer output). Agency id "2" is mapped to 3030-2.
  NOTE(review): presumably the other half of a single 3030 download that the
  transformer splits by agency; confirm against the transforms sheet.
-->
<bean id="A3030-2" class="org.onebusaway.transit_data_federation.bundle.model.GtfsBundle">
  <property name="path" value="work/outputs/3030-2"/>
  <property name="defaultAgencyId" value="3030-2"/>
  <property name="agencyIdMappings">
    <map>
      <entry key="2" value="3030-2"/>
    </map>
  </property>
</bean>
<!--
  Feed 3072, read directly from the downloaded archive in work/inputs.
  Agency id "1" is mapped to 3072. The reference to this bean in the
  gtfs-bundles list is commented out, so the definition is currently unused.
-->
<bean id="A3072" class="org.onebusaway.transit_data_federation.bundle.model.GtfsBundle">
  <property name="path" value="work/inputs/3072.zip"/>
  <property name="defaultAgencyId" value="3072"/>
  <property name="agencyIdMappings">
    <map>
      <entry key="1" value="3072"/>
    </map>
  </property>
</bean>
</beans>
{"op":"retain","match":{"class":"Agency","id":"1"}}
{"op":"update","match":{"class":"Route","shortName":"DC98"},"update":{"longName":"Woodley Park - Adams Morgan - McPherson Square Metro"}}
{"op":"update","match":{"class":"Route","shortName":"DC98"},"update":{"shortName":"WP-MS"}}
{"op":"update","match":{"class":"Route","shortName":"DCDGR"},"update":{"longName":"Dupont - Georgetown - Rosslyn"}}
{"op":"update","match":{"class":"Route","shortName":"DCDGR"},"update":{"shortName":"RS-DP"}}
{"op":"update","match":{"class":"Route","shortName":"DCN22"},"update":{"longName":"Union Station - Navy Yard via Capitol Hill"}}
{"op":"update","match":{"class":"Route","shortName":"DCN22"},"update":{"shortName":"US-NY"}}
{"op":"update","match":{"class":"Route","shortName":"DCPOTSKY"},"update":{"longName":"Potomac - Skyland"}}
{"op":"update","match":{"class":"Route","shortName":"DCPOTSKY"},"update":{"shortName":"POT-SKY"}}
{"op":"update","match":{"class":"Route","shortName":"DCWE"},"update":{"longName": "Georgetown - Union Station"}}
{"op":"update","match":{"class":"Route","shortName":"DCWE"},"update":{"shortName":"GT-US"}}
{"op":"retain","match":{"class":"Agency","id":"2"}}
{"op":"update","match":{"class":"Route","longName":"Richmond Highway Express Bus"},"update":{"shortName":"REX"}}
{"op":"remove", "match":{"class":"Route","shortName":"18S"}}
{"op":"transform","class":"com.kurtraschke.wmatagtfsnames.BusRouteNameTransformStrategy"}
# Published-CSV endpoints of the Google Docs spreadsheet that drives the build:
# sheet gid=0 is saved as the feed index, sheet gid=1 as the transforms index.
FEEDS_URL='https://docs.google.com/spreadsheet/pub?key=0AvrkbWHnoksNdGdYam4wX214SXpoRmdia0FEalZvUHc&single=true&gid=0&output=csv'
TRANSFORMS_URL='https://docs.google.com/spreadsheet/pub?key=0AvrkbWHnoksNdGdYam4wX214SXpoRmdia0FEalZvUHc&single=true&gid=1&output=csv'

# Java installation; $JAVA_HOME/bin/java runs the transformer and the bundle builder.
JAVA_HOME='/usr/java/default'
# Directory containing the jars put on the transformer classpath and the builder jar.
JARS='jars'

# Working tree: raw downloads go to inputs/, transformer results to outputs/.
WORKING_DIR='work'
INPUT_DIR="$WORKING_DIR/inputs"
OUTPUT_DIR="$WORKING_DIR/outputs"

# Configuration tree: the two index CSVs are (re)written here on every run.
CONFIG_DIR='config'
FEED_INDEX="$CONFIG_DIR/feeds.csv"
TRANSFORMS_INDEX="$CONFIG_DIR/transforms.csv"
# Keep previously downloaded feeds (inputs/ is only created if missing) but
# always start from an empty transformer output directory.
mkdir -p "$INPUT_DIR"
rm -rf "$OUTPUT_DIR"
mkdir -p "$OUTPUT_DIR"
#Download CSV files, remove first line
# The URLs are quoted because they contain `?`, which an unquoted expansion
# would treat as a glob pattern. -f makes curl fail on an HTTP error instead of
# writing the server's error page into the index; -S still reports the error.
curl -fsS "$FEEDS_URL" | tail -n +2 > "$FEED_INDEX"
curl -fsS "$TRANSFORMS_URL" | tail -n +2 > "$TRANSFORMS_INDEX"
#Add trailing newline if needed
# `read` reports end-of-file on a final line that lacks a newline, so without
# this the `while read` loops over these files would skip their last row.
sed -i -e '$a\' "$FEED_INDEX"
sed -i -e '$a\' "$TRANSFORMS_INDEX"
#Download feeds
# Each row of the feed index is "<id>,<url>"; the feed is saved as
# ${INPUT_DIR}/<id>.zip. Everything after the first comma is taken as the URL.
# -r keeps backslashes in the row intact.
while IFS=, read -r NTD_ID FEED_URL
do
  # Skip blank rows instead of attempting to download an empty URL to ".zip".
  [ -n "$NTD_ID" ] || continue
  FILE_NAME="${INPUT_DIR}/${NTD_ID}.zip"
  echo "Downloading $FEED_URL to $FILE_NAME"
  # -R stamps the local file with the remote modification time. When a copy
  # already exists, -z uses that file's time to make the request conditional,
  # so a feed that has not changed is not downloaded again.
  if [ -f "$FILE_NAME" ]; then
    curl --compressed -L -o "$FILE_NAME" -R -z "$FILE_NAME" "$FEED_URL"
  else
    curl --compressed -L -o "$FILE_NAME" -R "$FEED_URL"
  fi
done < "$FEED_INDEX"
#Fix Fairfax Connector with this one-liner
# Reads google.log from the downloaded 3068 archive and, for every
# "did not match" line that carries a "TripId = <digits>", writes one transform
# rule that removes trip 3068_<digits>. The result (sorted, de-duplicated)
# replaces config/transforms/3068.json.
# sed -n with the p flag emits only lines where the substitution succeeded, so
# a log line without a numeric TripId can no longer leak into the JSON file.
unzip -p "${INPUT_DIR}/3068.zip" google.log \
  | sed -n '/did not match/s/^.*TripId = \([0-9][0-9]*\).*$/{"op": "remove", "match": {"class": "Trip", "id": "3068_\1"} }/p' \
  | sort -u > "${CONFIG_DIR}/transforms/3068.json"
#Transform feeds
# Each row of the transforms index is
# "<input id>,<default agency id>,<output id>": the archive
# ${INPUT_DIR}/<input id>.zip is transformed with
# ${CONFIG_DIR}/transforms/<output id>.json into ${OUTPUT_DIR}/<output id>.
# -r keeps backslashes in the row intact.
while IFS=, read -r INPUT_AGENCY DEFAULT_AGENCY_ID OUTPUT_AGENCY
do
  # Skip blank rows instead of running the transformer on ".zip".
  [ -n "$INPUT_AGENCY" ] || continue
  TRANSFORM_FILE="${CONFIG_DIR}/transforms/${OUTPUT_AGENCY}.json"
  TRANSFORMER_INPUT="${INPUT_DIR}/${INPUT_AGENCY}.zip"
  TRANSFORMER_OUTPUT="${OUTPUT_DIR}/${OUTPUT_AGENCY}"
  echo "Running transformer with configuration ${TRANSFORM_FILE}"
  # Arguments are quoted so an empty field cannot shift the argument list.
  # stdin is pointed at /dev/null because the loop's stdin is the index file;
  # the child process must not be able to consume the remaining rows.
  "$JAVA_HOME/bin/java" -cp "${JARS}/*" org.onebusaway.gtfs_transformer.GtfsTransformerMain \
    --transform="$TRANSFORM_FILE" \
    --agencyId "$DEFAULT_AGENCY_ID" \
    "$TRANSFORMER_INPUT" \
    "$TRANSFORMER_OUTPUT" < /dev/null
done < "$TRANSFORMS_INDEX"
#Remove bundle output directory
# The builder gets a fresh target directory on every run.
BUNDLE_DIR="$WORKING_DIR/bundle"
rm -rf "$BUNDLE_DIR"
#Build bundle
# Runs the federation builder jar with a 6 GB heap; its two arguments are the
# Spring configuration (config/bundle.xml) and the bundle output directory.
BUILDER_JAR="$JARS/onebusaway-transit-data-federation-builder-1.1.8-SNAPSHOT-withAllDependencies.jar"
"$JAVA_HOME/bin/java" -Xmx6G -jar "$BUILDER_JAR" "$CONFIG_DIR/bundle.xml" "$BUNDLE_DIR"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.