Last active
March 1, 2018 00:42
-
-
Save csb19815/9b130ead4db855cb333c13304faad427 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"button": false, | |
"collapsed": false, | |
"deletable": true, | |
"new_sheet": false, | |
"run_control": { | |
"read_only": false | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"import os\n", | |
"\n", | |
"import numpy as np\n", | |
"import partridge as ptg" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"button": false, | |
"deletable": true, | |
"new_sheet": false, | |
"run_control": { | |
"read_only": false | |
} | |
}, | |
"source": [ | |
"Read the zip using `raw_feed`, which does not parse or prune the files." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"button": false, | |
"collapsed": false, | |
"deletable": true, | |
"new_sheet": false, | |
"run_control": { | |
"read_only": false | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"# Locate the source GTFS archive under the current user's Downloads folder\n", | |
"# instead of a machine-specific absolute path. A plain string is built with\n", | |
"# os.path because this kernel (Python 3.5) predates os.PathLike support.\n", | |
"inpath = os.path.join(os.path.expanduser('~'), 'Downloads', 'contents20180127-4-1ujy8oq.zip')\n", | |
"feed = ptg.raw_feed(inpath)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"button": false, | |
"deletable": true, | |
"new_sheet": false, | |
"run_control": { | |
"read_only": false | |
} | |
}, | |
"source": [ | |
"Inspect the original stops file" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"button": false, | |
"collapsed": false, | |
"deletable": true, | |
"new_sheet": false, | |
"run_control": { | |
"read_only": false | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>stop_id</th>\n", | |
" <th>stop_code</th>\n", | |
" <th>stop_name</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>795462</td>\n", | |
" <td>75548</td>\n", | |
" <td>5th St and P St</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>5424</td>\n", | |
" <td>75406</td>\n", | |
" <td>9th St and L St</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>245</th>\n", | |
" <td>5425</td>\n", | |
" <td>75407</td>\n", | |
" <td>9th St and O St</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>96</th>\n", | |
" <td>28753</td>\n", | |
" <td>75542</td>\n", | |
" <td>Air Base Pkwy and Parker Rd</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>206</th>\n", | |
" <td>805313</td>\n", | |
" <td>75528</td>\n", | |
" <td>Air Base Pkwy and Parker Rd</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" stop_id stop_code stop_name\n", | |
"0 795462 75548 5th St and P St\n", | |
"5 5424 75406 9th St and L St\n", | |
"245 5425 75407 9th St and O St\n", | |
"96 28753 75542 Air Base Pkwy and Parker Rd\n", | |
"206 805313 75528 Air Base Pkwy and Parker Rd" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"feed.stops[['stop_id', 'stop_code', 'stop_name']].sort_values(by='stop_name').head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"button": false, | |
"deletable": true, | |
"new_sheet": false, | |
"run_control": { | |
"read_only": false | |
} | |
}, | |
"source": [ | |
"Replace empty `stop_code` values with the corresponding `stop_id`" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"button": false, | |
"collapsed": true, | |
"deletable": true, | |
"new_sheet": false, | |
"run_control": { | |
"read_only": false | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"# Rows with a missing stop_code fall back to the stop_id of the same row.\n", | |
"current_codes = feed.stops.stop_code\n", | |
"fallback_ids = feed.stops.stop_id\n", | |
"feed.stops.stop_code = current_codes.fillna(value=fallback_ids)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"button": false, | |
"deletable": true, | |
"new_sheet": false, | |
"run_control": { | |
"read_only": false | |
} | |
}, | |
"source": [ | |
"Ensure `stop_id` and `stop_code` are unique" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"button": false, | |
"collapsed": true, | |
"deletable": true, | |
"new_sheet": false, | |
"run_control": { | |
"read_only": false | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"# Each row must carry a distinct stop_id ...\n", | |
"distinct_ids = set(feed.stops.stop_id)\n", | |
"assert len(distinct_ids) == len(feed.stops)\n", | |
"\n", | |
"# ... and there must be exactly as many distinct stop_codes as stop_ids.\n", | |
"distinct_codes = set(feed.stops.stop_code)\n", | |
"assert len(distinct_codes) == len(distinct_ids)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"button": false, | |
"deletable": true, | |
"new_sheet": false, | |
"run_control": { | |
"read_only": false | |
} | |
}, | |
"source": [ | |
"Create a dictionary mapping `stop_id` to `stop_code`" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"button": false, | |
"collapsed": true, | |
"deletable": true, | |
"new_sheet": false, | |
"run_control": { | |
"read_only": false | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"# Pair the two columns row by row with zip(). DataFrame.as_matrix() was\n", | |
"# deprecated in pandas 0.23 and removed in pandas 1.0, so it is avoided here.\n", | |
"mapping = dict(zip(feed.stops.stop_id, feed.stops.stop_code))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"button": false, | |
"deletable": true, | |
"new_sheet": false, | |
"run_control": { | |
"read_only": false | |
} | |
}, | |
"source": [ | |
"Remove `stop_code` column" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"button": false, | |
"collapsed": true, | |
"deletable": true, | |
"new_sheet": false, | |
"run_control": { | |
"read_only": false | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"# Dropped in place so the frame held by `feed` itself loses the column.\n", | |
"feed.stops.drop(labels=['stop_code'], axis='columns', inplace=True)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"button": false, | |
"deletable": true, | |
"new_sheet": false, | |
"run_control": { | |
"read_only": false | |
} | |
}, | |
"source": [ | |
"Replace old references to `stop_id`" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"button": false, | |
"collapsed": true, | |
"deletable": true, | |
"new_sheet": false, | |
"run_control": { | |
"read_only": false | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"def swap_id(stop_id):\n", | |
"    '''Return the entry of `mapping` for `stop_id`; unknown ids raise KeyError.'''\n", | |
"    return mapping[stop_id]\n", | |
"\n", | |
"# Required references: every value must be a key of `mapping`.\n", | |
"feed.stops.stop_id = feed.stops.stop_id.apply(swap_id)\n", | |
"feed.stop_times.stop_id = feed.stop_times.stop_id.apply(swap_id)\n", | |
"\n", | |
"if not feed.transfers.empty:\n", | |
"    feed.transfers.from_stop_id = feed.transfers.from_stop_id.apply(swap_id)\n", | |
"    feed.transfers.to_stop_id = feed.transfers.to_stop_id.apply(swap_id)\n", | |
"\n", | |
"if 'parent_station' in feed.stops:\n", | |
"    # optional field: values absent from `mapping` (nan included) become nan\n", | |
"    feed.stops.parent_station = feed.stops.parent_station.apply(\n", | |
"        lambda station: mapping.get(station, np.nan)\n", | |
"    )" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"button": false, | |
"deletable": true, | |
"new_sheet": false, | |
"run_control": { | |
"read_only": false | |
} | |
}, | |
"source": [ | |
"Create a new GTFS file" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"button": false, | |
"collapsed": false, | |
"deletable": true, | |
"new_sheet": false, | |
"run_control": { | |
"read_only": false | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'/Users/Charlie/Downloads/FAST_feb_2018.zip'" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Build the output path under the current user's Downloads folder instead of\n", | |
"# a machine-specific absolute path (kept as a plain string for Python 3.5).\n", | |
"outpath = os.path.join(os.path.expanduser('~'), 'Downloads', 'FAST_feb_2018.zip')\n", | |
"ptg.writers.write_feed_dangerously(feed, outpath)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"button": false, | |
"deletable": true, | |
"new_sheet": false, | |
"run_control": { | |
"read_only": false | |
} | |
}, | |
"source": [ | |
"Inspect the result" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"button": false, | |
"collapsed": false, | |
"deletable": true, | |
"new_sheet": false, | |
"run_control": { | |
"read_only": false | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>stop_id</th>\n", | |
" <th>stop_name</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>75548</td>\n", | |
" <td>5th St and P St</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>75406</td>\n", | |
" <td>9th St and L St</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>245</th>\n", | |
" <td>75407</td>\n", | |
" <td>9th St and O St</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>96</th>\n", | |
" <td>75542</td>\n", | |
" <td>Air Base Pkwy and Parker Rd</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>206</th>\n", | |
" <td>75528</td>\n", | |
" <td>Air Base Pkwy and Parker Rd</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" stop_id stop_name\n", | |
"0 75548 5th St and P St\n", | |
"5 75406 9th St and L St\n", | |
"245 75407 9th St and O St\n", | |
"96 75542 Air Base Pkwy and Parker Rd\n", | |
"206 75528 Air Base Pkwy and Parker Rd" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ptg.feed(outpath).stops[['stop_id', 'stop_name']].sort_values(by='stop_name').head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"button": false, | |
"collapsed": true, | |
"deletable": true, | |
"new_sheet": false, | |
"run_control": { | |
"read_only": false | |
} | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment