Created
May 1, 2018 20:08
-
-
Save csb19815/5bf7923ffb1ce7ec155ac9a94a83ea70 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"import partridge as ptg" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Read the zip using `raw_feed` which does not parse or prune the files" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"feed = ptg.raw_feed('scratch/cmbc-translink.zip')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Inspect the original stops file" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>stop_id</th>\n", | |
" <th>stop_code</th>\n", | |
" <th>stop_name</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>5351</th>\n", | |
" <td>1329</td>\n", | |
" <td>51318</td>\n", | |
" <td>22 TERMINUS UNDER KNIGHT ST BRIDGE</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>888</th>\n", | |
" <td>10915</td>\n", | |
" <td>58173</td>\n", | |
" <td>22ND STREET STN BAY 1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7941</th>\n", | |
" <td>10423</td>\n", | |
" <td>52165</td>\n", | |
" <td>22ND STREET STN BAY 2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2111</th>\n", | |
" <td>3535</td>\n", | |
" <td>53497</td>\n", | |
" <td>22ND STREET STN BAY 3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5822</th>\n", | |
" <td>2252</td>\n", | |
" <td>52230</td>\n", | |
" <td>22ND STREET STN BAY 4</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" stop_id stop_code stop_name\n", | |
"5351 1329 51318 22 TERMINUS UNDER KNIGHT ST BRIDGE\n", | |
"888 10915 58173 22ND STREET STN BAY 1\n", | |
"7941 10423 52165 22ND STREET STN BAY 2\n", | |
"2111 3535 53497 22ND STREET STN BAY 3\n", | |
"5822 2252 52230 22ND STREET STN BAY 4" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"feed.stops.sort_values('stop_name')[['stop_id', 'stop_code', 'stop_name']].head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Replace empty `stop_code` values with the corresponding `stop_id`" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"feed.stops.stop_code = feed.stops.stop_code.fillna(feed.stops.stop_id)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Ensure `stop_id` and `stop_code` are unique" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Collect the distinct values once, then compare the counts.\n", | |
"unique_stop_ids = set(feed.stops.stop_id)\n", | |
"unique_stop_codes = set(feed.stops.stop_code)\n", | |
"\n", | |
"# Every row of stops must carry its own distinct stop_id ...\n", | |
"assert len(feed.stops) == len(unique_stop_ids)\n", | |
"# ... and there must be exactly as many distinct stop_codes as stop_ids.\n", | |
"assert len(unique_stop_ids) == len(unique_stop_codes)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Create a dictionary mapping `stop_id` to `stop_code`" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# DataFrame.as_matrix() was removed in pandas 1.0; pairing the two columns\n", | |
"# directly builds the same stop_id -> stop_code dictionary.\n", | |
"mapping = dict(zip(feed.stops.stop_id, feed.stops.stop_code))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Remove `stop_code` column" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"feed.stops.drop('stop_code', axis=1, inplace=True)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Replace old references to `stop_id`" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def swap_id(stop_id):\n", | |
"    '''Return the stop_code stored in `mapping` for an original stop_id.\n", | |
"\n", | |
"    Raises KeyError when the id has no entry in `mapping`.\n", | |
"    '''\n", | |
"    return mapping[stop_id]\n", | |
"\n", | |
"# Required id columns: every value must have an entry in the mapping.\n", | |
"stops = feed.stops\n", | |
"stops.stop_id = stops.stop_id.apply(swap_id)\n", | |
"\n", | |
"stop_times = feed.stop_times\n", | |
"stop_times.stop_id = stop_times.stop_id.apply(swap_id)\n", | |
"\n", | |
"# The transfers table is only rewritten when it is not empty.\n", | |
"transfers = feed.transfers\n", | |
"if not transfers.empty:\n", | |
"    transfers.from_stop_id = transfers.from_stop_id.apply(swap_id)\n", | |
"    transfers.to_stop_id = transfers.to_stop_id.apply(swap_id)\n", | |
"\n", | |
"# parent_station is an optional field: values without an entry in the\n", | |
"# mapping (including missing ones) become NaN instead of raising.\n", | |
"if 'parent_station' in stops.columns:\n", | |
"    stops.parent_station = stops.parent_station.apply(\n", | |
"        lambda station: mapping.get(station, np.nan)\n", | |
"    )" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Create a new GTFS file" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'/Users/drw/Code/partridge/scratch/cmbc-translink-swapped.zip'" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import os\n", | |
"import shutil\n", | |
"import tempfile\n", | |
"\n", | |
"outpath = 'scratch/cmbc-translink-swapped.zip'\n", | |
"\n", | |
"# TemporaryDirectory creates the scratch directory and removes it on exit,\n", | |
"# so a failure while creating it cannot surface as a NameError during cleanup.\n", | |
"with tempfile.TemporaryDirectory() as tmpdir:\n", | |
"    for node in ptg.writers.DEFAULT_NODES:\n", | |
"        df = feed.get(node)\n", | |
"        # Only tables that contain data are written into the archive.\n", | |
"        if not df.empty:\n", | |
"            df.to_csv(os.path.join(tmpdir, node), index=False)\n", | |
"    # make_archive appends '.zip' itself, so pass the path without its extension.\n", | |
"    archive_path = shutil.make_archive(os.path.splitext(outpath)[0], 'zip', tmpdir)\n", | |
"\n", | |
"# Trailing expression so the notebook displays where the archive was written.\n", | |
"archive_path" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Inspect the result" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>stop_id</th>\n", | |
" <th>stop_name</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>5351</th>\n", | |
" <td>51318</td>\n", | |
" <td>22 TERMINUS UNDER KNIGHT ST BRIDGE</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>888</th>\n", | |
" <td>58173</td>\n", | |
" <td>22ND STREET STN BAY 1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7941</th>\n", | |
" <td>52165</td>\n", | |
" <td>22ND STREET STN BAY 2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2111</th>\n", | |
" <td>53497</td>\n", | |
" <td>22ND STREET STN BAY 3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5822</th>\n", | |
" <td>52230</td>\n", | |
" <td>22ND STREET STN BAY 4</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" stop_id stop_name\n", | |
"5351 51318 22 TERMINUS UNDER KNIGHT ST BRIDGE\n", | |
"888 58173 22ND STREET STN BAY 1\n", | |
"7941 52165 22ND STREET STN BAY 2\n", | |
"2111 53497 22ND STREET STN BAY 3\n", | |
"5822 52230 22ND STREET STN BAY 4" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ptg.feed(outpath).stops.sort_values('stop_name')[['stop_id', 'stop_name']].head()" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment