Skip to content

Instantly share code, notes, and snippets.

@csb19815
Last active March 1, 2018 00:42
Show Gist options
  • Save csb19815/9b130ead4db855cb333c13304faad427 to your computer and use it in GitHub Desktop.
Save csb19815/9b130ead4db855cb333c13304faad427 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"button": false,
"collapsed": false,
"deletable": true,
"new_sheet": false,
"run_control": {
"read_only": false
}
},
"outputs": [],
"source": [
"import numpy as np\n",
"import partridge as ptg"
]
},
{
"cell_type": "markdown",
"metadata": {
"button": false,
"deletable": true,
"new_sheet": false,
"run_control": {
"read_only": false
}
},
"source": [
"Read the zip using `raw_feed` which does not parse or prune the files"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"button": false,
"collapsed": false,
"deletable": true,
"new_sheet": false,
"run_control": {
"read_only": false
}
},
"outputs": [],
"source": [
"inpath = '/Users/Charlie/Downloads/contents20180127-4-1ujy8oq.zip'\n",
"feed = ptg.raw_feed(inpath)"
]
},
{
"cell_type": "markdown",
"metadata": {
"button": false,
"deletable": true,
"new_sheet": false,
"run_control": {
"read_only": false
}
},
"source": [
"Inspect the original stops file"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"button": false,
"collapsed": false,
"deletable": true,
"new_sheet": false,
"run_control": {
"read_only": false
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>stop_id</th>\n",
" <th>stop_code</th>\n",
" <th>stop_name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>795462</td>\n",
" <td>75548</td>\n",
" <td>5th St and P St</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>5424</td>\n",
" <td>75406</td>\n",
" <td>9th St and L St</td>\n",
" </tr>\n",
" <tr>\n",
" <th>245</th>\n",
" <td>5425</td>\n",
" <td>75407</td>\n",
" <td>9th St and O St</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96</th>\n",
" <td>28753</td>\n",
" <td>75542</td>\n",
" <td>Air Base Pkwy and Parker Rd</td>\n",
" </tr>\n",
" <tr>\n",
" <th>206</th>\n",
" <td>805313</td>\n",
" <td>75528</td>\n",
" <td>Air Base Pkwy and Parker Rd</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" stop_id stop_code stop_name\n",
"0 795462 75548 5th St and P St\n",
"5 5424 75406 9th St and L St\n",
"245 5425 75407 9th St and O St\n",
"96 28753 75542 Air Base Pkwy and Parker Rd\n",
"206 805313 75528 Air Base Pkwy and Parker Rd"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"feed.stops.sort_values('stop_name')[['stop_id', 'stop_code', 'stop_name']].head()"
]
},
{
"cell_type": "markdown",
"metadata": {
"button": false,
"deletable": true,
"new_sheet": false,
"run_control": {
"read_only": false
}
},
"source": [
"Replace empty `stop_code`'s with `stop_id`"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"button": false,
"collapsed": true,
"deletable": true,
"new_sheet": false,
"run_control": {
"read_only": false
}
},
"outputs": [],
"source": [
"feed.stops.stop_code = feed.stops.stop_code.fillna(feed.stops.stop_id)"
]
},
{
"cell_type": "markdown",
"metadata": {
"button": false,
"deletable": true,
"new_sheet": false,
"run_control": {
"read_only": false
}
},
"source": [
"Ensure `stop_id` and `stop_code` are unique"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"button": false,
"collapsed": true,
"deletable": true,
"new_sheet": false,
"run_control": {
"read_only": false
}
},
"outputs": [],
"source": [
"assert len(set(feed.stops.stop_id)) == len(feed.stops)\n",
"assert len(set(feed.stops.stop_code)) == len(set(feed.stops.stop_id))"
]
},
{
"cell_type": "markdown",
"metadata": {
"button": false,
"deletable": true,
"new_sheet": false,
"run_control": {
"read_only": false
}
},
"source": [
"Create a dictionary mapping `stop_id` to `stop_code`"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"button": false,
"collapsed": true,
"deletable": true,
"new_sheet": false,
"run_control": {
"read_only": false
}
},
"outputs": [],
"source": [
"mapping = dict(feed.stops[['stop_id', 'stop_code']].as_matrix())"
]
},
{
"cell_type": "markdown",
"metadata": {
"button": false,
"deletable": true,
"new_sheet": false,
"run_control": {
"read_only": false
}
},
"source": [
"Remove `stop_code` column"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"button": false,
"collapsed": true,
"deletable": true,
"new_sheet": false,
"run_control": {
"read_only": false
}
},
"outputs": [],
"source": [
"feed.stops.drop('stop_code', axis=1, inplace=True)"
]
},
{
"cell_type": "markdown",
"metadata": {
"button": false,
"deletable": true,
"new_sheet": false,
"run_control": {
"read_only": false
}
},
"source": [
"Replace old references to `stop_id`"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"button": false,
"collapsed": true,
"deletable": true,
"new_sheet": false,
"run_control": {
"read_only": false
}
},
"outputs": [],
"source": [
"def swap_id(stop_id):\n",
" return mapping[stop_id]\n",
"\n",
"feed.stops.stop_id = feed.stops.stop_id.apply(swap_id)\n",
"feed.stop_times.stop_id = feed.stop_times.stop_id.apply(swap_id)\n",
"\n",
"if not feed.transfers.empty:\n",
" feed.transfers.from_stop_id = feed.transfers.from_stop_id.apply(swap_id)\n",
" feed.transfers.to_stop_id = feed.transfers.to_stop_id.apply(swap_id)\n",
"\n",
"if 'parent_station' in feed.stops.columns:\n",
" # optional field, preserve nan's\n",
" feed.stops.parent_station = feed.stops.parent_station.apply(mapping.get, args=(np.nan,))"
]
},
{
"cell_type": "markdown",
"metadata": {
"button": false,
"deletable": true,
"new_sheet": false,
"run_control": {
"read_only": false
}
},
"source": [
"Create a new GTFS file"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"button": false,
"collapsed": false,
"deletable": true,
"new_sheet": false,
"run_control": {
"read_only": false
}
},
"outputs": [
{
"data": {
"text/plain": [
"'/Users/Charlie/Downloads/FAST_feb_2018.zip'"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"outpath = '/Users/Charlie/Downloads/FAST_feb_2018.zip'\n",
"ptg.writers.write_feed_dangerously(feed, outpath)"
]
},
{
"cell_type": "markdown",
"metadata": {
"button": false,
"deletable": true,
"new_sheet": false,
"run_control": {
"read_only": false
}
},
"source": [
"Inspect the result"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"button": false,
"collapsed": false,
"deletable": true,
"new_sheet": false,
"run_control": {
"read_only": false
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>stop_id</th>\n",
" <th>stop_name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>75548</td>\n",
" <td>5th St and P St</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>75406</td>\n",
" <td>9th St and L St</td>\n",
" </tr>\n",
" <tr>\n",
" <th>245</th>\n",
" <td>75407</td>\n",
" <td>9th St and O St</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96</th>\n",
" <td>75542</td>\n",
" <td>Air Base Pkwy and Parker Rd</td>\n",
" </tr>\n",
" <tr>\n",
" <th>206</th>\n",
" <td>75528</td>\n",
" <td>Air Base Pkwy and Parker Rd</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" stop_id stop_name\n",
"0 75548 5th St and P St\n",
"5 75406 9th St and L St\n",
"245 75407 9th St and O St\n",
"96 75542 Air Base Pkwy and Parker Rd\n",
"206 75528 Air Base Pkwy and Parker Rd"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ptg.feed(outpath).stops.sort_values('stop_name')[['stop_id', 'stop_name']].head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"button": false,
"collapsed": true,
"deletable": true,
"new_sheet": false,
"run_control": {
"read_only": false
}
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment