Skip to content

Instantly share code, notes, and snippets.

@smmaurer
Created July 23, 2018 22:19
Show Gist options
  • Save smmaurer/c3b4f2f7c4d612a4520de119f9f497cf to your computer and use it in GitHub Desktop.
Save smmaurer/c3b4f2f7c4d612a4520de119f9f497cf to your computer and use it in GitHub Desktop.
Building a custom merged choice table
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Destination choice models using CHTS public data\n",
"\n",
"Sam Maurer, July 2018 - Python 3.6\n",
"\n",
"This notebook won't run; it's been cleaned up to just show the code for building a custom merged choice table."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Build a merged choice table including interaction terms\n",
"\n",
"Limit observations to trips where we observe at least 5 different destinations from the origin tract. Draw estimation alternatives from the other observed trips."
]
},
{
"cell_type": "code",
"execution_count": 101,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>full_tract_id</th>\n",
" <th>prior_tract</th>\n",
" <th>next_tract</th>\n",
" </tr>\n",
" <tr>\n",
" <th>place_id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>18979900103</th>\n",
" <td>6097153806</td>\n",
" <td>6097151402</td>\n",
" <td>6097153806</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22291390105</th>\n",
" <td>6001427200</td>\n",
" <td>6001427300</td>\n",
" <td>6001427100</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26093980314</th>\n",
" <td>6075020900</td>\n",
" <td>6081605200</td>\n",
" <td>6075020900</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" full_tract_id prior_tract next_tract\n",
"place_id \n",
"18979900103 6097153806 6097151402 6097153806\n",
"22291390105 6001427200 6001427300 6001427100\n",
"26093980314 6075020900 6081605200 6075020900"
]
},
"execution_count": 101,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"_obs_cols = ['full_tract_id', 'prior_tract', 'next_tract']\n",
"_alt_cols = ['drive_shopping_attraction', 'non_residential_sqft', 'trad_sqft', 'arterial_sqft', \n",
" 'shopcenter_sqft', 'downtown_sqft']\n",
"\n",
"_trip_filters = ((trips.prior_tract > 0) &\n",
" (trips.next_tract > 0) &\n",
" (trips['mode'] == 5) &\n",
" (trips.prior_tract.isin(link_stats.loc[link_stats.outbound_count > 5].index.tolist())))\n",
"\n",
"observations = trips.loc[_trip_filters, _obs_cols].sample(10000)\n",
"\n",
"observations.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"50000\n",
"50000\n",
"50000\n",
"CPU times: user 7.28 s, sys: 12.4 ms, total: 7.29 s\n",
"Wall time: 7.29 s\n"
]
}
],
"source": [
"%%time\n",
"\n",
"sample_size = 5 # alternatives per observation\n",
"obs = observations.index.tolist()\n",
"choices = observations.full_tract_id.tolist()\n",
"origins = observations.prior_tract.tolist() # for availability of alternatives\n",
"\n",
"alts = distances.reset_index()[['prior_tract', 'full_tract_id']]\n",
"\n",
"full_obs = obs\n",
"full_alts = choices\n",
"chosen = np.repeat(1, len(obs)).tolist()\n",
"\n",
"for i in range(len(obs)):\n",
" full_obs += np.repeat(obs[i], sample_size-1).tolist()\n",
" chosen += np.repeat(0, sample_size-1).tolist()\n",
" \n",
" available_alts = alts.loc[(alts.prior_tract == origins[i]) & \\\n",
" (alts.full_tract_id != choices[i])].full_tract_id\n",
" sampled_alts = available_alts.sample(n = sample_size - 1).tolist()\n",
" full_alts += sampled_alts\n",
"\n",
"print(len(full_obs))\n",
"print(len(full_alts))\n",
"print(len(chosen))"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"50000\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>chosen</th>\n",
" <th>full_tract_id</th>\n",
" <th>place_id</th>\n",
" <th>prior_tract</th>\n",
" <th>next_tract</th>\n",
" <th>drive_shopping_attraction</th>\n",
" <th>non_residential_sqft</th>\n",
" <th>trad_sqft</th>\n",
" <th>arterial_sqft</th>\n",
" <th>shopcenter_sqft</th>\n",
" <th>downtown_sqft</th>\n",
" <th>travel_time</th>\n",
" <th>distance</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>6085506501</td>\n",
" <td>72115600208</td>\n",
" <td>6085506302</td>\n",
" <td>6085506202</td>\n",
" <td>4.699926</td>\n",
" <td>0.259702</td>\n",
" <td>0.000000</td>\n",
" <td>0.071144</td>\n",
" <td>0.126361</td>\n",
" <td>0.062197</td>\n",
" <td>4.0</td>\n",
" <td>0.181507</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" <td>6085506103</td>\n",
" <td>72115600208</td>\n",
" <td>6085506302</td>\n",
" <td>6085506202</td>\n",
" <td>4.605324</td>\n",
" <td>0.549149</td>\n",
" <td>0.018630</td>\n",
" <td>0.170145</td>\n",
" <td>0.257974</td>\n",
" <td>0.102400</td>\n",
" <td>10.0</td>\n",
" <td>2.893457</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0</td>\n",
" <td>6085506202</td>\n",
" <td>72115600208</td>\n",
" <td>6085506302</td>\n",
" <td>6085506202</td>\n",
" <td>23.645701</td>\n",
" <td>0.564751</td>\n",
" <td>0.000000</td>\n",
" <td>0.088703</td>\n",
" <td>0.394847</td>\n",
" <td>0.081201</td>\n",
" <td>10.0</td>\n",
" <td>1.483285</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0</td>\n",
" <td>6085509901</td>\n",
" <td>72115600208</td>\n",
" <td>6085506302</td>\n",
" <td>6085506202</td>\n",
" <td>15.279850</td>\n",
" <td>0.154433</td>\n",
" <td>0.000000</td>\n",
" <td>0.001379</td>\n",
" <td>0.153054</td>\n",
" <td>0.000000</td>\n",
" <td>29.0</td>\n",
" <td>11.636691</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>6085506401</td>\n",
" <td>72115600208</td>\n",
" <td>6085506302</td>\n",
" <td>6085506202</td>\n",
" <td>14.791729</td>\n",
" <td>1.798499</td>\n",
" <td>0.025061</td>\n",
" <td>0.077240</td>\n",
" <td>0.432782</td>\n",
" <td>1.263416</td>\n",
" <td>12.0</td>\n",
" <td>0.912078</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" chosen full_tract_id place_id prior_tract next_tract \\\n",
"0 1 6085506501 72115600208 6085506302 6085506202 \n",
"1 0 6085506103 72115600208 6085506302 6085506202 \n",
"2 0 6085506202 72115600208 6085506302 6085506202 \n",
"3 0 6085509901 72115600208 6085506302 6085506202 \n",
"4 0 6085506401 72115600208 6085506302 6085506202 \n",
"\n",
" drive_shopping_attraction non_residential_sqft trad_sqft arterial_sqft \\\n",
"0 4.699926 0.259702 0.000000 0.071144 \n",
"1 4.605324 0.549149 0.018630 0.170145 \n",
"2 23.645701 0.564751 0.000000 0.088703 \n",
"3 15.279850 0.154433 0.000000 0.001379 \n",
"4 14.791729 1.798499 0.025061 0.077240 \n",
"\n",
" shopcenter_sqft downtown_sqft travel_time distance \n",
"0 0.126361 0.062197 4.0 0.181507 \n",
"1 0.257974 0.102400 10.0 2.893457 \n",
"2 0.394847 0.081201 10.0 1.483285 \n",
"3 0.153054 0.000000 29.0 11.636691 \n",
"4 0.432782 1.263416 12.0 0.912078 "
]
},
"execution_count": 103,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mct = pd.DataFrame({'place_id': full_obs, 'full_tract_id': full_alts, 'chosen': chosen})\n",
"mct = mct.sort_values(by=['place_id', 'chosen'], ascending=False).reset_index(drop=True)\n",
"\n",
"# Add choice scenario attributes\n",
"obs_to_merge = observations.reset_index().drop(labels='full_tract_id', axis='columns')\n",
"mct = pd.merge(mct, obs_to_merge, how='left', on='place_id')\n",
"\n",
"# Add attributes of the alternatives\n",
"alts_to_merge = tracts[_alt_cols].reset_index()\n",
"mct = pd.merge(mct, alts_to_merge, how='left', on='full_tract_id')\n",
"\n",
"# Add interaction attributes\n",
"ints_to_merge = distances.reset_index()[['prior_tract', 'full_tract_id', 'travel_time', 'distance']]\n",
"mct = pd.merge(mct, ints_to_merge, how='left', on=['prior_tract', 'full_tract_id'])\n",
"\n",
"print(len(mct))\n",
"mct.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment