Created
October 5, 2019 18:51
-
-
Save jjsantos01/ae4c959bab99afe7f26426d9247e7a57 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Imports, funciones y globales" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"pandas 0.24.2\n", | |
"geopandas 0.5.0\n", | |
"seaborn 0.9.0\n" | |
] | |
} | |
], | |
"source": [ | |
"import seaborn as sns\n", | |
"import pandas as pd\n", | |
"import geopandas as gpd\n", | |
"import matplotlib.pyplot as plt\n", | |
"from bokeh.palettes import viridis\n", | |
"\n", | |
"# Report the versions of the main libraries used in this run.\n", | |
"for _modulo in (pd, gpd, sns):\n", | |
"    print(_modulo.__name__, _modulo.__version__)\n", | |
"\n", | |
"# Render figures inside the notebook and pick the plotting style.\n", | |
"%matplotlib inline\n", | |
"plt.style.use('fivethirtyeight')\n", | |
"\n", | |
"# Root folder of the Uber data and a shorthand for MultiIndex slicing.\n", | |
"dir_datos_uber = 'D:/datos/UBER'\n", | |
"idx = pd.IndexSlice\n", | |
"def linea_division_meses(ax, no_semana, no_dia):\n", | |
"    \"\"\"Draw the dashed stair-step line that separates two months on a calendar heatmap.\n", | |
"\n", | |
"    The heatmap is expected to have one row per week and seven day columns, with\n", | |
"    row ``no_semana - 1`` .. ``no_semana`` being the week in which the month changes.\n", | |
"    Three black dashed segments are drawn:\n", | |
"\n", | |
"    * along the top edge of that row, from column ``no_dia - 1`` to the right border;\n", | |
"    * along the bottom edge of that row, from the left border to column ``no_dia - 1``;\n", | |
"    * a vertical riser at column ``no_dia - 1`` joining the two.\n", | |
"\n", | |
"    Parameters\n", | |
"    ----------\n", | |
"    ax : matplotlib Axes holding the heatmap.\n", | |
"    no_semana : 1-based row (week) in which the divider steps.\n", | |
"    no_dia : the divider is placed at x = ``no_dia - 1`` in heatmap column units.\n", | |
"\n", | |
"    Returns\n", | |
"    -------\n", | |
"    The same ``ax``, to allow chaining.\n", | |
"    \"\"\"\n", | |
"    x_corte = no_dia - 1\n", | |
"    # Horizontal limits of axhline are axes fractions, hence the division by the\n", | |
"    # seven day columns. Both segments now meet at the same column; previously the\n", | |
"    # upper one started at (7 - no_dia) / 7, which only coincides when no_dia == 4.\n", | |
"    ax.axhline(y=no_semana - 1, xmin=x_corte / 7, xmax=1, c='k', ls='--')\n", | |
"    ax.axhline(y=no_semana, xmin=0, xmax=x_corte / 7, c='k', ls='--')\n", | |
"    # The riser is drawn in data coordinates. axvline takes ymin/ymax as fractions\n", | |
"    # measured from the bottom of the axes, which lands on the mirrored row when the\n", | |
"    # y axis is inverted (as seaborn's heatmap does).\n", | |
"    ax.plot([x_corte, x_corte], [no_semana - 1, no_semana], c='k', ls='--')\n", | |
"    return ax\n", | |
"\n", | |
"# Movement ID of each named origin point. The names are reused later in the\n", | |
"# figure titles and in the names of the saved files.\n", | |
"puntos = {\n", | |
"    \"Aeropuerto\": 4946,\n", | |
"    \"Santa Fe\": 2922,\n", | |
"    \"Bellas Artes\": 4743,\n", | |
"    \"Central del norte\": 3220,\n", | |
"    \"Taxqueña\": 2806,\n", | |
"    \"Indios verdes\": 3126,\n", | |
"    \"Pantitlán\": 4962,\n", | |
"    \"El Rosario\": 2674,\n", | |
"    \"Tlahuac\": 4113,\n", | |
"    \"Ciudad Azteca\": 982,\n", | |
"    \"Cuatro caminos\": 1384,\n", | |
"    \"Tacubaya\": 4917,\n", | |
"    \"Barranca del muerto\": 3959,\n", | |
"    \"Metro Constitucion de 1917\": 3509,\n", | |
"    \"Metro universidad\": 2766,\n", | |
"    \"Metro La Paz\": 1842,\n", | |
"    \"Metro Martín Carrera\": 3037,\n", | |
"    \"Metro Politécnico\": 3086,\n", | |
"}\n", | |
"\n", | |
"# Reverse lookup: Movement ID -> point name.\n", | |
"ageb_to_nombre = dict(zip(puntos.values(), puntos.keys()))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Datos" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Location of the daily travel-time extract (the file name spans 20190101-20190331).\n", | |
"dir_uber_diario = f'{dir_datos_uber}/diario_aeropuerto'\n", | |
"ruta_viajes = f'{dir_uber_diario}/uber_diario_puntos_ciudad_20190101_20190331.csv'\n", | |
"\n", | |
"# Destination ids are read as text; only the rows whose period is ALL_DAY are kept.\n", | |
"df = (\n", | |
"    pd.read_csv(ruta_viajes, dtype={'dstId': str})\n", | |
"    .query('periodo==\"ALL_DAY\"')\n", | |
")\n", | |
"\n", | |
"# Zone geometries, indexed by their Movement ID.\n", | |
"agebs = gpd.read_file(f'{dir_datos_uber}/mexico_city_agebs.json').set_index('MOVEMENT_ID')\n", | |
"\n", | |
"# Neighbourhood outlines; records without a geometry are dropped.\n", | |
"colonias = gpd.read_file('datos/coloniascdmx.geojson')\n", | |
"colonias = colonias.loc[colonias['geometry'].notnull()]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th>Unnamed: 0</th>\n", | |
" <th>max</th>\n", | |
" <th>meanTravelTimeSec</th>\n", | |
" <th>min</th>\n", | |
" <th>periodo</th>\n", | |
" <th>tiempo</th>\n", | |
" <th>n_semana</th>\n", | |
" <th>n_dia_sem</th>\n", | |
" <th>dia</th>\n", | |
" <th>zona</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>fecha</th>\n", | |
" <th>sourceid</th>\n", | |
" <th>dstId</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th rowspan=\"5\" valign=\"top\">2019-01-01</th>\n", | |
" <th rowspan=\"5\" valign=\"top\">982</th>\n", | |
" <th>100</th>\n", | |
" <td>17</td>\n", | |
" <td>2021.0</td>\n", | |
" <td>1495.0</td>\n", | |
" <td>1105.0</td>\n", | |
" <td>ALL_DAY</td>\n", | |
" <td>24.92</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>Martes</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1001</th>\n", | |
" <td>261</td>\n", | |
" <td>935.0</td>\n", | |
" <td>582.0</td>\n", | |
" <td>361.0</td>\n", | |
" <td>ALL_DAY</td>\n", | |
" <td>9.70</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>Martes</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1002</th>\n", | |
" <td>262</td>\n", | |
" <td>1343.0</td>\n", | |
" <td>963.0</td>\n", | |
" <td>690.0</td>\n", | |
" <td>ALL_DAY</td>\n", | |
" <td>16.05</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>Martes</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1003</th>\n", | |
" <td>263</td>\n", | |
" <td>1287.0</td>\n", | |
" <td>830.0</td>\n", | |
" <td>535.0</td>\n", | |
" <td>ALL_DAY</td>\n", | |
" <td>13.83</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>Martes</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1004</th>\n", | |
" <td>264</td>\n", | |
" <td>1369.0</td>\n", | |
" <td>945.0</td>\n", | |
" <td>652.0</td>\n", | |
" <td>ALL_DAY</td>\n", | |
" <td>15.75</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>Martes</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Unnamed: 0 max meanTravelTimeSec min \\\n", | |
"fecha sourceid dstId \n", | |
"2019-01-01 982 100 17 2021.0 1495.0 1105.0 \n", | |
" 1001 261 935.0 582.0 361.0 \n", | |
" 1002 262 1343.0 963.0 690.0 \n", | |
" 1003 263 1287.0 830.0 535.0 \n", | |
" 1004 264 1369.0 945.0 652.0 \n", | |
"\n", | |
" periodo tiempo n_semana n_dia_sem dia zona \n", | |
"fecha sourceid dstId \n", | |
"2019-01-01 982 100 ALL_DAY 24.92 1 1 Martes NaN \n", | |
" 1001 ALL_DAY 9.70 1 1 Martes NaN \n", | |
" 1002 ALL_DAY 16.05 1 1 Martes NaN \n", | |
" 1003 ALL_DAY 13.83 1 1 Martes NaN \n", | |
" 1004 ALL_DAY 15.75 1 1 Martes NaN " | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Derived columns: parsed date, travel time in minutes, week number,\n", | |
"# weekday number and weekday name.\n", | |
"df['fecha'] = pd.to_datetime(df['fecha'], format='%Y-%m-%d')\n", | |
"df['tiempo'] = df['meanTravelTimeSec'].div(60).round(2)\n", | |
"# NOTE(review): .dt.weekofyear works on the pandas 0.24.2 recorded above but was\n", | |
"# deprecated in pandas 1.1 and removed in 2.0 -- revisit when upgrading.\n", | |
"df['n_semana'] = df['fecha'].dt.weekofyear\n", | |
"df['n_dia_sem'] = df['fecha'].dt.dayofweek\n", | |
"# NOTE(review): the locale name 'Spanish' looks Windows-specific; other platforms\n", | |
"# usually need something like 'es_ES.UTF-8' -- confirm before running elsewhere.\n", | |
"df['dia'] = df['fecha'].dt.day_name(locale='Spanish')\n", | |
"df.sort_values(['fecha', 'sourceid', 'dstId'], inplace=True)\n", | |
"# After this line 'fecha' lives in the index, so re-running this cell without\n", | |
"# reloading df fails at its first statement.\n", | |
"df.set_index(['fecha', 'sourceid', 'dstId'], inplace=True)\n", | |
"\n", | |
"# Zone limits in degrees: N*/S* bound the y (latitude) slices and P*/O* bound the\n", | |
"# x (longitude) slices passed to .cx below.\n", | |
"N0, N1 = 19.45, 19.6\n", | |
"S0, S1 = 19.15, 19.28\n", | |
"P0, P1 = -99.3, -99.2\n", | |
"O0, O1= -99.1, -98.95\n", | |
"\n", | |
"# Build one GeoDataFrame per zone by slicing agebs with a coordinate box.\n", | |
"norte = agebs.cx[P1:O0, N0:N1]\n", | |
"sur = agebs.cx[P1:O0, S0:S1]\n", | |
"poniente = agebs.cx[P0:P1, S1:N0]\n", | |
"oriente = agebs.cx[O0:O1, S1:N0]\n", | |
"centro = agebs.cx[P1:O0, S1:N0]\n", | |
"\n", | |
"# Label each row with the zone of its destination (dstId, third index level).\n", | |
"# The assignments run in sequence, so an id present in more than one zone frame\n", | |
"# keeps the last label written. Destinations in no zone frame stay NaN, as the\n", | |
"# first rows of the displayed output show.\n", | |
"df.loc[idx[:, :, norte.index], 'zona'] = 'Norte'\n", | |
"df.loc[idx[:, :, sur.index], 'zona'] = 'Sur'\n", | |
"df.loc[idx[:, :, poniente.index], 'zona'] = 'Poniente'\n", | |
"df.loc[idx[:, :, oriente.index], 'zona'] = 'Oriente'\n", | |
"df.loc[idx[:, :, centro.index], 'zona'] = 'Centro'\n", | |
"\n", | |
"df.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Gráficas" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Two-letter weekday labels keyed by the dayofweek number (0 = Monday).\n", | |
"dicc_dias = {\n", | |
"    0: 'Lu',\n", | |
"    1: 'Ma',\n", | |
"    2: 'Mi',\n", | |
"    3: 'Ju',\n", | |
"    4: 'Vi',\n", | |
"    5: 'Sa',\n", | |
"    6: 'Do',\n", | |
"}\n", | |
"\n", | |
"def crea_grafica_evolucion(ageb_origen):\n", | |
"    \"\"\"Build and save the five-panel travel-time figure for one origin zone.\n", | |
"\n", | |
"    Panels:\n", | |
"      1. daily mean travel time to all destinations plus its 7-day rolling mean;\n", | |
"      2. mean travel time by day of the week;\n", | |
"      3. calendar heatmap (weeks x weekdays) of the mean travel time;\n", | |
"      4. 7-day rolling mean of the travel time by destination zone;\n", | |
"      5. map of the destination zones with the origin highlighted in red.\n", | |
"\n", | |
"    Reads the notebook globals ``df``, ``agebs``, ``colonias``, the zone frames\n", | |
"    (``norte``, ``sur``, ``poniente``, ``oriente``, ``centro``), ``dicc_dias`` and\n", | |
"    ``ageb_to_nombre``.\n", | |
"\n", | |
"    Parameters\n", | |
"    ----------\n", | |
"    ageb_origen : Movement ID of the origin. It is looked up in the 'sourceid'\n", | |
"        level of ``df``, as ``str(ageb_origen)`` in the index of ``agebs`` and as\n", | |
"        a key of ``ageb_to_nombre``.\n", | |
"\n", | |
"    Returns\n", | |
"    -------\n", | |
"    None. The figure is written to ``graficas/evolucion_diaria_<name>_2019T1.png``\n", | |
"    (the folder is created when missing) and the path is printed.\n", | |
"\n", | |
"    Raises\n", | |
"    ------\n", | |
"    KeyError if ``ageb_origen`` is missing from any of the lookups above.\n", | |
"    \"\"\"\n", | |
"    # Local import: only needed to make sure the output folder exists.\n", | |
"    import os\n", | |
"\n", | |
"    fig = plt.figure()\n", | |
"    gs = plt.GridSpec(20, 30)\n", | |
"\n", | |
"    # Layout: two stacked panels on the left, the calendar in the middle,\n", | |
"    # two stacked panels on the right.\n", | |
"    ax1 = fig.add_subplot(gs[0:9, 0:9])\n", | |
"    ax2 = fig.add_subplot(gs[11:20, 0:9])\n", | |
"    ax3 = fig.add_subplot(gs[0:20, 10:20])\n", | |
"    ax4 = fig.add_subplot(gs[0:9, 20:30])\n", | |
"    ax5 = fig.add_subplot(gs[11:20, 20:30])\n", | |
"\n", | |
"    fig.subplots_adjust(wspace=110)\n", | |
"    fig.set_size_inches(20, 10)\n", | |
"    # All trips leaving the requested origin; the 'sourceid' level is dropped.\n", | |
"    df_origen = df.xs(ageb_origen, axis=0, level='sourceid')\n", | |
"\n", | |
"    # Daily series\n", | |
"    agrup_fecha = df_origen.groupby('fecha')[['tiempo']].mean()\n", | |
"    agrup_fecha.unstack()['tiempo'].plot(ax=ax1)\n", | |
"    agrup_fecha.rolling(7).mean().plot(ax=ax1)\n", | |
"    ax1.legend(['Diario', 'promedio semanal'])\n", | |
"    ax1.set_ylabel('minutos')\n", | |
"    ax1.set_ylim(0, 40)\n", | |
"    ax1.set_title('Tiempo promedio a todos los destinos', fontsize=14)\n", | |
"    ax1.set_xlabel('')\n", | |
"\n", | |
"    # Day of the week (grouping by the number first keeps the bars in weekday order)\n", | |
"    agrup_dia = df_origen.groupby(['n_dia_sem', 'dia'])[['tiempo']].mean()\n", | |
"    agrup_dia.reset_index('n_dia_sem').plot.bar(y='tiempo', ax=ax2, legend=False)\n", | |
"    ax2.set_ylim(0, 40)\n", | |
"    ax2.set_ylabel('minutos')\n", | |
"    ax2.set_xlabel('')\n", | |
"    ax2.set_title('Tiempo promedio por día de la semana', fontsize=14)\n", | |
"\n", | |
"    # Calendar: rows are week numbers, columns are weekdays\n", | |
"    heat_data = df_origen.groupby(['n_semana', 'n_dia_sem'])[['tiempo']].mean().unstack('n_semana')['tiempo'].T\\\n", | |
"        .rename(columns=dicc_dias)\n", | |
"    ax = sns.heatmap(heat_data, linewidths=.5, cmap='viridis_r', vmin=20, vmax=35, ax=ax3)\n", | |
"    ax.set_yticklabels(ax.get_yticklabels(), rotation=0)\n", | |
"    # The month dividers (weeks 5 and 9), the 0-13 range of the twin axis and the\n", | |
"    # Ene/Feb/Mar labels are hard-coded for the first quarter of 2019.\n", | |
"    ax = linea_division_meses(ax, no_semana=5, no_dia=4)\n", | |
"    ax = linea_division_meses(ax, no_semana=9, no_dia=4)\n", | |
"    ax.set_title('Tiempo promedio a todos los destinos (minutos)', fontsize=14, y=1.03)\n", | |
"    ax.xaxis.set_ticks_position('top')\n", | |
"    ax.set_ylabel('')\n", | |
"    ax.set_xlabel('')\n", | |
"    # Twin y axis used only to print the month names to the left of the week numbers.\n", | |
"    ax_twin = ax.twinx()\n", | |
"    ax_twin.yaxis.grid(False)\n", | |
"    ax_twin.set_ylim(0, 13)\n", | |
"    ax_twin.set_yticks([2, 6.5, 11])\n", | |
"    ax_twin.set_yticklabels(reversed(['Ene', 'Feb', 'Mar']))\n", | |
"    ax_twin.yaxis.set_ticks_position('left')\n", | |
"    ax_twin.yaxis.set_label_position('left')\n", | |
"    ax_twin.spines['left'].set_position(('outward', 25))\n", | |
"\n", | |
"    # Daily time by destination zone (7-day rolling mean)\n", | |
"    df_origen.groupby(['fecha', 'zona'])['tiempo'].mean().unstack('zona').rolling(7).mean().plot(ax=ax4)\n", | |
"    ax4.set_ylim(0, 60)\n", | |
"    ax4.set_title('Tiempo promedio, por zona de destino', fontsize=14)\n", | |
"    ax4.set_xlabel('')\n", | |
"    ax4.legend(ncol=3, loc=(0, 0.05))\n", | |
"    ax4.set_ylabel('minutos (media movil 7 días)', fontsize=12)\n", | |
"\n", | |
"    # Zone map: neighbourhood outlines, one colour per zone, origin in red on top\n", | |
"    colonias.boundary.plot(color='k', ax=ax5, lw=0.2)\n", | |
"    norte.boundary.plot(color='C1', ax=ax5, label='Norte', lw=0.2, zorder=-1)\n", | |
"    sur.boundary.plot(color='C4', ax=ax5, label='Sur', lw=0.2, zorder=-1)\n", | |
"    poniente.boundary.plot(color='C3', ax=ax5, label='Poniente', lw=0.2, zorder=-1)\n", | |
"    oriente.boundary.plot(color='C2', ax=ax5, label='Oriente', lw=0.2, zorder=-1)\n", | |
"    centro.boundary.plot(color='C0', ax=ax5, label='Centro', lw=0.2, zorder=-1)\n", | |
"    agebs.loc[[str(ageb_origen)]].plot(color='red', ax=ax5, zorder=10)\n", | |
"    ax5.legend(bbox_to_anchor=(1.4, 0.7))\n", | |
"    ax5.set_axis_off()\n", | |
"\n", | |
"    # Title, credit line and saving\n", | |
"    zona_origen = ageb_to_nombre[ageb_origen]\n", | |
"    fig.text(x=0, y=-0.1, s='Elaborado por @jjsantoso con datos de Uber Movements', fontdict={'size': 9}, transform=ax.transAxes)\n", | |
"    fig.suptitle(f'Todos los viajes de UBER con origen en {zona_origen} (Movement ID {ageb_origen}), 2019-1T', fontsize=18)\n", | |
"    figname = f'graficas/evolucion_diaria_{zona_origen}_2019T1.png'\n", | |
"    # savefig does not create missing folders, so make sure the target exists.\n", | |
"    os.makedirs(os.path.dirname(figname), exist_ok=True)\n", | |
"    fig.savefig(figname, dpi=300, bbox_inches='tight')\n", | |
"    print('Guardada en:', figname)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Build and save the figure for every origin point listed in ageb_to_nombre.\n", | |
"for origen in ageb_to_nombre:\n", | |
" crea_grafica_evolucion(origen)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"hide_input": false, | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.4" | |
}, | |
"toc": { | |
"base_numbering": 1, | |
"nav_menu": {}, | |
"number_sections": true, | |
"sideBar": true, | |
"skip_h1_title": false, | |
"title_cell": "Table of Contents", | |
"title_sidebar": "Contents", | |
"toc_cell": false, | |
"toc_position": {}, | |
"toc_section_display": true, | |
"toc_window_display": false | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment