Skip to content

Instantly share code, notes, and snippets.

@jjsantos01
Created October 5, 2019 18:51
Show Gist options
  • Save jjsantos01/ae4c959bab99afe7f26426d9247e7a57 to your computer and use it in GitHub Desktop.
Save jjsantos01/ae4c959bab99afe7f26426d9247e7a57 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Imports, funciones y globales"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"pandas 0.24.2\n",
"geopandas 0.5.0\n",
"seaborn 0.9.0\n"
]
}
],
"source": [
"import os\n",
"\n",
"import geopandas as gpd\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"from bokeh.palettes import viridis\n",
"\n",
"# Report the versions of the main libraries used by this notebook.\n",
"for modulo in (pd, gpd, sns):\n",
"    print(modulo.__name__, modulo.__version__)\n",
"\n",
"%matplotlib inline\n",
"plt.style.use('fivethirtyeight')\n",
"\n",
"# Root folder of the Uber Movement data. Set the DIR_DATOS_UBER environment\n",
"# variable to point somewhere else; the default is the original local path.\n",
"dir_datos_uber = os.environ.get('DIR_DATOS_UBER', 'D:/datos/UBER')\n",
"# Shorthand for building MultiIndex slicers used with .loc\n",
"idx = pd.IndexSlice\n",
"def linea_division_meses(ax, no_semana, no_dia, dias_semana=7):\n",
"    \"\"\"Draw the dashed step that separates two months on a week x weekday grid.\n",
"\n",
"    The grid is assumed to have one row per week and one column per weekday,\n",
"    with the cell in row r / column c spanning x in [c, c + 1] and y in\n",
"    [r, r + 1] in data coordinates (the layout a seaborn heatmap produces).\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    ax : matplotlib Axes\n",
"        Axes that holds the grid.\n",
"    no_semana : int\n",
"        1-based row of the week in which the new month starts; the step is\n",
"        drawn around the row spanning y in [no_semana - 1, no_semana].\n",
"    no_dia : int\n",
"        0-based column (Monday=0, same convention as ``dt.dayofweek``) of the\n",
"        first day of the new month, i.e. the number of leading cells of that\n",
"        row that still belong to the previous month.\n",
"    dias_semana : int, optional\n",
"        Number of columns of the grid (7 by default).\n",
"\n",
"    Returns\n",
"    -------\n",
"    The same ``ax``, to allow chaining.\n",
"    \"\"\"\n",
"    fila_sup, fila_inf = no_semana - 1, no_semana\n",
"    # One polyline in DATA coordinates: under the previous month's cells, up\n",
"    # the boundary column, then over the new month's cells. axhline/axvline\n",
"    # are avoided because their extent arguments are axes fractions, which put\n",
"    # the vertical segment in the mirrored row when the y axis is inverted.\n",
"    xs = [0, no_dia, no_dia, dias_semana]\n",
"    ys = [fila_inf, fila_inf, fila_sup, fila_sup]\n",
"    ax.plot(xs, ys, c='k', ls='--')\n",
"    return ax\n",
"\n",
"# Origin points of interest: display name -> Uber Movement zone ID.\n",
"puntos = {\n",
"    \"Aeropuerto\": 4946,\n",
"    \"Santa Fe\": 2922,\n",
"    \"Bellas Artes\": 4743,\n",
"    \"Central del norte\": 3220,\n",
"    \"Taxqueña\": 2806,\n",
"    \"Indios verdes\": 3126,\n",
"    \"Pantitlán\": 4962,\n",
"    \"El Rosario\": 2674,\n",
"    \"Tlahuac\": 4113,\n",
"    \"Ciudad Azteca\": 982,\n",
"    \"Cuatro caminos\": 1384,\n",
"    \"Tacubaya\": 4917,\n",
"    \"Barranca del muerto\": 3959,\n",
"    \"Metro Constitucion de 1917\": 3509,\n",
"    \"Metro universidad\": 2766,\n",
"    \"Metro La Paz\": 1842,\n",
"    \"Metro Martín Carrera\": 3037,\n",
"    \"Metro Politécnico\": 3086,\n",
"}\n",
"\n",
"# Reverse lookup: zone ID -> display name.\n",
"ageb_to_nombre = dict(zip(puntos.values(), puntos.keys()))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Datos"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Daily travel times between the selected origins and every destination zone;\n",
"# only the rows with periodo == \"ALL_DAY\" are kept.\n",
"dir_uber_diario = f'{dir_datos_uber}/diario_aeropuerto'\n",
"df = (\n",
"    pd.read_csv(\n",
"        f'{dir_uber_diario}/uber_diario_puntos_ciudad_20190101_20190331.csv',\n",
"        dtype={'dstId': str},\n",
"    )\n",
"    .query('periodo==\"ALL_DAY\"')\n",
"    # Own the filtered rows: later cells add columns to this frame, which\n",
"    # would otherwise risk a SettingWithCopyWarning.\n",
"    .copy()\n",
")\n",
"\n",
"# Zone polygons, indexed by their MOVEMENT_ID.\n",
"agebs = gpd.read_file(f'{dir_datos_uber}/mexico_city_agebs.json').set_index('MOVEMENT_ID')\n",
"\n",
"# Neighbourhood polygons; rows without a geometry are dropped.\n",
"colonias = gpd.read_file('datos/coloniascdmx.geojson').loc[lambda gdf: gdf['geometry'].notnull()]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th>Unnamed: 0</th>\n",
" <th>max</th>\n",
" <th>meanTravelTimeSec</th>\n",
" <th>min</th>\n",
" <th>periodo</th>\n",
" <th>tiempo</th>\n",
" <th>n_semana</th>\n",
" <th>n_dia_sem</th>\n",
" <th>dia</th>\n",
" <th>zona</th>\n",
" </tr>\n",
" <tr>\n",
" <th>fecha</th>\n",
" <th>sourceid</th>\n",
" <th>dstId</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"5\" valign=\"top\">2019-01-01</th>\n",
" <th rowspan=\"5\" valign=\"top\">982</th>\n",
" <th>100</th>\n",
" <td>17</td>\n",
" <td>2021.0</td>\n",
" <td>1495.0</td>\n",
" <td>1105.0</td>\n",
" <td>ALL_DAY</td>\n",
" <td>24.92</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Martes</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1001</th>\n",
" <td>261</td>\n",
" <td>935.0</td>\n",
" <td>582.0</td>\n",
" <td>361.0</td>\n",
" <td>ALL_DAY</td>\n",
" <td>9.70</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Martes</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1002</th>\n",
" <td>262</td>\n",
" <td>1343.0</td>\n",
" <td>963.0</td>\n",
" <td>690.0</td>\n",
" <td>ALL_DAY</td>\n",
" <td>16.05</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Martes</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1003</th>\n",
" <td>263</td>\n",
" <td>1287.0</td>\n",
" <td>830.0</td>\n",
" <td>535.0</td>\n",
" <td>ALL_DAY</td>\n",
" <td>13.83</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Martes</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1004</th>\n",
" <td>264</td>\n",
" <td>1369.0</td>\n",
" <td>945.0</td>\n",
" <td>652.0</td>\n",
" <td>ALL_DAY</td>\n",
" <td>15.75</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Martes</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Unnamed: 0 max meanTravelTimeSec min \\\n",
"fecha sourceid dstId \n",
"2019-01-01 982 100 17 2021.0 1495.0 1105.0 \n",
" 1001 261 935.0 582.0 361.0 \n",
" 1002 262 1343.0 963.0 690.0 \n",
" 1003 263 1287.0 830.0 535.0 \n",
" 1004 264 1369.0 945.0 652.0 \n",
"\n",
" periodo tiempo n_semana n_dia_sem dia zona \n",
"fecha sourceid dstId \n",
"2019-01-01 982 100 ALL_DAY 24.92 1 1 Martes NaN \n",
" 1001 ALL_DAY 9.70 1 1 Martes NaN \n",
" 1002 ALL_DAY 16.05 1 1 Martes NaN \n",
" 1003 ALL_DAY 13.83 1 1 Martes NaN \n",
" 1004 ALL_DAY 15.75 1 1 Martes NaN "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Spanish weekday names keyed by pandas' dayofweek (Monday=0). A fixed mapping\n",
"# avoids depending on an operating-system locale named 'Spanish'.\n",
"nombres_dias = {0: 'Lunes', 1: 'Martes', 2: 'Miércoles', 3: 'Jueves',\n",
"                4: 'Viernes', 5: 'Sábado', 6: 'Domingo'}\n",
"\n",
"df['fecha'] = pd.to_datetime(df['fecha'], format='%Y-%m-%d')\n",
"# Mean travel time converted from seconds to minutes\n",
"df['tiempo'] = df['meanTravelTimeSec'].div(60).round(2)\n",
"fechas = df['fecha'].dt\n",
"# Week number: `weekofyear` is deprecated in newer pandas in favour of\n",
"# `isocalendar()`, which older pandas does not provide, so support both.\n",
"if hasattr(fechas, 'isocalendar'):\n",
"    df['n_semana'] = fechas.isocalendar().week.astype(int)\n",
"else:\n",
"    df['n_semana'] = fechas.weekofyear\n",
"df['n_dia_sem'] = fechas.dayofweek\n",
"df['dia'] = df['n_dia_sem'].map(nombres_dias)\n",
"df.sort_values(['fecha', 'sourceid', 'dstId'], inplace=True)\n",
"df.set_index(['fecha', 'sourceid', 'dstId'], inplace=True)\n",
"\n",
"# Zone limits: N*/S* are used as y bounds and P*/O* as x bounds of the\n",
"# coordinate slices below (presumably latitude / longitude in degrees).\n",
"N0, N1 = 19.45, 19.6\n",
"S0, S1 = 19.15, 19.28\n",
"P0, P1 = -99.3, -99.2\n",
"O0, O1 = -99.1, -98.95\n",
"\n",
"# One GeoDataFrame per zone, selected by bounding box\n",
"norte = agebs.cx[P1:O0, N0:N1]\n",
"sur = agebs.cx[P1:O0, S0:S1]\n",
"poniente = agebs.cx[P0:P1, S1:N0]\n",
"oriente = agebs.cx[O0:O1, S1:N0]\n",
"centro = agebs.cx[P1:O0, S1:N0]\n",
"\n",
"# Label each row with the zone of its destination. Zones are applied in this\n",
"# order and later ones overwrite earlier ones, so an AGEB selected by two\n",
"# boxes keeps the last label (Centro wins). Destinations outside every box\n",
"# stay NaN, and zone AGEBs that never appear as a destination are ignored.\n",
"zona_por_ageb = {}\n",
"for nombre_zona, agebs_zona in [('Norte', norte), ('Sur', sur), ('Poniente', poniente),\n",
"                                ('Oriente', oriente), ('Centro', centro)]:\n",
"    zona_por_ageb.update(dict.fromkeys(agebs_zona.index, nombre_zona))\n",
"df['zona'] = df.index.get_level_values('dstId').map(zona_por_ageb).values\n",
"\n",
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Gráficas"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Two-letter weekday labels keyed by day-of-week number (0 = Monday).\n",
"dicc_dias = dict(enumerate(['Lu', 'Ma', 'Mi', 'Ju', 'Vi', 'Sa', 'Do']))\n",
"\n",
"def crea_grafica_evolucion(ageb_origen):\n",
"    \"\"\"Build and save the five-panel travel-time figure for one origin zone.\n",
"\n",
"    Panels: (1) daily mean travel time to all destinations plus its 7-day\n",
"    rolling mean, (2) mean travel time by weekday, (3) week x weekday calendar\n",
"    heatmap, (4) 7-day rolling mean by destination zone and (5) a map of the\n",
"    zones with the origin highlighted in red.\n",
"\n",
"    Reads the notebook globals `df`, `agebs`, `colonias`, `norte`, `sur`,\n",
"    `poniente`, `oriente`, `centro`, `dicc_dias` and `ageb_to_nombre`.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    ageb_origen\n",
"        Movement ID of the origin. It must be a key of `ageb_to_nombre` and a\n",
"        value of the `sourceid` level of `df`'s index.\n",
"\n",
"    Returns\n",
"    -------\n",
"    None. The figure is written to\n",
"    ``graficas/evolucion_diaria_<origin name>_2019T1.png`` (the folder is\n",
"    created when missing) and the path is printed.\n",
"    \"\"\"\n",
"    fig = plt.figure()\n",
"    gs = plt.GridSpec(20, 30)\n",
"\n",
"    # Layout: two stacked panels on the left, the calendar in the middle\n",
"    # (full height) and two stacked panels on the right.\n",
"    ax1 = fig.add_subplot(gs[0:9, 0:9])\n",
"    ax2 = fig.add_subplot(gs[11:20, 0:9])\n",
"    ax3 = fig.add_subplot(gs[0:20, 10:20])\n",
"    ax4 = fig.add_subplot(gs[0:9, 20:30])\n",
"    ax5 = fig.add_subplot(gs[11:20, 20:30])\n",
"\n",
"    fig.subplots_adjust(wspace=110)\n",
"    fig.set_size_inches(20, 10)\n",
"    df_origen = df.xs(ageb_origen, axis=0, level='sourceid')\n",
"\n",
"    # Daily mean over all destinations, plus 7-day rolling mean\n",
"    agrup_fecha = df_origen.groupby('fecha')[['tiempo']].mean()\n",
"    agrup_fecha['tiempo'].plot(ax=ax1)\n",
"    agrup_fecha.rolling(7).mean().plot(ax=ax1)\n",
"    ax1.legend(['Diario', 'promedio semanal'])\n",
"    ax1.set_ylabel('minutos')\n",
"    ax1.set_ylim(0, 40)\n",
"    ax1.set_title('Tiempo promedio a todos los destinos', fontsize=14)\n",
"    ax1.set_xlabel('')\n",
"\n",
"    # Mean by weekday; grouping by n_dia_sem first keeps Monday..Sunday order\n",
"    agrup_dia = df_origen.groupby(['n_dia_sem', 'dia'])[['tiempo']].mean()\n",
"    agrup_dia.reset_index('n_dia_sem').plot.bar(y='tiempo', ax=ax2, legend=False)\n",
"    ax2.set_ylim(0, 40)\n",
"    ax2.set_ylabel('minutos')\n",
"    ax2.set_xlabel('')\n",
"    ax2.set_title('Tiempo promedio por día de la semana', fontsize=14)\n",
"\n",
"    # Calendar heatmap: one row per week, one column per weekday\n",
"    heat_data = (\n",
"        df_origen.groupby(['n_semana', 'n_dia_sem'])[['tiempo']].mean()\n",
"        .unstack('n_semana')['tiempo'].T\n",
"        .rename(columns=dicc_dias)\n",
"    )\n",
"    ax = sns.heatmap(heat_data, linewidths=.5, cmap='viridis_r', vmin=20, vmax=35, ax=ax3)\n",
"    ax.set_yticklabels(ax.get_yticklabels(), rotation=0)\n",
"    # Month separators; week/day positions are hard-coded for the Q1-2019\n",
"    # period loaded by this notebook.\n",
"    ax = linea_division_meses(ax, no_semana=5, no_dia=4)\n",
"    ax = linea_division_meses(ax, no_semana=9, no_dia=4)\n",
"    ax.set_title('Tiempo promedio a todos los destinos (minutos)', fontsize=14, y=1.03)\n",
"    ax.xaxis.set_ticks_position('top')\n",
"    ax.set_ylabel('')\n",
"    ax.set_xlabel('')\n",
"    # Secondary y axis used only to print the month names to the left of the\n",
"    # week numbers. Its 0-13 range is hard-coded; the labels are reversed\n",
"    # because this axis grows upwards while the heatmap rows grow downwards.\n",
"    ax_twin = ax.twinx()\n",
"    ax_twin.yaxis.grid(False)\n",
"    ax_twin.set_ylim(0, 13)\n",
"    ax_twin.set_yticks([2, 6.5, 11])\n",
"    ax_twin.set_yticklabels(reversed(['Ene', 'Feb', 'Mar']))\n",
"    ax_twin.yaxis.set_ticks_position('left')\n",
"    ax_twin.yaxis.set_label_position('left')\n",
"    ax_twin.spines['left'].set_position(('outward', 25))\n",
"\n",
"    # 7-day rolling mean by destination zone\n",
"    df_origen.groupby(['fecha', 'zona'])['tiempo'].mean().unstack('zona').rolling(7).mean().plot(ax=ax4)\n",
"    ax4.set_ylim(0, 60)\n",
"    ax4.set_title('Tiempo promedio, por zona de destino', fontsize=14)\n",
"    ax4.set_xlabel('')\n",
"    ax4.legend(ncol=3, loc=(0, 0.05))\n",
"    ax4.set_ylabel('minutos (media movil 7 días)', fontsize=12)\n",
"\n",
"    # Map of the zones, with the origin drawn on top in red\n",
"    colonias.boundary.plot(color='k', ax=ax5, lw=0.2)\n",
"    norte.boundary.plot(color='C1', ax=ax5, label='Norte', lw=0.2, zorder=-1)\n",
"    sur.boundary.plot(color='C4', ax=ax5, label='Sur', lw=0.2, zorder=-1)\n",
"    poniente.boundary.plot(color='C3', ax=ax5, label='Poniente', lw=0.2, zorder=-1)\n",
"    oriente.boundary.plot(color='C2', ax=ax5, label='Oriente', lw=0.2, zorder=-1)\n",
"    centro.boundary.plot(color='C0', ax=ax5, label='Centro', lw=0.2, zorder=-1)\n",
"    # `agebs` is indexed by MOVEMENT_ID as strings, hence the str() conversion\n",
"    agebs.loc[[str(ageb_origen)]].plot(color='red', ax=ax5, zorder=10)\n",
"    ax5.legend(bbox_to_anchor=(1.4, 0.7))\n",
"    ax5.set_axis_off()\n",
"\n",
"    # Title, credits and saving\n",
"    zona_origen = ageb_to_nombre[ageb_origen]\n",
"    # The credit line is positioned relative to the calendar axes (`ax`)\n",
"    fig.text(x=0, y=-0.1, s='Elaborado por @jjsantoso con datos de Uber Movements', fontdict={'size': 9}, transform=ax.transAxes)\n",
"    fig.suptitle(f'Todos los viajes de UBER con origen en {zona_origen} (Movement ID {ageb_origen}), 2019-1T', fontsize=18)\n",
"    figname = f'graficas/evolucion_diaria_{zona_origen}_2019T1.png'\n",
"    # savefig does not create missing folders, so make sure the target exists\n",
"    os.makedirs(os.path.dirname(figname), exist_ok=True)\n",
"    fig.savefig(figname, dpi=300, bbox_inches='tight')\n",
"    print('Guardada en:', figname)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Render and save one figure per origin point of interest.\n",
"for movement_id in ageb_to_nombre.keys():\n",
"    crea_grafica_evolucion(movement_id)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"hide_input": false,
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment