Skip to content

Instantly share code, notes, and snippets.

View gonzalezgouveia's full-sized avatar

Rafael Gonzalez Gouveia gonzalezgouveia

View GitHub Profile
import pandas as pd
# Minimal smoke-test snippet: emit a greeting, then exercise a trivial branch.
print('hello world')
if True:
    # The body must be indented under the `if`; without the indentation the
    # snippet fails to parse with an IndentationError.
    print('True!')
dia hora viajes
0 0 38
0 1 0
0 2 0
0 3 0
0 4 0
0 5 145
0 6 627.5
0 7 1757.25
0 8 3181
@gonzalezgouveia
gonzalezgouveia / ecobici_pandas.py
Last active November 26, 2018 15:58
Carga de módulos y datos para el análisis de Ecobici
import pandas as pd
viajes = pd.read_csv('2018-10.csv')
@gonzalezgouveia
gonzalezgouveia / ecobici_data_preparation_time.py
Last active November 26, 2018 21:05
ecobici data preparation time
# Join the Fecha_Retiro and Hora_Retiro text columns into one
# "<date> <time>" string column.
viajes['fecha_hora_retiro'] = viajes.Fecha_Retiro + ' ' + viajes.Hora_Retiro
# Convert the strings to datetimes with a single vectorized call. The explicit
# day-first format keeps the parsing strict, and this avoids depending on a
# `datetime` name that the snippet never imports.
# NOTE(review): unlike a per-row strptime, missing values become NaT here
# instead of raising -- confirm that is acceptable for this dataset.
viajes['fecha_hora'] = pd.to_datetime(
    viajes.fecha_hora_retiro, format='%d/%m/%Y %H:%M:%S')
# Re-index the dataframe by the parsed timestamp.
viajes.index = viajes.fecha_hora
@gonzalezgouveia
gonzalezgouveia / ecobici_data_aggregation.py
Last active November 26, 2018 15:27
ecobici data aggregation
# Count the non-null Bici entries in each hourly bin. The variable keeps its
# "_day" name, but the resample frequency used here is hourly ('H').
viajes_resample_day = viajes['Bici'].resample('H').count()
# Hold the counts in a dataframe so the weekday can be stored next to them.
df_resample = viajes_resample_day.to_frame()
df_resample['dayofweek'] = df_resample.index.dayofweek  # 0 is Monday
# Keep Monday through Friday (weekday codes 0-4) and return the counts only.
is_weekday = df_resample['dayofweek'] < 5
df_mon_to_fri = df_resample.loc[is_weekday, 'Bici']
@gonzalezgouveia
gonzalezgouveia / ecobici_model_sarima.py
Last active November 28, 2018 07:09
ecobici_model_sarima.py
from statsmodels.tsa.statespace.sarimax import SARIMAX
# Series to model: the Monday-to-Friday counts.
x = df_mon_to_fri
# Model orders, named so the two tuples are easy to tell apart:
# non-seasonal (p, d, q) and seasonal (P, D, Q, period).
orden = (2, 0, 1)
orden_estacional = (2, 1, 0, 24)
# Build the model object.
sarima_model = SARIMAX(endog=x, order=orden, seasonal_order=orden_estacional)
# Fit the model and keep the results object.
results = sarima_model.fit()
@gonzalezgouveia
gonzalezgouveia / ecobici_order_sarima.py
Last active November 2, 2018 10:09
ecobici order sarima
from statsmodels.tsa.statespace.sarimax import SARIMAX # SARIMA model
from itertools import product # simplifies writing the for loop
# Column names for the results table: the six order values plus three
# fit metrics.
niveles = ['p', 'd', 'q', 'sp', 'sd', 'sq', 'AIC', 'BIC','log-like']
# Empty dataframe with those columns.
# NOTE(review): presumably filled in by the loop below; its body is not visible here.
resultados_modelos = pd.DataFrame(columns=niveles)
# Candidate values tried for each of the six orders.
grados = [0,1,2]
# Iterate over every possible combination of orders.
for p, d, q, sp, sd, sq in product(grados, repeat=6):
@gonzalezgouveia
gonzalezgouveia / ecobici_forecast.py
Last active November 26, 2018 15:45
ecobici forecast
# Take the original data for 29-Oct, 30-Oct and 31-Oct and plot it.
df_29_31 = df_mon_to_fri.loc['2018-10-29':'2018-10-31']
df_29_31.plot()
# Forecast horizon in hourly steps (two days). Defined once so the number of
# forecast steps and the length of the timestamp index cannot drift apart.
forecast_steps = 24 * 2
# Confidence bands for the forecast.
pred_1_2_conf = results.get_forecast(steps=forecast_steps).conf_int()
# Hourly timestamps starting at 2018-11-01 00:00, one per forecast step. The
# same index labels the confidence bands and serves as the x values.
x = pd.date_range(start='11/1/2018', periods=forecast_steps, freq='H')
pred_1_2_conf.index = x
# Lower and upper bounds of the band.
y1 = pred_1_2_conf['lower Bici']
y2 = pred_1_2_conf['upper Bici']
# Attach the tidyverse packages (provides read_csv and the pipe).
library(tidyverse)
# Location of the wine reviews CSV file.
path <- "path/to/data/winemag-data-130k-v2.csv"
# Read the CSV, taking the first row as the column names.
winedata <- path %>% read_csv(col_names = TRUE)
@gonzalezgouveia
gonzalezgouveia / wine_exploratory.R
Last active February 5, 2019 19:52
wine_exploratory.R
# Fix the random seed so the sampled rows and the jitter are reproducible.
set.seed(12321)
# Scatter plot of a 10,000-row random sample: price on the x axis, jittered
# points on the y axis, coloured by country.
winedata %>%
  sample_n(size = 10000) %>%
  ggplot(aes(x = price,
             y = jitter(points, factor = 3),
             color = country)) +
  geom_point(size = 2) +
  labs(x = 'Precio',
       y = 'Puntuación',
       title = 'Exploración por país')
# Save the most recent plot to disk.
ggsave('./path/to/data/all_wine_country.png')