Nhu Hoang geniusnhu

@geniusnhu
geniusnhu / TS Visualization
Created March 5, 2020 09:09
Time Series plot
import pandas as pd

# Aggregate to total weekly sales per date
auto_cor = sales.groupby("Date")["Weekly_Sales"].sum()
auto_cor = pd.DataFrame(auto_cor)
auto_cor.columns = ["y"]
# Add the lag of the target variable from 1 step back up to 52 (due to the seasonality at the end of the year)
for i in range(1, 53):
    auto_cor["lag_{}".format(i)] = auto_cor.y.shift(i)
# Compute autocorrelation of the series and its lags
lag_corr = auto_cor.corr()
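A hedged plotting sketch to go with the gist's title (matplotlib is assumed; it is not shown in the preview): plot the correlation of y against each of the 52 lags computed above.

import matplotlib.pyplot as plt

lag_ac = lag_corr["y"].drop("y")          # correlation of y with lag_1 ... lag_52
plt.figure(figsize=(12, 4))
plt.bar(range(1, len(lag_ac) + 1), lag_ac.values)
plt.xlabel("Lag (weeks)")
plt.ylabel("Correlation with weekly sales")
plt.title("Autocorrelation of total weekly sales")
plt.show()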
@geniusnhu
geniusnhu / Auto arima
Created March 5, 2020 09:20
Auto arima for time series
import pmdarima as pm

stepwise_model = pm.auto_arima(Wal_sales.iloc[:, 1].values, start_p=1, start_q=1,
                               max_p=20, max_q=20, m=52,
                               start_P=0, seasonal=True,
                               d=1, D=1, trace=True,
                               error_action='ignore',
                               suppress_warnings=True,
                               stepwise=True)
print(stepwise_model.aic())
# Result
# Split train and test
train = Wal_sales.iloc[:106,1].values
test = Wal_sales.iloc[106:,1].values
# Train the model
stepwise_model.fit(train)
# Predict test set
pred = stepwise_model.predict(n_periods=37)
# Reframe the data
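A hedged way to sanity-check the forecast before reframing the data (the RMSE metric is an assumption; it is not part of the original preview):

from sklearn.metrics import mean_squared_error
import numpy as np

rmse = np.sqrt(mean_squared_error(test, pred))
print(f"Test RMSE over the 37 held-out weeks: {rmse:,.2f}")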
@geniusnhu
geniusnhu / residual autocorrelation
Last active March 5, 2020 09:53
residual autocorrelation
# Compute Residual
train_pred = stepwise_model.predict(n_periods=106)
r_train = train - train_pred
r_test = test - pred
residual = pd.DataFrame(np.concatenate((r_train, r_test)), columns=["y"])
# Generate lag of Residuals from 1 step to 52 steps
# Adding the lag of the target variable from 1 steps back up to 52
for i in range(1, 53):
    residual["lag_{}".format(i)] = residual.y.shift(i)
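Mirroring the first gist, a hedged follow-up is to compute the correlation of the residual with its own lags to check for leftover autocorrelation (this step is an assumption; the preview ends at the loop):

residual_corr = residual.corr()
print(residual_corr["y"].drop("y").sort_values(ascending=False).head())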
@geniusnhu
geniusnhu / Train test set for Time series
Created March 5, 2020 09:29
Train test set for Time series
# Split train and test sets in correspondence with Time series data
def ts_train_test_split(X, y, test_size):
    test_index = int(len(X)*(1-test_size))
    X_train = X.iloc[:test_index]
    y_train = y.iloc[:test_index]
    X_test = X.iloc[test_index:]
    y_test = y.iloc[test_index:]
    return X_train, X_test, y_train, y_test
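A hedged usage sketch of the splitter; the toy weekly data below is purely illustrative and not part of the gist:

import numpy as np
import pandas as pd

idx = pd.date_range("2010-02-05", periods=143, freq="W")
X = pd.DataFrame({"week": np.arange(143)}, index=idx)
y = pd.Series(np.random.rand(143), index=idx, name="Weekly_Sales")

X_train, X_test, y_train, y_test = ts_train_test_split(X, y, test_size=0.3)
print(len(X_train), len(X_test))  # 100 train rows, 43 test rows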
from tqdm.notebook import tqdm
import plotly.graph_objects as go
import plotly.express as px
import gc
import pandas as pd
from tslearn.clustering import TimeSeriesKMeans
from tslearn.preprocessing import TimeSeriesScalerMeanVariance, TimeSeriesScalerMinMax
from kneed import KneeLocator
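The preview above shows only the imports; a hedged sketch of how they are typically combined for time-series clustering (the workflow and toy data are assumptions):

import numpy as np

series = np.random.rand(20, 52, 1)                       # 20 toy series, 52 weeks each
scaled = TimeSeriesScalerMeanVariance().fit_transform(series)

inertias = []
ks = list(range(2, 8))
for k in tqdm(ks):
    km = TimeSeriesKMeans(n_clusters=k, metric="euclidean", random_state=42)
    km.fit(scaled)
    inertias.append(km.inertia_)

# Pick the elbow of the inertia curve as the suggested cluster count
knee = KneeLocator(ks, inertias, curve="convex", direction="decreasing")
print("Suggested number of clusters:", knee.elbow)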
@geniusnhu
geniusnhu / main.py
Last active August 23, 2021 01:09
Main code pipeline example
## Example of the main code pipeline in .py format
import sys, os
import pandas as pd
import numpy as np
from your_classes.ClassOne import load_function, preprocess_function, training_function, save_result_function
import your_classes.PATH
def main():
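    # Hedged sketch of a possible body; the original preview ends at the
    # def line above, and the PATH attribute names below are assumptions.
    data = load_function(your_classes.PATH.RAW_DATA)
    processed = preprocess_function(data)
    model = training_function(processed)
    save_result_function(model, your_classes.PATH.OUTPUT)

if __name__ == "__main__":
    main()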
@geniusnhu
geniusnhu / YourFunction.py
Created August 23, 2021 01:12
Example of code annotation for a function and class
## Example of code annotation for a function
def your_function(X):
""" Explanation of what the function does
Parameters
----------
X: dtype
explanation of X
y: dtype
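The preview cuts off mid-docstring; below is a hedged, fully filled-in sketch of the same annotation pattern (the function, names, and dtypes are placeholders, not from the gist):

def scale_series(X, factor=1.0):
    """Scale every value in a series by a constant factor.

    Parameters
    ----------
    X : pd.Series
        Input series to scale.
    factor : float, default=1.0
        Multiplier applied to every element.

    Returns
    -------
    pd.Series
        The scaled series.
    """
    return X * factor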
@geniusnhu
geniusnhu / data_optimize.py
Created August 28, 2021 12:05
Optimize dtype function in Python
def data_optimize(df, object_option=False):
"""Reduce the size of the input dataframe
Parameters
----------
df: pd.DataFrame
input DataFrame
object_option : bool, default=False
if true, try to convert object to category
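The body of data_optimize is not shown in the preview; a minimal hedged sketch of the downcasting idea its docstring describes (the helper name and exact rules are assumptions):

import pandas as pd

def downcast_dtypes(df, object_option=False):
    """Illustrative only: downcast numeric columns, optionally object -> category."""
    for col in df.columns:
        dtype = df[col].dtype
        if pd.api.types.is_integer_dtype(dtype):
            df[col] = pd.to_numeric(df[col], downcast="integer")
        elif pd.api.types.is_float_dtype(dtype):
            df[col] = pd.to_numeric(df[col], downcast="float")
        elif object_option and pd.api.types.is_object_dtype(dtype):
            df[col] = df[col].astype("category")
    return df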
@geniusnhu
geniusnhu / list_generator.py
Created August 29, 2021 02:22
List vs Generator
>>> import sys
>>> my_generator_list = (i for i in range(100000))
>>> print(f"My generator is {sys.getsizeof(my_generator_list)} bytes")
My generator is 128 bytes
>>> %timeit my_generator_list  # IPython magic, not plain-REPL syntax
10000000 loops, best of 5: 32 ns per loop
>>> my_list = [i for i in range(100000)]
>>> print(f"My list is {sys.getsizeof(my_list)} bytes")