Nhu Hoang geniusnhu

@geniusnhu
geniusnhu / TS Visualization
Created March 5, 2020 09:09
Time Series plot
import pandas as pd

# Aggregate to total weekly sales per date
auto_cor = sales.groupby("Date")["Weekly_Sales"].sum()
auto_cor = pd.DataFrame(auto_cor)
auto_cor.columns = ["y"]
# Add the lag of the target variable from 1 step back up to 52 (due to the seasonality at the end of the year)
for i in range(1, 53):
    auto_cor["lag_{}".format(i)] = auto_cor.y.shift(i)
# Compute autocorrelation of the series and its lags
lag_corr = auto_cor.corr()
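A hedged plotting sketch to go with the gist's title (matplotlib is assumed; it is not shown in the preview): plot the correlation of y against each of the 52 lags computed above.

import matplotlib.pyplot as plt

lag_ac = lag_corr["y"].drop("y")          # correlation of y with lag_1 ... lag_52
plt.figure(figsize=(12, 4))
plt.bar(range(1, len(lag_ac) + 1), lag_ac.values)
plt.xlabel("Lag (weeks)")
plt.ylabel("Correlation with weekly sales")
plt.title("Autocorrelation of total weekly sales")
plt.show()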
@geniusnhu
geniusnhu / Auto arima
Created March 5, 2020 09:20
Auto arima for time series
import pmdarima as pm

stepwise_model = pm.auto_arima(Wal_sales.iloc[:, 1].values, start_p=1, start_q=1,
                               max_p=20, max_q=20, m=52,
                               start_P=0, seasonal=True,
                               d=1, D=1, trace=True,
                               error_action='ignore',
                               suppress_warnings=True,
                               stepwise=True)
print(stepwise_model.aic())
# Result
# Split train and test
train = Wal_sales.iloc[:106,1].values
test = Wal_sales.iloc[106:,1].values
# Train the model
stepwise_model.fit(train)
# Predict test set
pred = stepwise_model.predict(n_periods=37)
# Reframe the data
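A hedged way to sanity-check the forecast before reframing the data (the RMSE metric is an assumption; it is not part of the original preview):

from sklearn.metrics import mean_squared_error
import numpy as np

rmse = np.sqrt(mean_squared_error(test, pred))
print(f"Test RMSE over the 37 held-out weeks: {rmse:,.2f}")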
@geniusnhu
geniusnhu / residual autocorrelation
Last active March 5, 2020 09:53
residual autocorrelation
# Compute Residual
train_pred = stepwise_model.predict(n_periods=106)
r_train = train - train_pred
r_test = test - pred
residual = pd.DataFrame(np.concatenate((r_train, r_test)), columns=["y"])
# Generate lag of Residuals from 1 step to 52 steps
# Adding the lag of the target variable from 1 steps back up to 52
for i in range(1, 53):
    residual["lag_{}".format(i)] = residual.y.shift(i)
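Mirroring the first gist, a hedged follow-up is to compute the correlation of the residual with its own lags to check for leftover autocorrelation (this step is an assumption; the preview ends at the loop):

residual_corr = residual.corr()
print(residual_corr["y"].drop("y").sort_values(ascending=False).head())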
@geniusnhu
geniusnhu / Train test set for Time series
Created March 5, 2020 09:29
Train test set for Time series
# Split train and test sets in correspondence with Time series data
def ts_train_test_split(X, y, test_size):
    test_index = int(len(X)*(1-test_size))
    X_train = X.iloc[:test_index]
    y_train = y.iloc[:test_index]
    X_test = X.iloc[test_index:]
    y_test = y.iloc[test_index:]
    return X_train, X_test, y_train, y_test
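A hedged usage sketch of the splitter; the toy weekly data below is purely illustrative and not part of the gist:

import numpy as np
import pandas as pd

idx = pd.date_range("2010-02-05", periods=143, freq="W")
X = pd.DataFrame({"week": np.arange(143)}, index=idx)
y = pd.Series(np.random.rand(143), index=idx, name="Weekly_Sales")

X_train, X_test, y_train, y_test = ts_train_test_split(X, y, test_size=0.3)
print(len(X_train), len(X_test))  # 100 train rows, 43 test rows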
from tqdm.notebook import tqdm
import plotly.graph_objects as go
import plotly.express as px
import gc
import pandas as pd
from tslearn.clustering import TimeSeriesKMeans
from tslearn.preprocessing import TimeSeriesScalerMeanVariance, TimeSeriesScalerMinMax
from kneed import KneeLocator
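The preview above shows only the imports; a hedged sketch of how they are typically combined for time-series clustering (the workflow and toy data are assumptions):

import numpy as np

series = np.random.rand(20, 52, 1)                       # 20 toy series, 52 weeks each
scaled = TimeSeriesScalerMeanVariance().fit_transform(series)

inertias = []
ks = list(range(2, 8))
for k in tqdm(ks):
    km = TimeSeriesKMeans(n_clusters=k, metric="euclidean", random_state=42)
    km.fit(scaled)
    inertias.append(km.inertia_)

# Pick the elbow of the inertia curve as the suggested cluster count
knee = KneeLocator(ks, inertias, curve="convex", direction="decreasing")
print("Suggested number of clusters:", knee.elbow)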
@geniusnhu
geniusnhu / main.py
Last active August 23, 2021 01:09
Main code pipeline example
## Example of the main code pipeline in .py format
import sys, os
import pandas as pd
import numpy as np
from your_classes.ClassOne import load_function, preprocess_function, training_function, save_result_function
import your_classes.PATH
def main():
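    # Hedged sketch of a possible body; the original preview ends at the
    # def line above, and the PATH attribute names below are assumptions.
    data = load_function(your_classes.PATH.RAW_DATA)
    processed = preprocess_function(data)
    model = training_function(processed)
    save_result_function(model, your_classes.PATH.OUTPUT)

if __name__ == "__main__":
    main()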
@geniusnhu
geniusnhu / YourFunction.py
Created August 23, 2021 01:12
Example of code annotation for a function and class
## Example of code annotation for a function
def your_function(X):
""" Explanation of what the function does
Parameters
----------
X: dtype
explanation of X
y: dtype
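The preview cuts off mid-docstring; below is a hedged, fully filled-in sketch of the same annotation pattern (the function, names, and dtypes are placeholders, not from the gist):

def scale_series(X, factor=1.0):
    """Scale every value in a series by a constant factor.

    Parameters
    ----------
    X : pd.Series
        Input series to scale.
    factor : float, default=1.0
        Multiplier applied to every element.

    Returns
    -------
    pd.Series
        The scaled series.
    """
    return X * factor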
@geniusnhu
geniusnhu / data_optimize.py
Created August 28, 2021 12:05
Optimize dtype function in Python
def data_optimize(df, object_option=False):
"""Reduce the size of the input dataframe
Parameters
----------
df: pd.DataFrame
input DataFrame
object_option : bool, default=False
if true, try to convert object to category
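The body of data_optimize is not shown in the preview; a minimal hedged sketch of the downcasting idea its docstring describes (the helper name and exact rules are assumptions):

import pandas as pd

def downcast_dtypes(df, object_option=False):
    """Illustrative only: downcast numeric columns, optionally object -> category."""
    for col in df.columns:
        dtype = df[col].dtype
        if pd.api.types.is_integer_dtype(dtype):
            df[col] = pd.to_numeric(df[col], downcast="integer")
        elif pd.api.types.is_float_dtype(dtype):
            df[col] = pd.to_numeric(df[col], downcast="float")
        elif object_option and pd.api.types.is_object_dtype(dtype):
            df[col] = df[col].astype("category")
    return df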
@geniusnhu
geniusnhu / list_generator.py
Created August 29, 2021 02:22
List vs Generator
>>> import sys
>>> my_generator_list = (i for i in range(100000))
>>> print(f"My generator is {sys.getsizeof(my_generator_list)} bytes")
My generator is 128 bytes
>>> %timeit my_generator_list  # IPython magic, not plain-REPL syntax
10000000 loops, best of 5: 32 ns per loop
>>> my_list = [i for i in range(100000)]
>>> print(f"My list is {sys.getsizeof(my_list)} bytes")