Skip to content

Instantly share code, notes, and snippets.

View slehkyi's full-sized avatar
🏠
Working from home

Serhii slehkyi

🏠
Working from home
  • Barcelona, Spain
View GitHub Profile
# Walk the forecast in index order, adding each value to a running total that
# starts at LAST_SUM, and report the first index at which the total reaches
# 100,000. Nothing is printed if the threshold is never reached.
TOTAL = LAST_SUM
for index, value in arima_pred_full_out.items():
    # Each forecast value is truncated to an int before it is accumulated.
    TOTAL += int(value)
    if TOTAL >= 100000:
        print(f"100K day is : {index:%Y-%m-%d}")
        break
# Empty frame whose daily index runs from SDATE through the day before EDATE;
# only its length is used below, as the number of steps to forecast.
predict = pd.DataFrame(
    index=pd.date_range(start=SDATE, end=EDATE - timedelta(days=1), freq='d')
)

# Fit on the complete 'Personnel' column (train-only alternative kept below).
y_full = data['Personnel']
# y_full = train['Personnel']
ARIMAmodel = ARIMA(y_full, order=best_order).fit()

# One forecast step per row of `predict`, with bounds at alpha = 0.05.
arima_pred_full = ARIMAmodel.get_forecast(steps=len(predict))
arima_pred_full_df = arima_pred_full.conf_int(alpha=0.05)

# Add the point predictions for the same span as the confidence bounds.
forecast_index = arima_pred_full_df.index
arima_pred_full_df["Predictions"] = ARIMAmodel.predict(
    start=forecast_index[0], end=forecast_index[-1]
)
# Candidate values for each component of the ARIMA order (p, d, q).
# These ranges were found manually after a few hours of plotting different values.
p_values = range(10, 30)
d_values = range(2)
q_values = range(1, 20)

# NOTE: this takes a long time; the cost depends on how many combinations
# of the values above are tried.
best_order = evaluate_models(data, p_values, d_values, q_values)
# Draw the autocorrelation plot for `data` and display the figure.
from pandas.plotting import autocorrelation_plot

autocorrelation_plot(series=data)
plt.show()
# evaluate an ARIMA model for a given order (p,d,q)
def evaluate_arima_model(d, arima_order):
    """Fit ARIMA on the training part of *d* and forecast over the test part.

    Rows of *d* whose index is on or before TRAIN_TEST_SPLIT_DATE form the
    training set; later rows form the test set. The model is fitted on the
    training 'Personnel' column and asked for one forecast step per test row.

    Args:
        d: frame with a 'Personnel' column and an index comparable to a
            pandas timestamp.
        arima_order: the ``order`` argument passed to ``ARIMA``.

    NOTE(review): the visible excerpt ends once the forecast is produced, so
    nothing is returned here; any scoring of the forecast is not shown.
    """
    # Parse the split date once and reuse it for both halves of the split.
    split_date = pd.to_datetime(TRAIN_TEST_SPLIT_DATE, format='%d/%m/%Y')
    train = d[d.index <= split_date]
    test = d[d.index > split_date]

    # Fit on the whole training series. `train.iloc[0]` would pass only the
    # first row, which is not a time series of observations.
    y = train['Personnel']
    ARIMAmodel = ARIMA(y, order=arima_order)
    ARIMAmodel = ARIMAmodel.fit()
    arima_pred = ARIMAmodel.get_forecast(len(test.index))
@slehkyi
slehkyi / arma.py
Last active November 6, 2022 22:27
# Fit SARIMAX with order (1, 0, 1) on the training 'Personnel' column.
y = train['Personnel']
ARMAmodel = SARIMAX(y, order=(1, 0, 1)).fit()

# One forecast step per test row, with bounds at alpha = 0.05.
arma_pred = ARMAmodel.get_forecast(steps=len(test))
arma_pred_df = arma_pred.conf_int(alpha=0.05)

# Add the point predictions for the same span as the confidence bounds.
arma_pred_df["Predictions"] = ARMAmodel.predict(
    start=arma_pred_df.index[0],
    end=arma_pred_df.index[-1],
)

# Relabel the forecast rows with the test index, then keep the predictions.
arma_pred_df.index = test.index
arma_pred_out = arma_pred_df["Predictions"]
# Rows on or before TRAIN_TEST_SPLIT_DATE go to train; later rows go to test.
split_date = pd.to_datetime(TRAIN_TEST_SPLIT_DATE, format='%d/%m/%Y')
train = data.loc[data.index <= split_date]
test = data.loc[data.index > split_date]

# Draw both parts on one figure: train in black, test in red.
plt.plot(train, color="black")
plt.plot(test, color="red")
plt.xlabel('Date')
plt.ylabel('Dohla rusnia')
plt.xticks(rotation=45)
plt.title("Train/Test split for dohla rusnia data")
plt.show()
# Label the axes, rotate the x tick labels, then draw 'Personnel' against
# the index of `data`.
plt.xlabel('Date')
plt.ylabel('Dohla rusnia')
plt.xticks(rotation=45)
plt.plot(data.index, data['Personnel'])
# Load the CSV, then move the 'date' column (parsed as day/month/year) out of
# the frame and into the index.
data = pd.read_csv('data/dead_rusnia.csv')
data.index = pd.to_datetime(data.pop('date'), format='%d/%m/%Y')
data.tail()
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.metrics import mean_squared_error
from datetime import date, timedelta
# The reference date is pinned to a fixed day; the live alternative is kept
# below, commented out.
# today = date.today()
today = date(year=2022, month=11, day=6)
SDATE = today + timedelta(days=1)  # start date: the day after `today`