Skip to content

Instantly share code, notes, and snippets.

@yjucho1
Last active May 2, 2019 04:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yjucho1/fa517213628e0f8fcbf10a96cbe01141 to your computer and use it in GitHub Desktop.
Save yjucho1/fa517213628e0f8fcbf10a96cbe01141 to your computer and use it in GitHub Desktop.
from multiprocessing import cpu_count
from joblib import Parallel
from joblib import delayed
from warnings import catch_warnings
from warnings import filterwarnings
from statsmodels.tsa.statespace.sarimax import SARIMAX
import time
# one-step sarima forecast
def sarima_forecast(data, config):
order, sorder, trend = config
model = SARIMAX(data, order=order, seasonal_order=sorder,
trend=trend, enforce_stationarity=False, enforce_invertibility=False)
model_fit = model.fit(disp=False)
aic = model_fit.aic
return aic
# score a model, return None on failure
def score_model(data, cfg, debug=False):
result = None
key = str(cfg)
if debug:
result = sarima_forecast(data, cfg)
else:
try:
with catch_warnings():
filterwarnings("ignore")
result = sarima_forecast(data, cfg)
except:
error = None
if result is not None:
print(' > Model[%s] %.3f' % (key, result))
return (key, result)
# grid search configs
def grid_search(data, cfg_list, parallel=True):
scores = None
if parallel:
executor = Parallel(n_jobs=cpu_count(), backend='multiprocessing')
tasks = (delayed(score_model)(data, cfg) for cfg in cfg_list)
scores = executor(tasks)
else:
scores = [score_model(data, cfg) for cfg in cfg_list]
scores = [r for r in scores if r[1] != None]
scores.sort(key=lambda tup: tup[1])
return scores
# create a set of sarima configs to try
def sarima_configs(seasonal=[24]):
models = list()
# define config lists
p_params = [0, 1, 2]
d_params = [0, 1]
q_params = [0, 1, 2]
t_params = ['n','c','t','ct']
P_params = [0, 1, 2]
D_params = [0, 1]
Q_params = [0, 1, 2]
m_params = seasonal
# create config instances
for p in p_params:
for d in d_params:
for q in q_params:
for t in t_params:
for P in P_params:
for D in D_params:
for Q in Q_params:
for m in m_params:
cfg = [(p,d,q), (P,D,Q,m), t]
models.append(cfg)
return models
if __name__ == '__main__':
### grid search for model's order selection
# it takes long time.
data = list(df.pm25Value)
cfg_list = sarima_configs()
print('There are ' + str(len(cfg_list)) + ' alternatives.')
scores = grid_search(data, cfg_list)
print('done')
for cfg, error in scores[:3]:
print(cfg, error)
pd.to_pickle(scores, 'score.pkl')
## best model : SARIMA((2, 0, 2), (1, 1, 2, 24), 'n')
start = time.time()
order, sorder, trend = (2, 0, 2), (1, 1, 2, 24), 'n'
model = SARIMAX(df.pm25Value, order=order, seasonal_order=sorder,
trend=trend, enforce_stationarity=False, enforce_invertibility=False)
model_fit = model.fit(disp=False)
print(time.time()-start) ### 212.13128 (it takes some time. i.e. above 200 seconds.)
resid = model_fit.resid
## residual plot
plt.figure(figsize=(12,4))
plt.plot(resid)
plt.show()
## residual - acf, pacf
fig = plt.figure(figsize=(12,8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(resid, lags=100, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(resid, lags=100, ax=ax2)
plt.show()
## residual - qq plot
sm.graphics.qqplot(resid, line='q',fit=True)
plt.show()
## residual - nomality test
print(stats.normaltest(resid))
## forcast
# AR(1) model's RMSE: 3.006486217286414
# AR(3) model's RMSE: 2.9717700323027256
# AR(8) model's RMSE: 2.9174007777104203
def mean_forecast_err(y, yhat):
return np.sqrt(y.sub(yhat)**2).mean()
predict_pm25 = model_fit.predict(2000, 2200, dynamic=False)
print('RMSE : ' + str(mean_forecast_err(df.pm25Value, predict_pm25)))
predict_dyT = model_fit.predict(2000, 2200, dynamic=True)
predict_dyF = model_fit.predict(2000, 2200, dynamic=False)
plt.figure(figsize=(12,4))
plt.plot(df.pm25Value)
plt.plot(predict_dyT)
plt.plot(predict_dyF)
plt.legend(['PM2.5(true)', 'forcast_dynamic_True', 'forcast_dynamic_False'])
plt.show()
@yjucho1
Copy link
Author

yjucho1 commented May 2, 2019

haha

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment