Skip to content

Instantly share code, notes, and snippets.

@hkawabata
Last active June 4, 2023 21:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hkawabata/fb0b6e86ba7e6bdbfbff115d73dafa57 to your computer and use it in GitHub Desktop.
Save hkawabata/fb0b6e86ba7e6bdbfbff115d73dafa57 to your computer and use it in GitHub Desktop.
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
def my_acorr(data, lag, is_experiment=False):
"""
自己相関係数の計算
- is_experiment: 自己共分散を計算するとき、全時系列長 n ではなく n-h で割り、一般的な共分散と同じように計算する実験
"""
data_ave = data.mean()
T = len(data)
d1 = data[:T-lag]
d2 = data[lag:]
if is_experiment:
r = ((d1-data_ave)*(d2-data_ave)).sum() / len(d1) / ((data-data_ave)**2).sum() * len(data)
else:
r = ((d1-data_ave)*(d2-data_ave)).sum() / ((data-data_ave)**2).sum()
return r
d = np.array([1,1,1,1,1,1,5]*10)
d = d + np.random.normal(0, 0.3, len(d)) # ノイズを追加する場合
result = []
for lag in range(len(d)):
r = my_acorr(d, lag)
r_exp = my_acorr(d, lag, is_experiment=True)
result.append([lag, r, r_exp])
df = pd.DataFrame(result, columns=['lag', 'acorr', 'acorr_exp'])
plt.figure(figsize=(8, 6)) # 描画範囲全体のサイズを設定(ヨコ, タテ)
plt.subplots_adjust(wspace=0.4, hspace=0.4) # グラフ間の余白の大きさ
# 元データ
plt.subplot(2, 1, 1)
plt.plot(range(len(d)), d, color='black')
plt.xlabel('time')
plt.ylabel('value')
plt.grid()
# 自己相関係数
plt.subplot(2, 1, 2)
plt.plot(df['lag'], df['acorr'], lw=2, label=r'$\rho$')
plt.plot(df['lag'], df['acorr_exp'], lw=1, label=r'$\rho^{\prime}$')
plt.xlabel('lag')
plt.ylabel('autocorrelation')
plt.grid()
plt.legend()
# 描画
plt.show()
acorr = []
acorr_exp = []
for i in range(1000):
r = []
r_exp = []
d = np.array([1,1,1,1,1,1,5]*10)
d = d + np.random.normal(0, 0.3, len(d))
for lag in range(len(d)-7):
r.append(my_acorr(d, lag))
r_exp.append(my_acorr(d, lag, is_experiment=True))
acorr.append(r)
acorr_exp.append(r_exp)
var = np.var(acorr, axis=0)
var_exp = np.var(acorr_exp, axis=0)
plt.plot(range(var.size), var, label=r'$Var(\rho)$')
plt.plot(range(var_exp.size), var_exp, label=r'$Var(\rho^{\prime})$')
plt.xlabel('lag')
plt.ylabel('variance of $R, R\'$')
plt.grid()
plt.legend()
plt.show()
# 偏自己相関係数の計算
pacf = {}
for k, vs in data.items():
# 各ラグに対する自己共分散
cov_all = []
for lag in range(len(vs)//2):
cov_all.append(np.cov(vs[:len(vs)-lag], vs[lag:])[0][1])
cov_all = np.array(cov_all)
# 自己共分散を並べた行列
cov_matrix_all = np.empty((len(cov_all), len(cov_all)))
for i in range(len(cov_all)):
for j in range(len(cov_all)):
cov_matrix_all[i][j] = cov_all[np.abs(i-j)]
# 偏自己共分散
pacf[k] = []
for lag in range(1, len(vs)//2):
cov_matrix = cov_matrix_all[:lag, :lag]
cov = cov_all[1:lag+1]
phi = np.linalg.inv(cov_matrix).dot(np.matrix(cov).T)[-1,0]
pacf[k].append(phi)
days = 5
plt.figure(figsize=(8, 10))
plt.subplots_adjust(wspace=0.4, hspace=0.4)
plt_cnt = 1
for k, vs in pacf.items():
plt.subplot(4, 1, plt_cnt)
plt.xticks(np.arange(0, 24*days + 1, 24))
plt.ylim([-1.0, 1.0])
plt.stem(range(1, 24*days+1), vs[:24*days], markerfmt='C0.')
plt.grid()
plt.title(k)
plt_cnt += 1
plt.show()
"""
おまけ:以上の実装が正しいか、statsmodels ライブラリの計算結果も確認
"""
import statsmodels.api as sm
days = 5
fig, ax = plt.subplots(4, 2, figsize=(10, 8))
plt_cnt = 0
for k, vs in data.items():
# 自己相関係数
sm.graphics.tsa.plot_acf(vs, lags=120, ax=ax[plt_cnt,0], marker='.')
ax[plt_cnt,0].set_xticks(np.arange(0, 24*days+1, 24))
ax[plt_cnt,0].grid()
ax[plt_cnt,0].set_title('Autocorrelation' if plt_cnt==0 else '')
ax[plt_cnt,0].set_ylabel(k, fontsize=12)
# 偏自己相関係数
sm.graphics.tsa.plot_pacf(vs, lags=120, ax=ax[plt_cnt,1], marker='.')
ax[plt_cnt,1].set_xticks(np.arange(0, 24*days+1, 24))
ax[plt_cnt,1].grid()
ax[plt_cnt,1].set_title('Partial Autocorrelation' if plt_cnt==0 else '')
plt_cnt += 1
plt.show()
import numpy as np
from matplotlib import pyplot as plt
from urllib import request
unit = {
'temperature': 'degree',
'rain-fall': 'mm',
'pressure': 'hPa',
'humidity': '%'
}
# データ読み込み・格納
csv_url = 'https://gist.githubusercontent.com/hkawabata/5e0e7cbbb142125643acb663b3c11559/raw/af638f0b4193a9adb4d1f3d069e389a196146daa/20220116_Yakushima_weather.csv'
response = request.urlopen(csv_url)
lines = response.read().decode().split('\n')
response.close()
t = []
data = {}
for k in unit:
data[k] = []
for line in lines[1:]:
arr = line.split(',')
t.append(arr[0])
data['temperature'].append(float(arr[1]))
data['rain-fall'].append(float(arr[4]))
data['pressure'].append(float(arr[8]))
data['humidity'].append(float(arr[11]))
# 自己共分散を計算
acf = {}
for k in unit:
acf[k] = []
for k, vs in data.items():
for lag in range(1, len(vs)//2):
acf[k].append(my_acorr(np.array(vs), lag))
# 元データの描画
days = 15
plt.figure(figsize=(8, 10))
plt.subplots_adjust(wspace=0.4, hspace=0.4)
plt_cnt = 1
for k, vs in data.items():
plt.subplot(4, 1, plt_cnt)
plt.xticks(np.arange(0, 24*days + 1, 24))
plt.plot(range(24*days), vs[:24*days])
plt.grid()
plt.title('{} [{}]'.format(k, unit[k]))
plt_cnt += 1
plt.show()
# 自己相関係数の描画
days = 5
plt.figure(figsize=(8, 10))
plt.subplots_adjust(wspace=0.4, hspace=0.4)
plt_cnt = 1
for k, vs in acf.items():
plt.subplot(4, 1, plt_cnt)
plt.xticks(np.arange(0, 24*days + 1, 24))
plt.ylim([-1.0, 1.0])
plt.stem(range(1, 24*days+1), vs[:24*days], markerfmt='C0.')
plt.grid()
plt.title(k)
plt_cnt += 1
plt.show()
@hkawabata
Copy link
Author

hkawabata commented Jun 1, 2023

Figure_1

Figure_2

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment