Last active
August 30, 2017 19:26
-
-
Save shinob/46f3b587ef3dc1411c2deb57443ffccc to your computer and use it in GitHub Desktop.
データシティ鯖江のオープンデータを使って水位計の値を機械学習で予測してみる パート2 ref: http://qiita.com/mix_dvd/items/190a20f4ceb439a64107
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from ipywidgets import FloatProgress | |
from IPython.display import display | |
%matplotlib inline | |
import matplotlib | |
import matplotlib.pyplot as plt | |
import pandas as pd | |
import numpy as np | |
import datetime |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read the water-level readings. The first line of the CSV (presumably its
# own header row) is skipped so the column names can be set explicitly.
filename = "sparql.csv"
df_level = pd.read_csv(filename, header=None, skiprows=1)
df_level.columns = ["url", "datetime", "level"]

# Parse each timestamp string, then index the rows by time in chronological
# order (the sort is probably not required, but is kept as a safeguard).
df_level["datetime"] = df_level["datetime"].map(pd.to_datetime)
df_level = df_level.set_index("datetime").sort_index()

# Plot the water level over time.
df_level["level"].plot(figsize=(15, 5))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[54.0, 54.0, 54.0, 53.0, 53.0, 53.0, 53.0, 53.0, 53.0, 53.0, 0.0, 53.0] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the rainfall CSV. The file is read as Shift-JIS and its first four
# lines are skipped (presumably preamble rows -- confirm against the file).
filename = "data.csv"
# The result must be bound to df_rain: every statement below reads df_rain,
# so storing it under any other name raises NameError on a fresh kernel.
df_rain = pd.read_csv(filename, encoding="SHIFT-JIS", skiprows=4)
# Rename the columns.
df_rain.columns = ["datetime", "rain", "現象なし情報", "品質情報", "均質番号"]
# Convert the date/time strings to timestamps.
df_rain["datetime"] = df_rain["datetime"].map(pd.to_datetime)
# Use the timestamps as the index.
df_rain.index = df_rain.pop("datetime")
# Plot the water level and the rainfall.
df_level.level.plot(figsize=(15, 5))
df_rain.rain.plot(figsize=(15, 5))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the training table from the rainfall timestamps.
# For each timestamp i, the water levels between timestamps i and i+1 form
# the inputs and the water levels between i+1 and i+2 provide the target.
# (The original notes describe these as "the previous hour" and "one hour
# later" -- this assumes the rainfall rows are hourly; confirm.)
ixs = df_rain.index

rows = []  # one input row per usable timestamp
y = []     # matching target values
for i in range(len(ixs) - 2):
    dt1, dt2, dt3 = ixs[i], ixs[i + 1], ixs[i + 2]

    # Water-level readings inside each of the two consecutive intervals.
    d1 = df_level[dt1:dt2].level.tolist()
    d2 = df_level[dt2:dt3].level.tolist()

    # Only keep intervals that both hold more than 10 readings.
    if len(d1) > 10 and len(d2) > 10:
        # Target: the highest level in the following interval.
        y.append(max(d2))
        # Inputs: the 10 highest levels of the current interval,
        # in descending order ...
        d1 = sorted(d1, reverse=True)[:10]
        # ... followed by the rainfall of row i. Positional .iloc replaces
        # the .ix indexer, which pandas 1.0 removed.
        d1.append(df_rain["rain"].iloc[i])
        rows.append(d1)

# Convert to a DataFrame and attach the target column.
df = pd.DataFrame(rows)
df["y"] = y

# Check the amount of data.
print(df.shape)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Split the table into inputs (X) and target (y).
# The target is read without df.pop(): popping would strip "y" from df, and
# the later cell that splits df again would then fail with KeyError.
# .values replaces DataFrame.as_matrix(), which pandas 1.0 removed.
y = df["y"].values.astype("int")
X = df.drop("y", axis=1).values.astype("float")

# Use the first 90% of the rows for training and the remaining 10% for
# validation (rows are taken in order, not shuffled).
num = int(len(X) * 0.9)
print(len(X), num, len(X) - num)
X_train, X_test = X[:num], X[num:]
y_train, y_test = y[:num], y[num:]

# Use a random forest as the learning model.
from sklearn.ensemble import RandomForestRegressor
model = RandomForestRegressor(random_state=42)

# Train, then predict on the validation rows.
model.fit(X_train, y_train)
result = model.predict(X_test)

# Score on the validation rows.
print(model.score(X_test, y_test))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Plot actual vs. predicted values for the validation rows together with the
# rainfall input (last column of X_test). Rainfall is multiplied by 5 before
# plotting -- presumably to make it visible on the same scale; confirm.
pp = pd.DataFrame({
    "act": np.array(y_test),
    "pred": np.array(result),
    "rain": X_test[:, -1] * 5,
})
plt.figure(figsize=(15, 5))
plt.ylim(0, 250)
plt.plot(pp)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random

# Pick one row at random. random.randrange excludes len(df); the previous
# random.randint(0, len(df)) is inclusive at both ends and could return
# len(df), one past the last row.
i = random.randrange(len(df))
# .iloc / .values replace .ix / .as_matrix(), both removed in pandas 1.0.
d = df.iloc[i].values.tolist()
print(d)

# Build the test inputs: keep the row's 10 water-level values and vary the
# rainfall from 0 to 20.
df_test = [d[:10] + [rain] for rain in range(21)]

# Predict.
test = model.predict(np.array(df_test).astype("float"))

# Plot the predictions against the rainfall value.
plt.plot(test)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[150.0, 149.0, 149.0, 148.0, 147.0, 147.0, 147.0, 146.0, 146.0, 146.0, 8.0, 147.0] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Split the table into inputs (X) and target (y).
# "y" is only re-read while df still carries it: an earlier cell may already
# have removed that column, and an unconditional df.pop("y") would then
# raise KeyError. In that case the existing y array is reused.
# .values replaces DataFrame.as_matrix(), which pandas 1.0 removed.
if "y" in df.columns:
    y = df["y"].values.astype("int")
X = df.drop("y", axis=1, errors="ignore").values.astype("float")

# Use the first 90% of the rows for training and the remaining 10% for
# validation (rows are taken in order, not shuffled).
num = int(len(X) * 0.9)
print(len(X), num, len(X) - num)
X_train, X_test = X[:num], X[num:]
y_train, y_test = y[:num], y[num:]

# Standardize the inputs; the scaler is fitted on the training rows only and
# then applied to both splits.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Use a neural network (multi-layer perceptron) as the learning model.
from sklearn.neural_network import MLPRegressor
model = MLPRegressor(random_state=42)

# Train, then predict on the validation rows.
model.fit(X_train, y_train)
result = model.predict(X_test)

# Score on the validation rows.
print(model.score(X_test, y_test))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random

# Pick one row at random. random.randrange excludes len(df); the previous
# random.randint(0, len(df)) is inclusive at both ends and could return
# len(df), one past the last row.
i = random.randrange(len(df))
# .iloc / .values replace .ix / .as_matrix(), both removed in pandas 1.0.
row = df.iloc[i].values.tolist()
print(row)

# Build the test inputs: keep the row's 10 water-level values and vary the
# rainfall from 0 to 20.
df_test = [row[:10] + [rain] for rain in range(21)]

# Standardize the inputs with the scaler that was fitted on the training data.
d = scaler.transform(np.array(df_test).astype("float"))

# Predict and plot the predictions against the rainfall value.
test = model.predict(d)
plt.plot(test)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment