Skip to content

Instantly share code, notes, and snippets.

@gyosit
Last active October 19, 2020 04:58
Show Gist options
  • Save gyosit/dc0b7ae38ffaf42812d01a9af7aae02c to your computer and use it in GitHub Desktop.
Save gyosit/dc0b7ae38ffaf42812d01a9af7aae02c to your computer and use it in GitHub Desktop.
PD
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.animation as animation
import random
from mpl_toolkits.mplot3d import Axes3D
from scipy.interpolate import griddata
import pandas as pd
from keras import regularizers
import datetime
import random as rnd
from sklearn.metrics import r2_score
import math
from tqdm import tqdm
import copy
from sklearn.metrics import mean_squared_error
#Random Forest
from sklearn.ensemble import RandomForestRegressor
from sklearn import tree
import pydotplus as pdp
import pickle
from PIL import Image
from io import BytesIO
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
import warnings
warnings.filterwarnings("ignore")
class Graphics():
    """Plotting helper that draws two groups of daily series side by side."""

    def dayPlot2D(self, startday, y, title, xlabel, ylabel):
        """Draw ``y[0]`` in the left column and ``y[1]`` in the right column.

        Args:
            startday: First day of the x axis; one tick per day is generated
                for every column of ``y[1]``.
            y: Pair of 2-D arrays; row ``i`` of each array is plotted in grid
                row ``i``.
            title: Pair of titles, shown above the first row only.
            xlabel: Pair of x-axis labels (left, right).
            ylabel: Pair of label sequences, indexed by row.

        Returns:
            The matplotlib figure, after ``fig.show()`` has been called.
        """
        n_cols = 2
        n_rows = y[1].shape[0]
        days = pd.date_range(startday, periods=y[1].shape[1], freq='d')
        plt.rcParams["font.size"] = 12
        fig = plt.figure(figsize=(15, 10))
        for i in range(y[0].shape[0]):
            # side 0 is the left panel, side 1 the right panel of grid row i
            for side in (0, 1):
                panel = fig.add_subplot(n_rows, n_cols, 1 + side + i * 2)
                panel.plot(days, y[side][i], linewidth=2)
                if i == 0:
                    panel.set_title(title[side], fontsize=18)
                panel.set_xlabel(xlabel[side], fontsize=18)
                panel.set_ylabel(ylabel[side][i], fontsize=18)
                panel.grid(True)
        # Tilt the date ticks on every panel so they do not overlap.
        for panel in fig.axes:
            plt.sca(panel)
            plt.xticks(rotation=30)
        fig.show()
        return fig
class RF:
    """Random-forest regression helpers: fold splitting, training, validation, export."""

    def split(self, df, learning=0.8, fold=5):
        """Split ``df`` into shuffled K-fold training/validation frames.

        Args:
            df: DataFrame to split row-wise.
            learning: Unused; kept so existing callers keep working.
            fold: Number of folds.

        Returns:
            ``(train_frames, validation_frames)``, two lists with one
            DataFrame per fold.
        """
        # shuffle=True with a fixed seed gives shuffled but reproducible folds
        kf = KFold(n_splits=fold, shuffle=True, random_state=0)
        df_l, df_v = [], []
        for train_index, test_index in kf.split(df):
            df_l.append(df.iloc[train_index])
            df_v.append(df.iloc[test_index])
        return df_l, df_v

    def prepare(self, df, header):
        """Return the columns named in ``header`` as a 2-D array (rows x columns)."""
        return np.stack([df[h] for h in header]).T

    def machineL(self, x, y, target, j):
        """Fit a random forest on ``(x, y)`` and pickle it to ``./<target><j>.sav``.

        Also prints the features ranked by importance.

        Returns:
            ``(forest, indices, importances)`` where ``indices`` orders the
            feature columns from most to least important.
        """
        # Model
        forest = RandomForestRegressor(n_estimators=1000, n_jobs=-1, random_state=0, max_depth=None)
        forest.fit(x, y)
        # Context manager so the file handle is closed (and flushed) right away.
        with open('./' + target + str(j) + '.sav', 'wb') as fh:
            pickle.dump(forest, fh)
        importances = forest.feature_importances_
        indices = np.argsort(importances)[::-1]
        for rank, idx in enumerate(indices, start=1):
            print("%d. feature %d (%f)" % (rank, idx, importances[idx]))
        return forest, indices, importances

    def validation(self, forests, df, x_h_c, y_h, j):
        """Predict ``y_h`` for ``df`` with ``forests`` and report the result.

        Writes ``result_rf<n>_<j>.csv`` and ``Predicted<n>_<j>.png`` where
        ``<n>`` is the number of input columns.

        Args:
            forests: Fitted single-output models, one per entry of ``y_h``.
            df: Validation DataFrame.
            x_h_c: Input column names.
            y_h: Target column names.
            j: Fold index, used in the output file names.

        Returns:
            Mean squared error between the first target and its prediction.
        """
        # Evaluation
        l = str(len(x_h_c))
        # Use self, not a module-level instance, so the method works no matter
        # what the caller named its RF object.
        x_c = self.prepare(df, x_h_c)  # climate data
        y = self.prepare(df, y_h)  # actual data
        per_forest = []
        for forest in forests:
            # One batched call per forest instead of one call per row.
            pred = np.asarray(forest.predict(x_c), dtype=float).ravel()
            # Snap near-zero predictions to exactly 0.
            per_forest.append(np.where(np.abs(pred) < 0.0001, 0, pred))
        p = np.column_stack(per_forest)
        joint = np.concatenate([y, p], axis=1)
        columns = [h + "(actual)" for h in y_h] + [h + "(predicted)" for h in y_h]
        pd.DataFrame(joint, columns=columns).to_csv("result_rf" + l + "_" + str(j) + ".csv")
        g = Graphics()
        fig = g.dayPlot2D("20191231", y=[y.T, p.T], title=["Actual", "Predicted"], xlabel=["day", "day"], ylabel=[y_h, y_h])
        fig.savefig("Predicted" + l + "_" + str(j) + ".png")
        return mean_squared_error(y.T[0], p.T[0])

    def gradation(self, forest, fix, range1, range2):
        """Sweep two inputs over a grid and write the predictions to ``gradation.csv``.

        Entries of ``fix`` equal to -1 are replaced by the ``range1`` value and
        entries equal to -2 by the ``range2`` value; all other entries stay
        fixed. The CSV holds a header row ``[0, *range2]`` followed by one row
        ``[x1, predictions...]`` per ``range1`` value.
        """
        template = np.array(fix)
        cells = [0, *range2]
        for i, x1 in enumerate(range1):
            cells.append(x1)
            for j, x2 in enumerate(range2):
                x = np.where(template == -1, x1, template)
                x = np.where(x == -2, x2, x)
                forsee = forest.predict(x.reshape(1, -1))
                print(i, j, x, forsee)
                cells.extend(np.ravel(forsee))
        hist_p = np.array(cells, dtype=float).reshape(-1, len(range2) + 1)
        pd.DataFrame(hist_p).to_csv("gradation.csv")

    def showRF(self, rf):
        """Render the first tree of the fitted forest ``rf`` to ``./tree.png``."""
        estimator = rf.estimators_[0]
        dot_data = tree.export_graphviz(
            estimator,
            out_file=None,
            filled=True,
            rounded=True,
            special_characters=True
        )
        graph = pdp.graph_from_dot_data(dot_data)
        graph.write_png("./tree.png")
class Gene:
    """One individual of the genetic algorithm: a bounded real-valued vector."""

    def __init__(self, num, mins, maxs):
        """Create an individual with ``num`` loci drawn uniformly within bounds.

        Args:
            num: Number of loci.
            mins: Lower bound per locus (length ``num``).
            maxs: Upper bound per locus (length ``num``).

        Raises:
            ValueError: If ``mins`` or ``maxs`` does not hold ``num`` bounds.
        """
        if len(mins) != num or len(maxs) != num:
            raise ValueError("mins and maxs must each contain exactly num bounds")
        self.gene = [random.uniform(lo, hi) for lo, hi in zip(mins, maxs)]
        self.num = num
        self.mins = mins
        self.maxs = maxs
        self.fitness = 0

    @classmethod
    def getFitness(cls, genes, model):
        """Set ``fitness`` on every individual from ``model.predict``."""
        if not genes:
            return
        x = np.array([g.gene for g in genes]).reshape(-1, genes[0].num)
        for individual, score in zip(genes, model.predict(x)):
            individual.fitness = score

    def getFitness_(self):
        """Set and return a toy fitness: first locus minus second locus."""
        self.fitness = self.gene[0] - self.gene[1]
        return self.fitness

    def mutation(self, p=0.2):
        """With probability ``p``, redraw one random locus within its bounds."""
        # random.random() is uniform on [0, 1), so the comparison fires with
        # probability exactly p (never for p=0, always for p=1).
        if random.random() < p:
            locus = random.randrange(self.num)
            self.gene[locus] = random.uniform(self.mins[locus], self.maxs[locus])

    @classmethod
    def sortGene(cls, genes, model):
        """Score ``genes`` with ``model`` and return them sorted best-first."""
        cls.getFitness(genes, model)
        return sorted(genes, key=lambda g: g.fitness, reverse=True)

    @classmethod
    def select(cls, genes):
        """Return the first quarter of ``genes`` (rounded down)."""
        return genes[:len(genes) // 4]

    def cross(self, g1, g2):
        """Single-point crossover of ``g1`` and ``g2``.

        ``self`` is not used; the method is kept as an instance method so the
        existing ``Gene.cross(Gene, g1, g2)`` call style keeps working.

        Returns:
            ``[g1, g2, child1, child2]``. The parents are returned as the same
            objects, not copies.
        """
        # A cut point needs at least two loci; with one locus the children are
        # plain copies of the opposite parent.
        cut = random.randint(1, g1.num - 1) if g1.num > 1 else 0
        child1 = Gene(g1.num, g1.mins, g1.maxs)
        child2 = Gene(g1.num, g1.mins, g1.maxs)
        child1.gene = list(g1.gene[:cut]) + list(g2.gene[cut:])
        child2.gene = list(g2.gene[:cut]) + list(g1.gene[cut:])
        return [g1, g2, child1, child2]

    @classmethod
    def crossover(cls, genes):
        """Breed random parent pairs until the population is four times ``genes``.

        NOTE(review): a parent drawn more than once appears more than once in
        the result as the same object, so a later in-place ``mutation`` on it
        affects every occurrence. Confirm whether that is intended.
        """
        new_genes = []
        target = len(genes) * 4
        while len(new_genes) < target:
            p1 = genes[random.randrange(len(genes))]
            p2 = genes[random.randrange(len(genes))]
            new_genes += cls.cross(cls, p1, p2)
        return new_genes
if __name__ == '__main__':
    # Cross-validated random-forest training on the CSV below; accumulates the
    # per-feature importances over all folds and targets.
    rf = RF()
    df = pd.read_csv('kyuri_liquid_202001-06.csv')  # file name
    df_ls, df_vs = rf.split(df=df, learning=0.8, fold=5)

    # inputs (climate)
    x_h_c = [
        "radiation",
        "temp",
        "day_temp",
        "night_temp",
        "day_satiety",
        "night_satiety",
    ]
    # Reduced input sets previously tried for net_photo:
    #   ["radiation", "night_temp", "temp", "day_temp"]
    #   ["radiation", "temp", "day_temp"]
    x_h = x_h_c
    y_h = ["net_photo"]  # outputs
    sum_importances = [0] * len(x_h)

    for j, (df_l, df_v) in enumerate(zip(df_ls, df_vs)):
        print(df_l.values[:, 0])
        x = rf.prepare(df_l, x_h)

        # Training
        forests = []
        for targ in y_h:
            # NOTE(review): the model-file index is always 0 here, so every
            # fold overwrites '<target>0.sav' - confirm this is intended.
            forest, indices, importances = rf.machineL(x, df_l[targ], targ, 0)  # x, y
            sum_importances = [s + imp for s, imp in zip(sum_importances, importances)]
            print(sum_importances)
            forests.append(forest)

        # Validation
        rf.validation(forests, df_v, x_h_c, y_h, j)
        print("-----")

        # --- High contributing (disabled): retrain while adding inputs one at
        # a time in order of importance.
        # NOTE(review): the nesting below is reconstructed from a paste that
        # lost its indentation - verify before enabling.
        high_cont = False
        if high_cont:
            hc_x_h = []
            for hc in list(indices):
                hc_x_h.append(x_h_c[hc])
                x = rf.prepare(df_l, hc_x_h)
                forests = []
                for targ in y_h:
                    forest, indices, importances = rf.machineL(x, df_l[targ], targ, j)  # x, y
                    # Reload the model just written; 'with' closes the handle.
                    with open(targ + str(j) + '.sav', 'rb') as fh:
                        forest = pickle.load(fh)
                    forests.append(forest)
                print("-")
                print(rf.validation(forests, df_v, hc_x_h, y_h, j))
            print("-----")

    # Disabled genetic-algorithm search over the inputs using the last forest:
    #   genes = [Gene(6, [5,20,28,14,5,1],[20,30,33,26,10,4]) for i in range(10000)]
    #   for i in range(100):
    #       [g.mutation() for g in genes]
    #       genes = Gene.sortGene(genes, forests[0])
    #       print(i, genes[0].gene, genes[0].fitness)
    #       genes = Gene.select(genes)
    #       genes = Gene.crossover(genes)
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.animation as animation
import random
from mpl_toolkits.mplot3d import Axes3D
from scipy.interpolate import griddata
import pandas as pd
from keras import regularizers
import datetime
import random as rnd
from sklearn.metrics import r2_score
import math
from tqdm import tqdm
import copy
from sklearn.metrics import mean_squared_error
#Random Forest
from sklearn import svm
import pydotplus as pdp
import pickle
from PIL import Image
from io import BytesIO
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
import warnings
warnings.filterwarnings("ignore")
class Graphics():
    """Plotting helper that draws two groups of daily series side by side."""

    def dayPlot2D(self, startday, y, title, xlabel, ylabel):
        """Draw ``y[0]`` in the left column and ``y[1]`` in the right column.

        Args:
            startday: First day of the x axis; one tick per day is generated
                for every column of ``y[1]``.
            y: Pair of 2-D arrays; row ``i`` of each array is plotted in grid
                row ``i``.
            title: Pair of titles, shown above the first row only.
            xlabel: Pair of x-axis labels (left, right).
            ylabel: Pair of label sequences, indexed by row.

        Returns:
            The matplotlib figure, after ``fig.show()`` has been called.
        """
        n_cols = 2
        n_rows = y[1].shape[0]
        days = pd.date_range(startday, periods=y[1].shape[1], freq='d')
        plt.rcParams["font.size"] = 12
        fig = plt.figure(figsize=(15, 10))
        for i in range(y[0].shape[0]):
            # side 0 is the left panel, side 1 the right panel of grid row i
            for side in (0, 1):
                panel = fig.add_subplot(n_rows, n_cols, 1 + side + i * 2)
                panel.plot(days, y[side][i], linewidth=2)
                if i == 0:
                    panel.set_title(title[side], fontsize=18)
                panel.set_xlabel(xlabel[side], fontsize=18)
                panel.set_ylabel(ylabel[side][i], fontsize=18)
                panel.grid(True)
        # Tilt the date ticks on every panel so they do not overlap.
        for panel in fig.axes:
            plt.sca(panel)
            plt.xticks(rotation=30)
        fig.show()
        return fig
class SVR:
    """Support-vector regression helpers: fold splitting, training, validation."""

    def split(self, df, learning=0.8, fold=5):
        """Split ``df`` into shuffled K-fold training/validation frames.

        Args:
            df: DataFrame to split row-wise.
            learning: Unused; kept so existing callers keep working.
            fold: Number of folds.

        Returns:
            ``(train_frames, validation_frames)``, two lists with one
            DataFrame per fold.
        """
        # shuffle=True with a fixed seed gives shuffled but reproducible folds
        kf = KFold(n_splits=fold, shuffle=True, random_state=0)
        df_l, df_v = [], []
        for train_index, test_index in kf.split(df):
            df_l.append(df.iloc[train_index])
            df_v.append(df.iloc[test_index])
        return df_l, df_v

    def prepare(self, df, header):
        """Return the columns named in ``header`` as a 2-D array (rows x columns)."""
        return np.stack([df[h] for h in header]).T

    def machineL(self, x, y, target, j):
        """Fit an RBF-kernel SVR on ``(x, y)`` and pickle it to ``./<target><j>.sav``.

        Returns:
            The fitted model.
        """
        # Model
        model = svm.SVR(kernel="rbf", gamma=1, C=100, epsilon=0)
        model.fit(x, y)
        # Context manager so the file handle is closed (and flushed) right away.
        with open('./' + target + str(j) + '.sav', 'wb') as fh:
            pickle.dump(model, fh)
        return model

    def validation(self, models, df, x_h_c, y_h, j):
        """Predict ``y_h`` for ``df`` with ``models`` and report the result.

        Writes ``result_svr<n>_<j>.csv`` (``<n>`` = number of input columns)
        and shows an actual-vs-predicted figure.

        Args:
            models: Fitted single-output models, one per entry of ``y_h``.
            df: Validation DataFrame.
            x_h_c: Input column names.
            y_h: Target column names.
            j: Fold index, used in the output file name.

        Returns:
            Mean squared error between the first target and its prediction.
        """
        # Evaluation
        l = str(len(x_h_c))
        # Use self, not a module-level instance, so the method works no matter
        # what the caller named its SVR object.
        x_c = self.prepare(df, x_h_c)  # climate data
        y = self.prepare(df, y_h)  # actual data
        # One batched call per model instead of one call per row.
        p = np.column_stack(
            [np.asarray(model.predict(x_c), dtype=float).ravel() for model in models]
        )
        joint = np.concatenate([y, p], axis=1)
        columns = [h + "(actual)" for h in y_h] + [h + "(predicted)" for h in y_h]
        pd.DataFrame(joint, columns=columns).to_csv("result_svr" + l + "_" + str(j) + ".csv")
        g = Graphics()
        # The figure is shown by dayPlot2D; it is not saved to disk here.
        g.dayPlot2D("20191231", y=[y.T, p.T], title=["Actual", "Predicted"], xlabel=["day", "day"], ylabel=[y_h, y_h])
        return mean_squared_error(y.T[0], p.T[0])

    def gradation(self, forest, fix, range1, range2):
        """Sweep two inputs over a grid and write the predictions to ``gradation.csv``.

        Entries of ``fix`` equal to -1 are replaced by the ``range1`` value and
        entries equal to -2 by the ``range2`` value; all other entries stay
        fixed. The CSV holds a header row ``[0, *range2]`` followed by one row
        ``[x1, predictions...]`` per ``range1`` value.
        """
        template = np.array(fix)
        cells = [0, *range2]
        for i, x1 in enumerate(range1):
            cells.append(x1)
            for j, x2 in enumerate(range2):
                x = np.where(template == -1, x1, template)
                x = np.where(x == -2, x2, x)
                forsee = forest.predict(x.reshape(1, -1))
                print(i, j, x, forsee)
                cells.extend(np.ravel(forsee))
        hist_p = np.array(cells, dtype=float).reshape(-1, len(range2) + 1)
        pd.DataFrame(hist_p).to_csv("gradation.csv")
if __name__ == '__main__':
    # Cross-validated SVR training on the CSV below.
    svr = SVR()
    df = pd.read_csv('kyuri_liquid_202001-06.csv')  # file name
    df_ls, df_vs = svr.split(df=df, learning=0.8, fold=5)

    # inputs (climate)
    x_h_c = [
        "radiation",
        "temp",
        "day_temp",
        "night_temp",
        "day_satiety",
        "night_satiety",
    ]
    # Reduced input sets previously tried for net_photo:
    #   ["radiation", "night_temp", "temp", "day_temp"]
    #   ["radiation", "temp", "day_temp"]
    x_h = x_h_c
    y_h = ["net_photo"]  # outputs

    for j, (df_l, df_v) in enumerate(zip(df_ls, df_vs)):
        print(df_l.values[:, 0])
        x = svr.prepare(df_l, x_h)
        print(x.shape)

        # Training
        # NOTE(review): the model-file index is always 0 here, so every fold
        # overwrites '<target>0.sav' - confirm this is intended.
        models = [svr.machineL(x, df_l[targ], targ, 0) for targ in y_h]  # x, y

        # Validation
        svr.validation(models, df_v, x_h_c, y_h, j)
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.animation as animation
import random
from mpl_toolkits.mplot3d import Axes3D
from scipy.interpolate import griddata
import pandas as pd
from keras import regularizers
import datetime
import random as rnd
from sklearn.metrics import r2_score
import math
from tqdm import tqdm
import copy
from sklearn.metrics import mean_squared_error
#Random Forest
from keras.layers import Input, Dense, Dropout
from keras.models import Model
import pydotplus as pdp
import pickle
from PIL import Image
from io import BytesIO
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
import warnings
warnings.filterwarnings("ignore")
class Graphics():
    """Plotting helper that draws two groups of daily series side by side."""

    def dayPlot2D(self, startday, y, title, xlabel, ylabel):
        """Draw ``y[0]`` in the left column and ``y[1]`` in the right column.

        Args:
            startday: First day of the x axis; one tick per day is generated
                for every column of ``y[1]``.
            y: Pair of 2-D arrays; row ``i`` of each array is plotted in grid
                row ``i``.
            title: Pair of titles, shown above the first row only.
            xlabel: Pair of x-axis labels (left, right).
            ylabel: Pair of label sequences, indexed by row.

        Returns:
            The matplotlib figure, after ``fig.show()`` has been called.
        """
        n_cols = 2
        n_rows = y[1].shape[0]
        days = pd.date_range(startday, periods=y[1].shape[1], freq='d')
        plt.rcParams["font.size"] = 12
        fig = plt.figure(figsize=(15, 10))
        for i in range(y[0].shape[0]):
            # side 0 is the left panel, side 1 the right panel of grid row i
            for side in (0, 1):
                panel = fig.add_subplot(n_rows, n_cols, 1 + side + i * 2)
                panel.plot(days, y[side][i], linewidth=2)
                if i == 0:
                    panel.set_title(title[side], fontsize=18)
                panel.set_xlabel(xlabel[side], fontsize=18)
                panel.set_ylabel(ylabel[side][i], fontsize=18)
                panel.grid(True)
        # Tilt the date ticks on every panel so they do not overlap.
        for panel in fig.axes:
            plt.sca(panel)
            plt.xticks(rotation=30)
        fig.show()
        return fig
class DL:
    """Dense neural-network regression helpers: fold splitting, training, validation."""

    def split(self, df, learning=0.8, fold=5):
        """Split ``df`` into shuffled K-fold training/validation frames.

        Args:
            df: DataFrame to split row-wise.
            learning: Unused; kept so existing callers keep working.
            fold: Number of folds.

        Returns:
            ``(train_frames, validation_frames)``, two lists with one
            DataFrame per fold.
        """
        # shuffle=True with a fixed seed gives shuffled but reproducible folds
        kf = KFold(n_splits=fold, shuffle=True, random_state=0)
        df_l, df_v = [], []
        for train_index, test_index in kf.split(df):
            df_l.append(df.iloc[train_index])
            df_v.append(df.iloc[test_index])
        return df_l, df_v

    def prepare(self, df, header):
        """Return the columns named in ``header`` as a 2-D array (rows x columns)."""
        return np.stack([df[h] for h in header]).T

    def machineL(self, x, y, j):
        """Build, train and pickle a dense regression network.

        The network widens 8-16-32-64 and narrows 32-16-8-4 with ReLU layers,
        ends in one linear output unit, and is trained with Adam on mean
        squared error for 1000 epochs (20 % of the rows held out by Keras).
        The fitted model is pickled to ``./DL_<j>.sav``.

        Returns:
            The fitted model.
        """
        # Model
        inputs = Input(shape=(x.shape[1],))
        d = inputs
        for units in (8, 16, 32, 64, 32, 16, 8, 4):
            d = Dense(units, activation='relu')(d)
        outputs = Dense(1, activation='linear')(d)
        model = Model(inputs=inputs, outputs=outputs)
        model.summary()
        model.compile(optimizer='adam',
                      loss='mean_squared_error')
        model.fit(x, y, epochs=1000, validation_split=0.2, verbose=0)
        # NOTE(review): pickling a Keras model is not supported by every Keras
        # version; Keras's own model.save may be more reliable - confirm
        # before changing the on-disk format.
        with open('./DL_' + str(j) + '.sav', 'wb') as fh:
            pickle.dump(model, fh)
        return model

    def validation(self, model, df, x_h_c, y_h, j):
        """Predict ``y_h`` for ``df`` with ``model`` and report the result.

        Writes ``result_dl<n>_<j>.csv`` (``<n>`` = number of input columns)
        and shows an actual-vs-predicted figure.

        Args:
            model: Fitted model exposing ``predict``.
            df: Validation DataFrame.
            x_h_c: Input column names.
            y_h: Target column names.
            j: Fold index, used in the output file name.

        Returns:
            Mean squared error between the first target and its prediction.
        """
        # Evaluation
        l = str(len(x_h_c))
        x_c = self.prepare(df, x_h_c)  # climate data
        y = self.prepare(df, y_h)  # actual data
        # One batched predict call instead of one call per row; reshaped so
        # each row holds that sample's flattened outputs.
        p = np.asarray(model.predict(x_c), dtype=float).reshape(len(x_c), -1)
        joint = np.concatenate([y, p], axis=1)
        columns = [h + "(actual)" for h in y_h] + [h + "(predicted)" for h in y_h]
        pd.DataFrame(joint, columns=columns).to_csv("result_dl" + l + "_" + str(j) + ".csv")
        g = Graphics()
        # The figure is shown by dayPlot2D; it is not saved to disk here.
        g.dayPlot2D("20191231", y=[y.T, p.T], title=["Actual", "Predicted"], xlabel=["day", "day"], ylabel=[y_h, y_h])
        return mean_squared_error(y.T[0], p.T[0])

    def gradation(self, forest, fix, range1, range2):
        """Sweep two inputs over a grid and write the predictions to ``gradation.csv``.

        Entries of ``fix`` equal to -1 are replaced by the ``range1`` value and
        entries equal to -2 by the ``range2`` value; all other entries stay
        fixed. The CSV holds a header row ``[0, *range2]`` followed by one row
        ``[x1, predictions...]`` per ``range1`` value.
        """
        template = np.array(fix)
        cells = [0, *range2]
        for i, x1 in enumerate(range1):
            cells.append(x1)
            for j, x2 in enumerate(range2):
                x = np.where(template == -1, x1, template)
                x = np.where(x == -2, x2, x)
                forsee = forest.predict(x.reshape(1, -1))
                print(i, j, x, forsee)
                cells.extend(np.ravel(forsee))
        hist_p = np.array(cells, dtype=float).reshape(-1, len(range2) + 1)
        pd.DataFrame(hist_p).to_csv("gradation.csv")
if __name__ == '__main__':
    # Cross-validated dense-network training on the CSV below.
    dl = DL()
    df = pd.read_csv('kyuri_liquid_202001-06.csv')  # file name
    df_ls, df_vs = dl.split(df=df, learning=0.8, fold=5)

    # inputs (climate)
    x_h_c = [
        "radiation",
        "temp",
        "day_temp",
        "night_temp",
        "day_satiety",
        "night_satiety",
    ]
    # Reduced input sets previously tried for net_photo:
    #   ["radiation", "night_temp", "temp", "day_temp"]
    #   ["radiation", "temp", "day_temp"]
    x_h = x_h_c
    y_h = ["net_photo"]  # outputs

    for j, (df_l, df_v) in enumerate(zip(df_ls, df_vs)):
        print(df_l.values[:, 0])
        x = dl.prepare(df_l, x_h)
        y = dl.prepare(df_l, y_h)

        # Training
        # NOTE(review): the model-file index is always 0 here, so every fold
        # overwrites './DL_0.sav' - confirm this is intended.
        model = dl.machineL(x, y, 0)  # x, y

        # Validation
        dl.validation(model, df_v, x_h_c, y_h, j)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment