# gyosit/graphic.py

## graphic.py
class Graphics():
  """Plotting helpers for daily time series."""

  def dayPlot2D(self, startday, y, title, xlabel, ylabel):
    """Draw two columns of stacked daily line plots and return the figure.

    Args:
      startday: first date of the x axis; passed to ``pd.date_range``.
      y: pair ``[left, right]`` of 2-D arrays. Each row of an array becomes
        one subplot in the corresponding column; each array column is one day.
      title: pair of titles, drawn above the first row only.
      xlabel: pair of x-axis labels, one per plot column.
      ylabel: pair of label sequences, indexed by row, one per plot column.

    Returns:
      The matplotlib figure, after ``fig.show()`` has been called on it.
    """
    n_cols = 2
    sides = [y[0], y[1]]

    # Size the grid for the taller side so that differing row counts can
    # neither overflow the subplot index nor silently drop rows.
    n_rows = max(series.shape[0] for series in sides)

    # One date axis per side, built once. 'D' is the documented calendar-day alias.
    days = [pd.date_range(startday, periods=series.shape[1], freq='D') for series in sides]

    # NOTE: this assigns matplotlib's global rcParams, so it affects later figures too.
    plt.rcParams["font.size"] = 12

    fig = plt.figure(figsize=(15,20))

    for i in range(n_rows):
      for side, series in enumerate(sides):
        if i >= series.shape[0]:
          continue  # this side has fewer rows than the other one
        ax = fig.add_subplot(n_rows, n_cols, 1 + side + i*n_cols)
        ax.plot(days[side], series[i], linewidth=2)
        if i == 0:
          ax.set_title(title[side], fontsize=18)
        ax.set_xlabel(xlabel[side], fontsize=18)
        ax.set_ylabel(ylabel[side][i], fontsize=18)
        ax.grid(True)
        # Rotate the date labels on the axes object itself instead of going
        # through pyplot's "current axes" state.
        ax.tick_params(axis='x', rotation=30)

    fig.show()

    return fig

## main.py
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.animation as animation
import random
from mpl_toolkits.mplot3d import Axes3D
from scipy.interpolate import griddata
import pandas as pd
from keras import regularizers
import datetime
import random as rnd
from sklearn.metrics import r2_score
import math
from tqdm import tqdm
import copy

#Random Forest
from sklearn.ensemble import RandomForestRegressor
from sklearn import tree
import pydotplus as pdp
import pickle
from PIL import Image
from io import BytesIO

import warnings
warnings.filterwarnings("ignore")

if __name__ == '__main__':
  rf = RF()

  # ---- Settings to edit per experiment ----
  train_df = pd.read_csv('training.csv')    # learning data file
  valid_df = pd.read_csv('validation.csv')  # validation data file
  climate_cols = ["Light", "CO2", "Temp"]   # input columns (climate)
  plant_cols = ["AofF", "AofL", "AofLA"]    # input columns (plants)
  target_cols = ["dF", "dL", "dLA"]         # output columns
  # ------------------------------------------

  # Model inputs: plant columns first, climate columns after them.
  feature_cols = np.append(plant_cols, climate_cols)
  train_x = rf.prepare(train_df, feature_cols)
  print(train_x)

  # Training: one forest per output column, kept in the order of target_cols.
  forests = []
  for target in target_cols:
    forests.append(rf.machineL(train_x, train_df[target], target))
    print("-----")

  # Export the first tree of the first forest as an image.
  rf.showRF(forests[0])

  # Validation run, then the prediction-only run, on the validation data.
  rf.validation(forests, valid_df, climate_cols, plant_cols, target_cols)
  rf.predict(forests, valid_df, climate_cols, plant_cols)

## rf.py
class RF:
  """Random-forest workflow: data preparation, training, roll-forward evaluation, export.

  One regressor is trained per output column. During validation/prediction the
  regressors' outputs are used as per-step increments: they are added to the
  plant-state vector, which is then fed back in together with the next row of
  climate inputs.
  """

  def split(self, df, learning=0.8, fold=5):
    """Split ``df`` in row order into a leading part and the remainder.

    Args:
      df: table to split (sliced positionally).
      learning: fraction of the rows that goes into the first part.
      fold: accepted for interface compatibility; not used.

    Returns:
      Tuple ``(df_l, df_v)``.
    """
    l = int(df.shape[0]*learning)
    return df[:l], df[l:]

  def prepare(self, df, header):
    """Return the columns named in ``header`` as an array of shape (rows, len(header))."""
    integrated = np.stack([df[h] for h in header])
    return integrated.T

  def machineL(self, x, y, target):
    """Fit a random-forest regressor, save it, and print its feature ranking.

    Args:
      x: 2-D training inputs.
      y: training targets for one output.
      target: output name; the fitted model is pickled to ``./<target>.sav``.

    Returns:
      The fitted ``RandomForestRegressor``.
    """
    # Model
    forest = RandomForestRegressor(n_estimators=100, n_jobs = -1)
    forest.fit(x, y)
    # Context manager so the file handle is closed even if pickling fails.
    with open('./'+target+'.sav', 'wb') as model_file:
      pickle.dump(forest, model_file)

    # Print the features from most to least important.
    importances = forest.feature_importances_
    indices = np.argsort(importances)[::-1]
    for rank, idx in enumerate(indices, start=1):
      print("%d. feature %d (%f)" % (rank, idx, importances[idx]))

    return forest

  def _rollout(self, forests, x_c, x_p):
    """Roll the plant state forward one step per row of ``x_c``.

    Args:
      forests: fitted regressors, one per component of the plant state.
      x_c: 2-D climate inputs, one row per step.
      x_p: initial plant state; it is copied, not modified.

    Returns:
      2-D array whose k-th row is the plant state *before* step k.
    """
    # Work on a float copy: the forests return floats, and an in-place add
    # onto an integer-typed state is rejected by NumPy's casting rules.
    state = np.array(x_p, dtype=float)
    history = []
    for eachx_c in x_c:
      history.append(state.copy())
      sample = np.append(state, eachx_c).reshape(1, -1)
      step = []
      for forest in forests:
        forsee = float(forest.predict(sample)[0])
        # Treat negligible increments as exactly zero.
        step.append(0.0 if abs(forsee) < 0.0001 else forsee)
      state += np.array(step)
    return np.array(history)

  def validation(self, forests, df, x_h_c, x_h_p, y_h, startday="20200629"):
    """Compare the rolled-forward prediction with the accumulated actual increments.

    Writes ``result.csv`` (actual columns first, then predicted columns, both
    labelled with ``x_h_p``) and ``Predicted.png``.

    Args:
      forests: fitted regressors, in the same order as ``x_h_p`` / ``y_h``.
      df: validation table.
      x_h_c: climate input column names.
      x_h_p: plant-state column names; the first row of ``df`` is the start state.
      y_h: column names of the actual per-step increments.
      startday: first date on the plot's x axis.

    Returns:
      Always 0.
    """
    # Evaluation
    x_c = self.prepare(df, x_h_c) # climate data
    x_p = self.prepare(df, x_h_p)[0] # plants growing initial data
    y = self.prepare(df, y_h) # Actual data
    print("y", y)

    p = self._rollout(forests, x_c, x_p)

    # Actual trajectory: start from the same initial state and add the recorded
    # increments row by row, recording the state before each addition.
    y_ = np.array(x_p, dtype=float)
    hist_y = []
    for delta in y:
      hist_y.append(y_.copy())
      y_ += delta
    y = np.array(hist_y)

    joint = np.concatenate([y, p], axis=1)
    df = pd.DataFrame(joint, columns = np.append(x_h_p, x_h_p))
    df.to_csv("result.csv")

    g = Graphics()
    fig = g.dayPlot2D(startday, y=[y.T, p.T], title=["Actual", "Predicted"], xlabel=["day","day"], ylabel=[x_h_p,x_h_p])
    fig.savefig("Predicted.png")

    return 0

  def predict(self, forests, df, x_h_c, x_h_p):
    """Roll the prediction forward over ``df`` and write it to ``predicted.csv``.

    Args:
      forests: fitted regressors, in the same order as ``x_h_p``.
      df: table providing the climate inputs and the initial plant state (first row).
      x_h_c: climate input column names.
      x_h_p: plant-state column names (also the CSV column labels).
    """
    x_c = self.prepare(df, x_h_c) # climate data
    x_p = self.prepare(df, x_h_p)[0] # plants growing initial data

    p = self._rollout(forests, x_c, x_p)

    df = pd.DataFrame(p, columns = x_h_p)
    df.to_csv("predicted.csv")

  def showRF(self, rf):
    """Render the first tree of the fitted forest ``rf`` to ``./tree.png``."""
    estimator = rf.estimators_[0]
    filename = "./tree.png"
    dot_data = tree.export_graphviz(
                estimator,
                out_file=None,
                filled=True,
                rounded=True,
                special_characters=True
                )
    graph = pdp.graph_from_dot_data(dot_data)
    graph.write_png(filename)

def removeNan(x):
  # NOTE(review): the body of this function is elided in this copy of the source
  # (only the `#...` placeholder below remains). `res_x` is never assigned here,
  # so calling the function as written raises NameError. Presumably it is meant
  # to return `x` with NaN entries removed -- confirm against the full implementation.
  #...
  return res_x
	class Graphics():
		"""Plotting helpers for daily time series."""

		def dayPlot2D(self, startday, y, title, xlabel, ylabel):
			"""Draw two columns of stacked daily line plots and return the figure.

			Args:
				startday: first date of the x axis; passed to ``pd.date_range``.
				y: pair ``[left, right]`` of 2-D arrays. Each row of an array becomes
					one subplot in the corresponding column; each array column is one day.
				title: pair of titles, drawn above the first row only.
				xlabel: pair of x-axis labels, one per plot column.
				ylabel: pair of label sequences, indexed by row, one per plot column.

			Returns:
				The matplotlib figure, after ``fig.show()`` has been called on it.
			"""
			n_cols = 2
			sides = [y[0], y[1]]

			# Size the grid for the taller side so that differing row counts can
			# neither overflow the subplot index nor silently drop rows.
			n_rows = max(series.shape[0] for series in sides)

			# One date axis per side, built once. 'D' is the documented calendar-day alias.
			days = [pd.date_range(startday, periods=series.shape[1], freq='D') for series in sides]

			# NOTE: this assigns matplotlib's global rcParams, so it affects later figures too.
			plt.rcParams["font.size"] = 12

			fig = plt.figure(figsize=(15,20))

			for i in range(n_rows):
				for side, series in enumerate(sides):
					if i >= series.shape[0]:
						continue  # this side has fewer rows than the other one
					ax = fig.add_subplot(n_rows, n_cols, 1 + side + i*n_cols)
					ax.plot(days[side], series[i], linewidth=2)
					if i == 0:
						ax.set_title(title[side], fontsize=18)
					ax.set_xlabel(xlabel[side], fontsize=18)
					ax.set_ylabel(ylabel[side][i], fontsize=18)
					ax.grid(True)
					# Rotate the date labels on the axes object itself instead of going
					# through pyplot's "current axes" state.
					ax.tick_params(axis='x', rotation=30)

			fig.show()

			return fig
	import numpy as np
	import matplotlib.pyplot as plt
	from mpl_toolkits.mplot3d import Axes3D
	import matplotlib.animation as animation
	import random
	from mpl_toolkits.mplot3d import Axes3D
	from scipy.interpolate import griddata
	import pandas as pd
	from keras import regularizers
	import datetime
	import random as rnd
	from sklearn.metrics import r2_score
	import math
	from tqdm import tqdm
	import copy

	#Random Forest
	from sklearn.ensemble import RandomForestRegressor
	from sklearn import tree
	import pydotplus as pdp
	import pickle
	from PIL import Image
	from io import BytesIO

	import warnings
	warnings.filterwarnings("ignore")

	if __name__ == '__main__':
		rf = RF()

		# ---- Settings to edit per experiment ----
		train_df = pd.read_csv('training.csv')    # learning data file
		valid_df = pd.read_csv('validation.csv')  # validation data file
		climate_cols = ["Light", "CO2", "Temp"]   # input columns (climate)
		plant_cols = ["AofF", "AofL", "AofLA"]    # input columns (plants)
		target_cols = ["dF", "dL", "dLA"]         # output columns
		# ------------------------------------------

		# Model inputs: plant columns first, climate columns after them.
		feature_cols = np.append(plant_cols, climate_cols)
		train_x = rf.prepare(train_df, feature_cols)
		print(train_x)

		# Training: one forest per output column, kept in the order of target_cols.
		forests = []
		for target in target_cols:
			forests.append(rf.machineL(train_x, train_df[target], target))
			print("-----")

		# Export the first tree of the first forest as an image.
		rf.showRF(forests[0])

		# Validation run, then the prediction-only run, on the validation data.
		rf.validation(forests, valid_df, climate_cols, plant_cols, target_cols)
		rf.predict(forests, valid_df, climate_cols, plant_cols)
	class RF:
		"""Random-forest workflow: data preparation, training, roll-forward evaluation, export.

		One regressor is trained per output column. During validation/prediction the
		regressors' outputs are used as per-step increments: they are added to the
		plant-state vector, which is then fed back in together with the next row of
		climate inputs.
		"""

		def split(self, df, learning=0.8, fold=5):
			"""Split ``df`` in row order into a leading part and the remainder.

			Args:
				df: table to split (sliced positionally).
				learning: fraction of the rows that goes into the first part.
				fold: accepted for interface compatibility; not used.

			Returns:
				Tuple ``(df_l, df_v)``.
			"""
			l = int(df.shape[0]*learning)
			return df[:l], df[l:]

		def prepare(self, df, header):
			"""Return the columns named in ``header`` as an array of shape (rows, len(header))."""
			integrated = np.stack([df[h] for h in header])
			return integrated.T

		def machineL(self, x, y, target):
			"""Fit a random-forest regressor, save it, and print its feature ranking.

			Args:
				x: 2-D training inputs.
				y: training targets for one output.
				target: output name; the fitted model is pickled to ``./<target>.sav``.

			Returns:
				The fitted ``RandomForestRegressor``.
			"""
			# Model
			forest = RandomForestRegressor(n_estimators=100, n_jobs = -1)
			forest.fit(x, y)
			# Context manager so the file handle is closed even if pickling fails.
			with open('./'+target+'.sav', 'wb') as model_file:
				pickle.dump(forest, model_file)

			# Print the features from most to least important.
			importances = forest.feature_importances_
			indices = np.argsort(importances)[::-1]
			for rank, idx in enumerate(indices, start=1):
				print("%d. feature %d (%f)" % (rank, idx, importances[idx]))

			return forest

		def _rollout(self, forests, x_c, x_p):
			"""Roll the plant state forward one step per row of ``x_c``.

			Args:
				forests: fitted regressors, one per component of the plant state.
				x_c: 2-D climate inputs, one row per step.
				x_p: initial plant state; it is copied, not modified.

			Returns:
				2-D array whose k-th row is the plant state *before* step k.
			"""
			# Work on a float copy: the forests return floats, and an in-place add
			# onto an integer-typed state is rejected by NumPy's casting rules.
			state = np.array(x_p, dtype=float)
			history = []
			for eachx_c in x_c:
				history.append(state.copy())
				sample = np.append(state, eachx_c).reshape(1, -1)
				step = []
				for forest in forests:
					forsee = float(forest.predict(sample)[0])
					# Treat negligible increments as exactly zero.
					step.append(0.0 if abs(forsee) < 0.0001 else forsee)
				state += np.array(step)
			return np.array(history)

		def validation(self, forests, df, x_h_c, x_h_p, y_h, startday="20200629"):
			"""Compare the rolled-forward prediction with the accumulated actual increments.

			Writes ``result.csv`` (actual columns first, then predicted columns, both
			labelled with ``x_h_p``) and ``Predicted.png``.

			Args:
				forests: fitted regressors, in the same order as ``x_h_p`` / ``y_h``.
				df: validation table.
				x_h_c: climate input column names.
				x_h_p: plant-state column names; the first row of ``df`` is the start state.
				y_h: column names of the actual per-step increments.
				startday: first date on the plot's x axis.

			Returns:
				Always 0.
			"""
			# Evaluation
			x_c = self.prepare(df, x_h_c) # climate data
			x_p = self.prepare(df, x_h_p)[0] # plants growing initial data
			y = self.prepare(df, y_h) # Actual data
			print("y", y)

			p = self._rollout(forests, x_c, x_p)

			# Actual trajectory: start from the same initial state and add the recorded
			# increments row by row, recording the state before each addition.
			y_ = np.array(x_p, dtype=float)
			hist_y = []
			for delta in y:
				hist_y.append(y_.copy())
				y_ += delta
			y = np.array(hist_y)

			joint = np.concatenate([y, p], axis=1)
			df = pd.DataFrame(joint, columns = np.append(x_h_p, x_h_p))
			df.to_csv("result.csv")

			g = Graphics()
			fig = g.dayPlot2D(startday, y=[y.T, p.T], title=["Actual", "Predicted"], xlabel=["day","day"], ylabel=[x_h_p,x_h_p])
			fig.savefig("Predicted.png")

			return 0

		def predict(self, forests, df, x_h_c, x_h_p):
			"""Roll the prediction forward over ``df`` and write it to ``predicted.csv``.

			Args:
				forests: fitted regressors, in the same order as ``x_h_p``.
				df: table providing the climate inputs and the initial plant state (first row).
				x_h_c: climate input column names.
				x_h_p: plant-state column names (also the CSV column labels).
			"""
			x_c = self.prepare(df, x_h_c) # climate data
			x_p = self.prepare(df, x_h_p)[0] # plants growing initial data

			p = self._rollout(forests, x_c, x_p)

			df = pd.DataFrame(p, columns = x_h_p)
			df.to_csv("predicted.csv")

		def showRF(self, rf):
			"""Render the first tree of the fitted forest ``rf`` to ``./tree.png``."""
			estimator = rf.estimators_[0]
			filename = "./tree.png"
			dot_data = tree.export_graphviz(
				estimator,
				out_file=None,
				filled=True,
				rounded=True,
				special_characters=True
			)
			graph = pdp.graph_from_dot_data(dot_data)
			graph.write_png(filename)

	def removeNan(x):
		# NOTE(review): the body of this function is elided in this copy of the source
		# (only the `#...` placeholder below remains). `res_x` is never assigned here,
		# so calling the function as written raises NameError. Presumably it is meant
		# to return `x` with NaN entries removed -- confirm against the full implementation.
		#...
		return res_x