{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Q-learning first attempt"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Optionally install what you need:"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# import sys\n",
"# !{sys.executable} -m pip install seaborn gym-retro Pillow keras tensorflow opencv-python pandas matplotlib scipy"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"First we set up all of the imports that we will have with the project, there are a bunch:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-2-5b9450a8a2b2>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0mpp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpprint\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mPrettyPrinter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindent\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 11\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mcv2\u001b[0m \u001b[0;31m#OpenCV\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 12\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mgym_remote\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclient\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mgrc\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/Development/Bobcats/reverie_agent/reverie_agent/lib/python3.6/site-packages/cv2/__init__.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mimportlib\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0mcv2\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"import random\n",
"import math\n",
"import retro\n",
"from PIL import Image\n",
"import gym\n",
"import pickle\n",
"import operator\n",
"import pprint\n",
"pp = pprint.PrettyPrinter(indent=4)\n",
"import numpy as np\n",
"import cv2 #OpenCV\n",
"import time\n",
"import gym_remote.client as grc\n",
"import gym_remote.exceptions as gre\n",
"import os\n",
"import json\n",
"import pandas as pd\n",
"from IPython.display import clear_output\n",
"from collections import deque\n",
"from matplotlib import pyplot as plt\n",
"plt.rcParams['figure.figsize'] = (30, 30)\n",
"import seaborn as sns\n",
"\n",
"from keras.initializers import normal, identity\n",
"from keras.models import model_from_json\n",
"from keras.models import Sequential\n",
"from keras.layers.core import Dense, Dropout, Activation, Flatten\n",
"from keras.layers.convolutional import Conv2D, MaxPooling2D\n",
"from keras.optimizers import SGD , Adam\n",
"import tensorflow as tf\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Helper functions handle disc IO, and help set the stage for recor keeping in a run. `show_img` was useful in rendering a frame of sonic, at various stages in the image processing. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# file system interactors\n",
"def save_obj(obj, name ):\n",
" with open(name + '.pkl', 'wb') as f: #dump files into objects folder\n",
" pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)\n",
"\n",
"def load_obj(name ):\n",
" try:\n",
" with open(name + '.pkl', 'rb') as f:\n",
" return pickle.load(f)\n",
" except FileNotFoundError:\n",
" if name == 'epsilon':\n",
" return .7;\n",
" return []\n",
" else:\n",
" return []\n",
" \n",
"output_dir = './run-'+time.strftime(\"%Y%m%d-%H:%M\")+'/'\n",
"if not os.path.exists(output_dir):\n",
" os.makedirs(output_dir)\n",
"loss_file_path = output_dir+\"loss_df.csv\"\n",
"\n",
"#Intialize log structures from file if exists else create new#Intiali \n",
"loss_df = pd.read_csv(loss_file_path) if os.path.isfile(loss_file_path) else pd.DataFrame(columns =['loss'])\n"
]
},
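{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick sketch of how these helpers round-trip an object to disk (the `scratch` name below is purely illustrative):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: round-trip an object with the helpers above\n",
"save_obj({'frames': 42}, output_dir + 'scratch')\n",
"print(load_obj(output_dir + 'scratch'))  # -> {'frames': 42}\n",
"print(load_obj('epsilon'))  # -> 0.7 while no epsilon.pkl has been saved yet"
]
},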
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def show_img(image,graphs = False):\n",
" \"\"\"\n",
" Show images in new window\n",
" \"\"\"\n",
" while True:\n",
"# print(image.shape)\n",
"# image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)\n",
" processed = process_img(image)\n",
" window_title = \"logs\" if graphs else \"game_play\"\n",
" cv2.namedWindow(window_title, cv2.WINDOW_NORMAL) \n",
" cv2.moveWindow(window_title, 20,20);\n",
"# imS = cv2.resize(screen, (800, 400)) \n",
"# cv2.imshow(window_title, screen)\n",
" cv2.imshow(window_title, processed)\n",
" cv2.waitKey(5)\n",
" cv2.destroyAllWindows\n",
" break\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Set up a similar tracked environment as the jerk agent, but with a bit of extra stuff tacked in"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class TrackedEnv(gym.Wrapper):\n",
" \"\"\"\n",
" An environment that tracks the current trajectory and\n",
" the total number of timesteps ever taken.\n",
" \"\"\"\n",
"\n",
" def __init__(self, env):\n",
" super(TrackedEnv, self).__init__(env)\n",
" self.action_history = []\n",
" self.reward_history = []\n",
" self.total_reward = 0\n",
" self.total_steps_ever = 0\n",
" record_file_path = output_dir+\"record.csv\"\n",
" self.record = pd.read_csv(record_file_path) if os.path.isfile(record_file_path) else pd.DataFrame(columns = ['Timesteps','Total_Score'])\n",
" actions_file_path = output_dir+\"actions.csv\"\n",
" self.actions = pd.read_csv(actions_file_path) if os.path.isfile(actions_file_path) else pd.DataFrame(columns = ['Action','Intention'])\n",
" \n",
" def best_sequence(self):\n",
" \"\"\"\n",
" Get the prefix of the trajectory with the best\n",
" cumulative reward.\n",
" \"\"\"\n",
" max_cumulative = max(self.reward_history)\n",
" for i, rew in enumerate(self.reward_history):\n",
" if rew == max_cumulative:\n",
" return self.action_history[:i + 1]\n",
" raise RuntimeError('unreachable')\n",
"\n",
" # pylint: disable=E0202\n",
" def reset(self, **kwargs):\n",
" self.action_history = []\n",
" self.reward_history = []\n",
" self.total_reward = 0\n",
" return self.env.reset(**kwargs)\n",
"\n",
" def step(self, action):\n",
" self.total_steps_ever += 1\n",
" self.action_history.append(action.copy())\n",
" obs, rew, done, info = self.env.step(action)\n",
" if done:\n",
" data = pd.DataFrame({'Timesteps':[len(self.action_history)], 'Total_Score': [self.total_reward]})\n",
" self.record = self.record.append(data)\n",
"# self.record.loc[len(loss_df)] = score\n",
"# self.record.append([self.total_reward, len(self.action_history)])\n",
" print('rip')\n",
" self.total_reward += rew\n",
" self.reward_history.append(self.total_reward)\n",
" return obs, rew, done, info\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Hyperparameters, many of these are copied from the dino learning paper, and also the flappy bird paper referenced within."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"EXPLOIT_BIAS = 0.25 # 0.;5\n",
"TOTAL_TIMESTEPS = int(1e6)\n",
"\n",
"#game parameters\n",
"ACTIONS = 8 # possible actions\n",
"GAMMA = 0.99 # decay rate of past observations original 0.99\n",
"OBSERVATION = 200000. # timesteps to observe before training\n",
"EXPLORE = 200000 # frames over which to anneal epsilon\n",
"FINAL_EPSILON = 0.0001 # final value of epsilon\n",
"INITIAL_EPSILON = 0.1 # starting value of epsilon\n",
"REPLAY_MEMORY = 100000 # number of previous transitions to remember\n",
"BATCH = 32 # size of minibatch\n",
"FRAMERATE= 4 #how often to render\n",
"LEARNING_RATE = 1e-4\n",
"img_rows , img_cols = 120,84\n",
"img_channels = 4 #We stack 4 frames"
]
},
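{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a sanity check on these numbers, a small sketch of the epsilon schedule used in the training loop below. Note that the loop resumes from a cached epsilon (0.7 by default), while the per-frame decrement is derived from `INITIAL_EPSILON`, so fully annealing to `FINAL_EPSILON` takes longer than `EXPLORE` frames:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: the per-frame epsilon decrement used once observation is over\n",
"decrement = (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE\n",
"frames_from_cached = (0.7 - FINAL_EPSILON) / decrement  # resuming from the default cached epsilon\n",
"print('decrement per frame: %.3e' % decrement)\n",
"print('frames to anneal from 0.7 to FINAL_EPSILON: %d' % frames_from_cached)"
]
},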
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def buildmodel():\n",
" print(\"Now we build the model\")\n",
" model = Sequential()\n",
" model.add(Conv2D(32, (8, 8), strides=(4, 4), padding='same',input_shape=(img_cols,img_rows,img_channels))) #20*40*4\n",
" model.add(Activation('relu'))\n",
" model.add(Conv2D(64, (4, 4), strides=(2, 2), padding='same'))\n",
" model.add(Activation('relu'))\n",
" model.add(Conv2D(64, (3, 3), strides=(1, 1), padding='same'))\n",
" model.add(Activation('relu'))\n",
" model.add(Flatten())\n",
" model.add(Dense(512))\n",
" model.add(Activation('relu'))\n",
" model.add(Dense(ACTIONS))\n",
" adam = Adam(lr=LEARNING_RATE)\n",
" model.compile(loss='mean_squared_logarithmic_error',optimizer=adam)\n",
" print(\"We finish building the model\")\n",
" return model\n",
"# buildmodel().summary()"
]
},
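{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick shape check on the network (just a sketch): build the model and push a dummy stack of four processed frames through it, expecting one Q-value per action back."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: confirm the DQN's input and output shapes\n",
"m = buildmodel()\n",
"dummy = np.zeros((1, img_cols, img_rows, img_channels))  # one stack of 4 processed frames\n",
"print(m.predict(dummy).shape)  # expected: (1, ACTIONS), i.e. one Q-value per action"
]
},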
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The actions and random move function are the commands we use to actually make actions on our state. Random move had the option to use a supplied value, for example when a move was predicted by the model. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Possible actions\n",
"# [\"B\", \"A\", \"MODE\", \"START\", \"UP\", \"DOWN\", \"LEFT\", \"RIGHT\", \"C\", \"Y\", \"X\", \"Z\"]\n",
"actions = [\n",
" [True, False, False, False, False, False, False, False, False, False, False, False],\n",
" [True, False, False, False, False, False, False, True, False, False, False, False],\n",
" [False, False, False, False, False, False, False, True, False, False, False, False],\n",
" [False, False, False, False, False, True, False, False, False, False, False, False],\n",
" [True, False, False, False, False, True, False, False, False, False, False, False],\n",
" [False, False, False, False, False, False, False, False, False, False, False, False],\n",
" [False, False, False, False, False, False, True, False, False, False, False, False],\n",
" [True, False, False, False, False, False, True, False, False, False, False, False],\n",
"]"
]
},
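{
"cell_type": "markdown",
"metadata": {},
"source": [
"To make the eight boolean arrays easier to read, a small sketch that prints the buttons each action holds down, using the button order from the comment above:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: translate each boolean action array into the buttons it holds down\n",
"buttons = [\"B\", \"A\", \"MODE\", \"START\", \"UP\", \"DOWN\", \"LEFT\", \"RIGHT\", \"C\", \"Y\", \"X\", \"Z\"]\n",
"for i, action in enumerate(actions):\n",
"    pressed = [b for b, held in zip(buttons, action) if held]\n",
"    print(i, pressed or ['no-op'])"
]
},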
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def random_move(env, Choice=None):\n",
" done = False\n",
" if Choice is None:\n",
" Choice = random.randint(1,len(actions))-1\n",
" # no info variable in contest environment\n",
" obs, rew, done, _ = env.step(actions[Choice])\n",
" \n",
" return rew, done, obs, Choice\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`proccessObs` was used in image cropping and resizing, but was ultimately dropped. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def proccessObs(obs):\n",
" window_size_x = 180\n",
" window_size_y = 100\n",
" window_offset_x = 140 #int((320 - window_size_x)/2)\n",
" window_offset_y = 100 #int((224 - window_size_y)/2)\n",
" # lm = Image.fromarray(np.array(obs[window_offset_y:(window_offset_y+window_size_y),window_offset_x:(window_offset_x+window_size_x)]))\n",
" # lm.show()\n",
" # input(\"Press Enter to continue...\")\n",
" return obs[window_offset_y:(window_offset_y+window_size_y),window_offset_x:(window_offset_x+window_size_x)].flatten().tostring()\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`process_img` takes a screen from the evironment and applies the resizing, edge detection, and color correction."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def process_img(image):\n",
" #crop out the dino agent from the frame\n",
" height, width = image.shape[:2]\n",
" image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)\n",
"# print(height)\n",
"# print(width)\n",
" image = image[round(height*.25):height,round(width*.25):width] #img[y:y+h, x:x+w] \n",
" image = cv2.resize(image, (0,0), fx = 0.5, fy = 0.5) \n",
"# height, width = image.shape[:2]\n",
"# print(height)\n",
"# print(width)\n",
" image = cv2.Canny(image, threshold1 = 100, threshold2 = 200) #apply the canny edge detection\n",
" return image "
]
},
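{
"cell_type": "markdown",
"metadata": {},
"source": [
"A sketch that runs a blank Genesis-sized frame (224x320 RGB) through `process_img`, to check that the processed shape lines up with the `img_rows`/`img_cols` the model expects:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: processed frame shape for a blank 224x320 RGB screen\n",
"blank = np.zeros((224, 320, 3), dtype=np.uint8)\n",
"print(process_img(blank).shape)  # drop the top 25% of rows and left 25% of columns, halve, Canny -> (84, 120)"
]
},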
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def main(observe=False):\n",
" # Set up a new TrackedEnv that can keep track of total timestamps and store\n",
" # previous best solutions.\n",
" #\n",
" # env = grc.RemoteEnv('tmp/sock')\n",
" # env = TrackedEnv(env)\n",
"\n",
" \n",
" env = retro.make(game='SonicTheHedgehog-Genesis',\n",
" state='GreenHillZone.Act1',\n",
" scenario='contest',\n",
" record=output_dir)\n",
" env = TrackedEnv(env)\n",
"\n",
" # new_ep will keep track of if a new episode should be started.\n",
" new_ep = True\n",
" # solutions is an array of successful gameplay sequences as well as the\n",
" \n",
" solutions = []\n",
" \n",
" model = buildmodel()\n",
" x_t = process_img(np.zeros((224,320,3), dtype=np.uint8))\n",
" \n",
" s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)\n",
" s_t = s_t.reshape(1, s_t.shape[0], s_t.shape[1], s_t.shape[2]) #1*20*40*4\n",
"\n",
" initial_state = s_t\n",
" if observe :\n",
" OBSERVE = 999999999 #We keep observing, never train\n",
" epsilon = FINAL_EPSILON\n",
" print (\"Now we load weight\")\n",
" model.load_weights(\"model_final.h5\")\n",
" adam = Adam(lr=LEARNING_RATE)\n",
" model.compile(loss='mse',optimizer=adam)\n",
" print (\"Weight load successfully\") \n",
" else: #We go to training mode\n",
" OBSERVE = OBSERVATION\n",
" epsilon = load_obj(\"epsilon\") \n",
"# model.load_weights(\"model_final.h5\")\n",
" adam = Adam(lr=LEARNING_RATE)\n",
" model.compile(loss='mse',optimizer=adam)\n",
" \n",
" t = 0 # start of timesteps\n",
" D = deque()\n",
" while True:\n",
" \n",
" loss = 0\n",
" Q_sa = 0\n",
" action_index = 0\n",
" r_t = 0 #reward at 4\n",
" a_t = np.zeros([ACTIONS]) # action at t\n",
" if new_ep:\n",
" clear_output(wait=True)\n",
" print('%f%% done, reward: %f' % (env.total_steps_ever / 10000, env.record[\"Total_Score\"].mean()))\n",
" \n",
"# if (solutions and\n",
"# random.random() < EXPLOIT_BIAS + env.total_steps_ever / TOTAL_TIMESTEPS):\n",
"# solutions = sorted(solutions, key=lambda x: np.mean(x[0]))\n",
"# best_pair = solutions[-1]\n",
"# new_rew = exploit(env, best_pair[1])\n",
"# best_pair[0].append(new_rew)\n",
"# print('replayed best with reward %f' % new_rew)\n",
"# continue\n",
"# else:\n",
" env.reset()\n",
" new_ep = False\n",
" if random.random() <= epsilon: #randomly explore an action\n",
"# print(\"----------Random Action----------\")\n",
" action_index = random.randrange(len(actions[:]))\n",
" env.actions.loc[len(env.actions)]= {'Action':action_index, 'Intention': 'Random'}\n",
"\n",
" \n",
" else: # predict the output\n",
"# print(\"----------Predicted----------\")\n",
" q = model.predict(s_t) #input a stack of 4 images, get the prediction\n",
" max_Q = np.argmax(q) # chosing index with maximum q value\n",
" action_index = max_Q \n",
" env.actions.loc[len(env.actions)]= {'Action':action_index, 'Intention': 'Predicted'}\n",
" \n",
" \n",
" #We reduced the epsilon (exploration parameter) gradually\n",
" if epsilon > FINAL_EPSILON and t > OBSERVE:\n",
" epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE \n",
" \n",
"# action_index=2;\n",
"# x_t1 ~ obs? terminal ~ done\n",
" #run the selected action and observed next state and reward\n",
"# x_t1, r_t, terminal = game_state.get_state(a_t)\n",
"# print(t,'doing action',action_index)\n",
" reward, done, obs, choice = random_move(env,Choice=action_index)\n",
" x_t1 = process_img(obs)\n",
"# show_img(obs)\n",
"# if t % FRAMERATE ==0:\n",
"# env.render()\n",
" x_t1 = x_t1.reshape(1, x_t1.shape[0], x_t1.shape[1], 1) #1x168x320x1\n",
" s_t1 = np.append(x_t1, s_t[:, :, :, :3], axis=3) # append the new image to input stack and remove the first one\n",
" \n",
" D.append((s_t, action_index, reward, s_t1, done))\n",
"\n",
" if len(D) > REPLAY_MEMORY:\n",
" D.popleft()\n",
" \n",
" #only train if done observing\n",
" if t > OBSERVE: \n",
" \n",
" #sample a minibatch to train on\n",
"\n",
" minibatch = random.sample(D, BATCH)\n",
" inputs = np.zeros((BATCH, s_t.shape[1], s_t.shape[2], s_t.shape[3])) #32, 20, 40, 4\n",
" targets = np.zeros((inputs.shape[0], ACTIONS)) #32, 2\n",
"\n",
" #Now we do the experience replay\n",
" for i in range(0, len(minibatch)):\n",
" state_t = minibatch[i][0] # 4D stack of images\n",
" action_t = minibatch[i][1] #This is action index\n",
" reward_t = minibatch[i][2] #reward at state_t due to action_t\n",
" state_t1 = minibatch[i][3] #next state\n",
" terminal = minibatch[i][4] #wheather the agent died or survided due the action\n",
" \n",
"\n",
" inputs[i:i + 1] = state_t \n",
"\n",
" targets[i] = model.predict(state_t) # predicted q values\n",
" Q_sa = model.predict(state_t1) #predict q values for next step\n",
" \n",
" if terminal:\n",
" targets[i, action_t] = reward_t # if terminated, only equals reward\n",
" else:\n",
" targets[i, action_t] = reward_t + GAMMA * np.max(Q_sa)\n",
"\n",
" loss += model.train_on_batch(inputs, targets)\n",
" loss_df.loc[len(loss_df)] = loss\n",
" s_t = initial_state if done else s_t1 #reset game to initial frame if terminate\n",
" t = t + 1\n",
"# env.render()\n",
"# print(t, env.total_reward )\n",
"# print(env.record)\n",
" state = \"\"\n",
" if t <= OBSERVE:\n",
" state = \"observe\"\n",
" elif t > OBSERVE and t <= OBSERVE + EXPLORE:\n",
" state = \"explore\"\n",
" else:\n",
" state = \"train\"\n",
" if t % 100 == 0:\n",
" print(\"T\", t, \"/ STATE\", state,\"/ ε\", round(epsilon,3), \"/ REWARD\", round(env.total_reward),\"/ Q_MAX \" , np.max(Q_sa), \"/ Loss \", loss)\n",
"\n",
" if done:\n",
" new_ep = True\n",
" \n",
" if t % 1000 == 0:\n",
" model.save_weights(output_dir+\"model_weights.h5\", overwrite=True)\n",
"# save_obj(D,output_dir+\"D\") #saving episodes\n",
" save_obj(t,output_dir+\"time\") #caching time steps\n",
" save_obj(epsilon,output_dir+\"epsilon\") #cache epsilon to avoid repeated randomness in actions\n",
" loss_df.to_csv(output_dir+\"loss_df.csv\",index=False)\n",
" env.record.to_csv(output_dir+\"records.csv\",index=False)\n",
" env.actions.to_csv(output_dir+\"actions.csv\",index=False)\n",
" with open(\"model.json\", \"w\") as outfile:\n",
" json.dump(model.to_json(), outfile)\n",
" if t >1000000:\n",
" exit();\n",
"# # rew, new_ep = move_n_learn(env, 1)\n",
"# if not new_ep and rew <= 0:\n",
"# print('backtracking due to negative reward: %f' % rew)\n",
"# _, new_ep = move_n_learn(env, 70, left=True)\n",
"# if new_ep:\n",
"# solutions.append(([max(env.reward_history)], env.best_sequence()))"
]
},
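{
"cell_type": "markdown",
"metadata": {},
"source": [
"The heart of the training loop above is the Q-learning target built during experience replay. A standalone sketch of that update on toy numbers, independent of the environment:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: the Bellman target used in the replay loop above, on toy numbers\n",
"reward_t = 1.0                      # reward observed after taking action_t\n",
"q_next = np.array([0.2, 0.5, 0.1])  # predicted Q-values for the next state\n",
"terminal = False\n",
"target = reward_t if terminal else reward_t + GAMMA * np.max(q_next)\n",
"print(target)  # 1.0 + 0.99 * 0.5 = 1.495"
]
},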
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"if __name__ == '__main__':\n",
" try:\n",
" main()\n",
" except gre.GymRemoteError as exc:\n",
" print('exception', exc)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Finially, there a a function that can take the hardcoded folder name of a previous run, and show you the loss over time as well as the distribution in moves between the random & model predictions. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def show_plots():\n",
" rundir = 'run-20180519-14:14'\n",
" fig, axs = plt.subplots(ncols=1,nrows =3,figsize=(15,15))\n",
" axs[0].set_title('Loss')\n",
" axs[1].set_title('Game Score progress')\n",
" loss_df = pd.read_csv(\"./\"+rundir+\"/loss_df.csv\")#.set_yscale('log')\n",
" scores_df = pd.read_csv(\"./\"+rundir+\"/records.csv\")\n",
" actions_df = pd.read_csv(\"./\"+rundir+\"/actions.csv\")\n",
" actions_df['Action'] = actions_df['Action'].astype('float') \n",
" loss_df['loss'] = loss_df['loss'].astype('float') \n",
" loss_df.plot(use_index=True,ax=axs[0]).set_yscale('log')\n",
"\n",
"\n",
"\n",
" sns.distplot(actions_df['Action'].loc[actions_df['Intention'] == 'Predicted'])\n",
" sns.distplot(actions_df['Action'].loc[actions_df['Intention'] == 'Random'])\n",
" scores_df.plot(ax=axs[1])\n",
" imgg = fig.canvas.draw()\n",
"show_plots()"
]
}
],
"metadata": {
"celltoolbar": "Raw Cell Format",
"kernelspec": {
"display_name": "reverie_agent",
"language": "python",
"name": "reverie_agent"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}