@tristansokol
Last active June 5, 2018 04:12
OpenAI Retro Contest tools!
##############################################################
# Calculate the average reward for a set of runs by averaging every episode's reward, similar to the contest scoring
#
# $ python3 ./scripts/calc_reward.py jerk-agentv12
# 99.900000% done, reward: 6970.131268
#
# https://medium.com/@tristansokol/making-fun-visuals-history-maps-and-other-tool-improvements-eb5ffe187fd3
##############################################################
import os
import sys
import numpy as np

dir_path = os.getcwd()
firstarg = sys.argv[1]
# monitor.csv: per-episode reward in the first column; log.csv: cumulative timestep count
r, _, _ = np.loadtxt(dir_path+'/results/'+firstarg+'/monitor.csv', delimiter=',', unpack=True, skiprows=1)
l, _ = np.loadtxt(dir_path+'/results/'+firstarg+'/log.csv', delimiter=',', unpack=True, skiprows=1)
# max(l)/10000 expresses progress as a percentage of the 1,000,000-timestep limit
print('%f%% done, reward: %f' % (max(l)/10000, np.mean(r)))
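
# A possible extension (not part of the original script; the run-directory layout
# is assumed to match the one above): average the mean reward over every run
# directory under ./results/, closer to how the contest aggregates a score.
import glob
run_means = []
for path in sorted(glob.glob(dir_path + '/results/*/monitor.csv')):
    rewards, _, _ = np.loadtxt(path, delimiter=',', unpack=True, skiprows=1)
    run_means.append(np.mean(rewards))
if run_means:
    print('mean reward across %d runs: %f' % (len(run_means), np.mean(run_means)))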
#############################################################################
# Plots a stored log of actions taken during learning and prediction, along with the training loss and game score progress.
#
# You'll need an actions.csv with two columns: an enumerated 'Action' and an 'Intention' label ('Predicted' or 'Random')
# Example move distribution: https://cdn-images-1.medium.com/max/1600/1*xXjXwhP_eJe5Is9oG_lcOg.png
# Example loss graph: https://cdn-images-1.medium.com/max/1600/1*3SoU7ILDnjLVSf24jN7CSA.png
#
# https://medium.com/@tristansokol/discovering-q-learning-f7780a77b927
#############################################################################
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

def show_plots():
    rundir = 'run-20180519-14:14'
    fig, axs = plt.subplots(ncols=1, nrows=3, figsize=(15, 15))
    axs[0].set_title('Loss')
    axs[1].set_title('Game Score progress')
    loss_df = pd.read_csv("./" + rundir + "/loss_df.csv")
    scores_df = pd.read_csv("./" + rundir + "/records.csv")
    actions_df = pd.read_csv("./" + rundir + "/actions.csv")
    actions_df['Action'] = actions_df['Action'].astype('float')
    loss_df['loss'] = loss_df['loss'].astype('float')
    loss_df.plot(use_index=True, ax=axs[0]).set_yscale('log')
    # distribution of predicted vs. random actions on the third subplot
    sns.distplot(actions_df['Action'].loc[actions_df['Intention'] == 'Predicted'], ax=axs[2])
    sns.distplot(actions_df['Action'].loc[actions_df['Intention'] == 'Random'], ax=axs[2])
    scores_df.plot(ax=axs[1])
    fig.canvas.draw()
    plt.show()

show_plots()
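
# For reference, the actions.csv read above is assumed to look like:
#   Action,Intention
#   3,Predicted
#   7,Random
# A minimal sketch of logging it during training (log_action is a hypothetical
# helper, not part of the original agent code):
import csv
import os
def log_action(path, action, intention):
    # append one (action index, 'Predicted' or 'Random') row, writing the header first if the file is new
    write_header = not os.path.exists(path)
    with open(path, 'a', newline='') as f:
        writer = csv.writer(f)
        if write_header:
            writer.writerow(['Action', 'Intention'])
        writer.writerow([action, intention])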
##############################################################
# Create a visualization of Sonic episodes in a level
# You'll need a level map image for the episode you are using (in the code below it is 01.PNG)
#
# See an example: https://cdn-images-1.medium.com/max/2000/1*HBbNOiCYuylkYpkRyCknrA.jpeg
#
# https://medium.com/@tristansokol/making-fun-visuals-history-maps-and-other-tool-improvements-eb5ffe187fd3
##############################################################
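# Usage (script path assumed, mirroring render.py below):
#   python3 ./scripts/levelmap.py ./results/bk2/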
#!/usr/bin/python
import sys
import retro
import numpy as np
from os import listdir
from os.path import isfile, join, isdir, dirname, realpath
from PIL import Image
# find level maps here: http://info.sonicretro.org/Sonic_the_Hedgehog_(16-bit)_level_maps
mp = Image.open(dirname(realpath(__file__))+"/01.PNG")
mp.load()
level_map = np.array(mp.convert(mode='RGB'), dtype="uint8")
hf = 10  # highlight factor: brightness added to each pixel Sonic visits
def render(file):
    movie = retro.Movie(file)
    movie.step()
    env = retro.make(game=movie.get_game(), state=retro.STATE_NONE, use_restricted_actions=retro.ACTIONS_ALL)
    env.initial_state = movie.get_state()
    env.reset()
    while movie.step():
        keys = []
        for i in range(env.NUM_BUTTONS):
            keys.append(movie.get_key(i))
        _obs, _rew, _done, _info = env.step(keys)
        y = _info['y']
        x = _info['x']
        # brighten the 8x8 patch of the level map at Sonic's current position
        patch = level_map[y:(y+8), x:(x+8)]
        level_map[y:(y+8), x:(x+8)] = np.minimum(patch.astype(np.int32) + hf, 255).astype(np.uint8)
    env.close()
if isdir(sys.argv[1]):
    onlyfiles = [f for f in listdir(sys.argv[1]) if isfile(join(sys.argv[1], f))]
    onlyfiles.sort()
    c = 0
    for file in onlyfiles:
        if file.endswith(".bk2"):
            print('playing', file)
            render(join(sys.argv[1], file))
            # save the map every 5 episodes so progress can be checked mid-run
            if c % 5 == 0:
                lm = Image.fromarray(level_map)
                lm.save('levelmap.jpeg')
            c += 1
    lm.show()
else:
    print('playing', sys.argv[1])
    render(sys.argv[1])
    lm = Image.fromarray(level_map)
    # lm.save('levelmap.jpeg')
    lm.show()
##############################################################
# Create a visualization of Reward and episode length from a monitor.csv
#
# See an example: https://cdn-images-1.medium.com/max/1600/1*f2Mowizhx1sit7jMip9-AQ.png
#
# https://medium.com/@tristansokol/making-fun-visuals-history-maps-and-other-tool-improvements-eb5ffe187fd3
##############################################################
import matplotlib.pyplot as plt
import numpy as np
import os
dir_path = os.getcwd()
r, l, t = np.loadtxt(dir_path+'/results/jerk-agentv10/monitor.csv', delimiter=',', unpack=True, skiprows=1)
fig = plt.figure(num=None, figsize=(14, 6), dpi=80, facecolor='w', edgecolor='k')
ax = fig.add_subplot(111)
ax.plot(t, r, c='b', label='Reward', linewidth=0.5)
ax.plot(t, l, c='r', label='Episode Length', linewidth=0.5)
plt.xlabel('Wall Time')
# plt.ylabel('y')
plt.title('Reward & Episode Length')
plt.legend()
plt.show()
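# To keep a copy of the chart for sharing (filename assumed), the figure can
# also be written to disk before plt.show():
# plt.savefig('reward_and_episode_length.png', bbox_inches='tight')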
#############################################################################
# Render .bk2 files at an adjustable playback framerate, with support for entire directories
#
# Usage: python3 ./scripts/render.py ./results/bk2/
#
# https://medium.com/@tristansokol/day-6-of-the-openai-retro-contest-playback-tooling-3844ba655919
#############################################################################
#!/usr/bin/python
import sys
import retro
from os import listdir
from os.path import isfile, join, isdir
def render(file):
    movie = retro.Movie(file)
    movie.step()
    env = retro.make(game=movie.get_game(), state=retro.STATE_NONE, use_restricted_actions=retro.ACTIONS_ALL)
    env.initial_state = movie.get_state()
    env.reset()
    frame = 0
    framerate = 2  # render every Nth frame; raise this to speed up playback
    while movie.step():
        if frame == framerate:
            env.render()
            frame = 0
        else:
            frame += 1
        keys = []
        for i in range(env.NUM_BUTTONS):
            keys.append(movie.get_key(i))
        _obs, _rew, _done, _info = env.step(keys)
    env.close()
if isdir(sys.argv[1]):
    onlyfiles = [f for f in listdir(sys.argv[1]) if isfile(join(sys.argv[1], f))]
    onlyfiles.sort()
    for file in onlyfiles:
        if file.endswith(".bk2"):
            print('playing', file)
            render(join(sys.argv[1], file))
else:
    print('playing', sys.argv[1])
    render(sys.argv[1])
#############################################################################
# Takes two videos and combines them side by side for comparison.
#
# You'll need to convert each .bk2 to an .mp4 with retro.scripts.playback_movie first
# Example: https://www.youtube.com/watch?v=03yqHZjsgcA
#
# https://medium.com/@tristansokol/running-the-ppo-baseline-and-giving-up-on-local-evaluation-1c7d171e5bc8
#############################################################################
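# To produce the input .mp4 files, gym-retro's playback script can render a
# .bk2 to video (assuming ffmpeg is on your PATH), e.g.:
#   python3 -m retro.scripts.playback_movie results/SonicTheHedgehog-Genesis-GreenHillZone.Act1-0001.bk2
#
# In the filter below, [0:v]pad=iw*2:ih doubles the width of the first video's
# canvas, and overlay=W/2:0 places the second video in the new right half.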
ffmpeg \
-i results/SonicTheHedgehog-Genesis-GreenHillZone.Act1-0001.mp4 \
-i results/SonicTheHedgehog-Genesis-GreenHillZone.Act1-0729.mp4 \
-filter_complex '[0:v]pad=iw*2:ih[int];[int][1:v]overlay=W/2:0[vid]' \
-map [vid] \
-c:v libx264 \
-crf 23 \
-preset veryfast \
output.mp4