@reddragon
Created April 30, 2017 06:43
Predicting whether there would be a goal in the next 20 steps in the ATARI Pong Game
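# =============================================================================
# Part 1 -- data collection: play Pong with random actions and, each time a
# point is scored, store the 20 preceding frame-diffs labeled with the number
# of steps remaining until the goal. The (diffs, labels) pair is pickled to
# data.p.
# =============================================================================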
import gym
import logging
import sys
import numpy as np
from gym import wrappers
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import torch.optim as optim
import matplotlib.image as mpimg
import cPickle as pickle
from math import sqrt, ceil
from torch.autograd import Variable
def visualize_grid(Xs, ubound=255.0, padding=1):
    """
    Reshape a 4D tensor of image data to a grid for easy visualization.

    Inputs:
    - Xs: Data of shape (N, H, W, C)
    - ubound: Output grid will have values scaled to the range [0, ubound]
    - padding: The number of blank pixels between elements of the grid
    """
    (N, H, W, C) = Xs.shape
    grid_size = int(ceil(sqrt(N)))
    grid_height = H * grid_size + padding * (grid_size - 1)
    grid_width = W * grid_size + padding * (grid_size - 1)
    grid = np.zeros((grid_height, grid_width, C))
    next_idx = 0
    y0, y1 = 0, H
    for y in xrange(grid_size):
        x0, x1 = 0, W
        for x in xrange(grid_size):
            if next_idx < N:
                img = Xs[next_idx]
                low, high = np.min(img), np.max(img)
                grid[y0:y1, x0:x1] = ubound * (img - low) / (high - low)
                # grid[y0:y1, x0:x1] = Xs[next_idx]
                next_idx += 1
            x0 += W + padding
            x1 += W + padding
        y0 += H + padding
        y1 += H + padding
    # grid_max = np.max(grid)
    # grid_min = np.min(grid)
    # grid = ubound * (grid - grid_min) / (grid_max - grid_min)
    return grid
# Note: this Net (and the criterion/optimizer below) is defined but never
# trained in this data-collection part; training happens in Part 3 below.
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 1, 20)
        self.fc2 = nn.Linear(1 * 191 * 141, 4)

    def forward(self, x):
        x = F.relu((self.conv1(x)))
        x = x.view(-1, 1 * 191 * 141)
        x = F.relu(self.fc2(x))
        return F.log_softmax(x)
def preprocess(I):
    I = I[35:195]  # crop
    I = I[::2, ::2, 0]  # downsample by factor of 2
    I[I == 144] = 0  # erase background (background type 1)
    I[I == 109] = 0  # erase background (background type 2)
    I[I != 0] = 1  # everything else (paddles, ball) just set to 1
    return I.astype(np.float).ravel()
gym.undo_logger_setup()
logger = logging.getLogger()
formatter = logging.Formatter('[%(asctime)s] %(message)s')
handler = logging.StreamHandler(sys.stderr)
handler.setFormatter(formatter)
logger.addHandler(handler)
# You can set the level to logging.DEBUG or logging.WARN if you
# want to change the amount of output.
logger.setLevel(logging.INFO)
outdir = 'rl-data'
env = gym.make('Pong-v0')
# env = wrappers.Monitor(env, directory=outdir, force=True)
env.seed(0)
iters = 0
total = 0
episodes = 10
# Shape of observation (210, 160, 3)
net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.5)
# zero the parameter gradients
optimizer.zero_grad()
out = []
labels = []
while episodes > 0:
    # Get the observation
    ob = env.reset()
    total = 0
    ob_screens = []
    episode_screens = []
    while True:
        iters += 1
        ob, reward, done, _ = env.step(env.action_space.sample())
        ob = preprocess(ob)
        # ob_screens.append(ob)
        episode_screens.append(ob.reshape(80, 80))
        total += reward
        if reward != 0:
            print 'Received reward %d in iter %d. Total: %d' % (reward, iters, total)
            #plt.imshow(ob.reshape(80, 80))
            #plt.show()
            step = 1.0 / (iters)
            cur_val = 0.0
            prev_screen = np.zeros((80, 80))
            rem = 20
            # Label the 20 frames preceding the point with how many steps
            # remain until the goal (20 down to 1).
            for screen in episode_screens[-22:-2]:
                cur_val += step
                cur_screen = screen.reshape(80, 80)
                diff = cur_screen - prev_screen
                out.append(diff)
                # labels.append(cur_val)
                labels.append(rem)
                rem = rem - 1
                prev_screen = cur_screen
            # End the episode after the first point is scored.
            done = True
            iters = 0
            episode_screens = []
        if done:
            print 'Done'
            break
    episodes = episodes - 1
d = [out, labels]
pickle.dump(d, open("data.p", "wb"), protocol=2)
env.close()
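# =============================================================================
# Part 2 -- visualization: load the trained checkpoint, play random Pong
# episodes, run each frame-diff through the net, and render the predicted
# "steps until goal" as a colored bar under the frame, saved to an animated
# GIF (pong-next20.gif).
# =============================================================================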
import gym
import logging
import sys
import numpy as np
from gym import wrappers
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import torch.optim as optim
import matplotlib.image as mpimg
import cPickle as pickle
import torch.utils.data
import os as os
import imageio
from math import sqrt, ceil
from torch.autograd import Variable
from PIL import Image, ImageDraw
def preprocess(I):
    I = I[35:195]  # crop
    I = I[::2, ::2, 0]  # downsample by factor of 2
    I[I == 144] = 0  # erase background (background type 1)
    I[I == 109] = 0  # erase background (background type 2)
    I[I != 0] = 1  # everything else (paddles, ball) just set to 1
    return I.astype(np.float).ravel()
def opimg(I, pct):
    # Render an 80x80 frame upscaled 4x, with a bar at the bottom showing the
    # predicted fraction pct of the 20-step countdown that remains.
    pct = min(max(pct, 0), 1.0)
    I = np.kron(I, np.ones((4, 4)))  # upscale 80x80 -> 320x320
    # print I.shape
    I = np.append(I, np.ones((20, 320)), axis=0)  # extra strip for the bar
    img = Image.fromarray(np.uint8(I * 255))
    img = img.convert('RGB')
    d = ImageDraw.Draw(img)
    pad = 3
    maxwidth = 320
    width = int(maxwidth * pct)
    h = pct
    # Bar color goes red (goal imminent) -> blue -> green (goal far away).
    green = int(max(2 * (h - 0.5), 0) * 255)
    red = int(max(2 * (0.5 - h), 0) * 255)
    blue = int(max(4 * min(0.75 - h, h - 0.25), 0) * 255)
    # print("h: {}, green:{}, red: {}, blue: {}".format(h, green, red, blue))
    d.rectangle([(0, 320), (maxwidth, 340)], fill=(220, 220, 220))
    d.rectangle([(pad, 320 + pad), (maxwidth - pad, 340 - pad)], fill=(0, 0, 0))
    d.rectangle([(pad, 320 + pad), (width - pad, 340 - pad)], fill=(red, green, blue))
    return img
CHECKPOINT_FILE_PATH = 'next20_ckpt'
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 1, 11)
        self.fc2 = nn.Linear(1 * 70 * 70, 1)
        self.fc1 = nn.Linear(1 * 80 * 80, 1)  # unused; forward only uses conv1 -> fc2

    def forward(self, x):
        x = F.relu((self.conv1(x)))
        x = x.view(-1, 1 * 70 * 70)
        x = self.fc2(x)
        return x
def save(net, optimizer, epoch):
    state = {
        'state_dict': net.state_dict(),
        'optimizer': optimizer.state_dict(),
        'epoch': epoch,
    }
    print("Saving checkpoint to file '{}'".format(CHECKPOINT_FILE_PATH))
    torch.save(state, CHECKPOINT_FILE_PATH)

# Returns net, optimizer, epoch
def load():
    net = Net()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.5)
    epoch = 0
    if os.path.isfile(CHECKPOINT_FILE_PATH):
        print("Loading checkpoint from file '{}'".format(CHECKPOINT_FILE_PATH))
        checkpoint = torch.load(CHECKPOINT_FILE_PATH)
        epoch = checkpoint['epoch']
        net.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
    return net, optimizer, epoch
net, _, _ = load()
imgs = []
d = pickle.load(open('data2.p', 'rb'))
diffs = np.asarray(d[0])
exps = np.asarray(d[1])
prev = np.zeros((80,80))
gym.undo_logger_setup()
logger = logging.getLogger()
formatter = logging.Formatter('[%(asctime)s] %(message)s')
handler = logging.StreamHandler(sys.stderr)
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.setLevel(logging.INFO)
outdir = 'rl-data'
env = gym.make('Pong-v0')
env.seed(0)
iters = 0
total = 0
episodes = 40
imgs = []
while episodes > 0:
    # Get the observation
    ob = env.reset()
    total = 0
    ob_screens = []
    episode_screens = []
    while True:
        iters += 1
        ob, reward, done, _ = env.step(env.action_space.sample())
        ob = preprocess(ob)
        # ob_screens.append(ob)
        episode_screens.append(ob.reshape(80, 80))
        total += reward
        if reward != 0:
            prev_screen = np.zeros((80, 80))
            for screen in episode_screens[-22:-2]:
                cur_screen = screen.reshape(80, 80)
                diff = cur_screen - prev_screen
                diff = np.expand_dims(diff, axis=0)
                diff = np.expand_dims(diff, axis=0)  # shape (1, 1, 80, 80)
                dt = Variable(torch.Tensor(diff))
                op = net(dt).data.numpy().reshape(-1)
                pct = op[0] * 1.0 / 20.0  # predicted steps-until-goal, scaled to [0, 1]
                imgs.append(np.asarray(opimg(screen, pct)))
                prev_screen = cur_screen
            done = True
            iters = 0
            episode_screens = []
        if done:
            break
    episodes = episodes - 1
imageio.mimsave('pong-next20.gif', imgs, duration=0.15)
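# =============================================================================
# Part 3 -- training: load the pickled (diffs, labels) data, wrap it in a
# TensorDataset/DataLoader, and train the regression net with SmoothL1Loss,
# checkpointing after every epoch.
# =============================================================================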
import gym
import logging
import sys
import numpy as np
from gym import wrappers
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import torch.optim as optim
import matplotlib.image as mpimg
import cPickle as pickle
import torch.utils.data
import os as os
from math import sqrt, ceil
from torch.autograd import Variable
CHECKPOINT_FILE_PATH = 'next20_ckpt'
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 1, 11)
        self.fc2 = nn.Linear(1 * 70 * 70, 1)
        self.fc1 = nn.Linear(1 * 80 * 80, 1)  # unused; forward only uses conv1 -> fc2

    def forward(self, x):
        x = F.relu((self.conv1(x)))
        x = x.view(-1, 1 * 70 * 70)
        x = self.fc2(x)
        return x
def save(net, optimizer, epoch):
    state = {
        'state_dict': net.state_dict(),
        'optimizer': optimizer.state_dict(),
        'epoch': epoch,
    }
    print("Saving checkpoint to file '{}'".format(CHECKPOINT_FILE_PATH))
    torch.save(state, CHECKPOINT_FILE_PATH)

# Returns net, optimizer, epoch
def load():
    net = Net()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.5)
    epoch = 0
    if os.path.isfile(CHECKPOINT_FILE_PATH):
        print("Loading checkpoint from file '{}'".format(CHECKPOINT_FILE_PATH))
        checkpoint = torch.load(CHECKPOINT_FILE_PATH)
        epoch = checkpoint['epoch']
        net.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
    return net, optimizer, epoch
def visualize_grid(Xs, ubound=255.0, padding=1):
    """
    Reshape a 4D tensor of image data to a grid for easy visualization.

    Inputs:
    - Xs: Data of shape (N, H, W, C)
    - ubound: Output grid will have values scaled to the range [0, ubound]
    - padding: The number of blank pixels between elements of the grid
    """
    (N, H, W, C) = Xs.shape
    grid_size = int(ceil(sqrt(N)))
    grid_height = H * grid_size + padding * (grid_size - 1)
    grid_width = W * grid_size + padding * (grid_size - 1)
    grid = np.zeros((grid_height, grid_width, C))
    next_idx = 0
    y0, y1 = 0, H
    for y in xrange(grid_size):
        x0, x1 = 0, W
        for x in xrange(grid_size):
            if next_idx < N:
                img = Xs[next_idx]
                low, high = np.min(img), np.max(img)
                grid[y0:y1, x0:x1] = ubound * (img - low) / (high - low)
                # grid[y0:y1, x0:x1] = Xs[next_idx]
                next_idx += 1
            x0 += W + padding
            x1 += W + padding
        y0 += H + padding
        y1 += H + padding
    # grid_max = np.max(grid)
    # grid_min = np.min(grid)
    # grid = ubound * (grid - grid_min) / (grid_max - grid_min)
    return grid
d = pickle.load(open('data.p', 'rb'))
diffs = np.asarray(d[0])
diffs = diffs.reshape(-1, 1, 80, 80)
labels = np.asarray(d[1])
print diffs.shape
prev_img = np.zeros((80, 80))
screens = []
for idx in range(len(diffs)):
    cur_img = diffs[idx].reshape(80, 80) + prev_img
    # plt.imshow(cur_img)
    # plt.show()
    prev_img = cur_img
    screens.append(cur_img)
dt = torch.FloatTensor(diffs)
lt = torch.FloatTensor(labels)
print ("Loading data")
td = torch.utils.data.TensorDataset(data_tensor=dt, target_tensor=lt)
print ("Done with loading the file")
criterion = nn.SmoothL1Loss()
trainloader = torch.utils.data.DataLoader(td, batch_size=10,
                                          shuffle=True, num_workers=2)
net, optimizer, init_epoch = load()
for epoch in range(init_epoch, 100):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data
        labels = labels.float()
        inputs, labels = Variable(inputs), Variable(labels)
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.data[0]
        if i % 5 == 4:  # print the average loss every 5 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 5))
            running_loss = 0.0
    # Checkpoint after every epoch.
    save(net, optimizer, epoch)
# Quick sanity check: run one batch through the trained net and compare the
# predictions against the labels.
for i, data in enumerate(trainloader, 0):
    # get the inputs
    inputs, labels = data
    print np.count_nonzero(inputs.numpy())
    outputs = net(Variable(inputs))
    print labels
    print outputs
    break