Train a single agent to drive three cars in Alpine
- Action space: steering-acceleration-brake
- Car models:
#!/usr/bin/env python | |
from __future__ import absolute_import | |
from __future__ import division | |
from __future__ import print_function | |
import argparse | |
import collections | |
import json | |
import os |
""" | |
Gym Madras Env Wrapper. | |
This is an OpenAI gym environment wrapper for the MADRaS simulator. For more information on the OpenAI Gym interface, please refer to: https://gym.openai.com | |
Built on top of gym_torcs https://github.com/ugo-nama-kun/gym_torcs/blob/master/gym_torcs.py | |
The following enhancements were made for Multi-agent synchronization using exception handling: | |
- All the agents connect to the same TORCS engine through UDP ports | |
- If an agent fails to connect to the TORCS engine, it keeps on trying in a loop until successful |
# Collect per-run logs into one place: every entry directly under `root_dir`
# that contains a `log1.csv` has that file copied to
# `<root_dir>/logdump/<entry name>.csv`.
import os
from glob import glob
import shutil

root_dir = '.'
dump_dir = os.path.join(root_dir, 'logdump')

# shutil.copy2 does not create missing parent directories, so make sure the
# destination exists before the first copy instead of failing on it.
if not os.path.isdir(dump_dir):
    os.makedirs(dump_dir)

for run_dir in glob(root_dir + '/*'):
    # basename copes with the platform's path separator, unlike split('/').
    dir_name = os.path.basename(run_dir)
    log_path = os.path.join(run_dir, 'log1.csv')
    if os.path.exists(log_path):
        # copy2 also preserves the file's metadata (e.g. timestamps).
        shutil.copy2(log_path, os.path.join(dump_dir, dir_name + '.csv'))
""" | |
Gym interface to snakeoil3_gym.py. | |
Provides a gym interface to the traditional server-client model. | |
""" | |
import os | |
import subprocess | |
from mpi4py import MPI | |
import time |
import numpy as np | |
import sys | |
def ssv2csv(tsv_filename): | |
"""Converts tab-separated variables into comma-separated variables. | |
""" | |
f_in = open(tsv_filename, 'r') | |
f_out = open(tsv_filename.split('.')[0]+'.csv', 'w') | |
for line in f_in: | |
csv_line = line.strip().split('\t') |
import numpy as np | |
def hcope_estimator(X, c, delta): | |
""" | |
X : float, size = (size_of_history, ) | |
importance weighted trajectory rewards from the behavior policy | |
c : float, size = (size_of_history, ) | |
= scalar, if all samples from X have the same threshold | |
thresholds for each random variable in x | |
delta : float, size = scalar |
import tensorflow as tf
import numpy as np

# Single linear layer built with the TF1 graph API: a (1, 16) float32 input
# multiplied by a (16, 4) weight matrix, producing a (1, 4) output.
inputs1 = tf.placeholder(shape=[1, 16], dtype=tf.float32)
# Weights are initialised uniformly at random between 0 and 0.01.
W = tf.Variable(tf.random_uniform([16, 4], 0, 0.01))
Qout = tf.matmul(inputs1, W)

# Symbolic gradient of the output with respect to the input placeholder.
grad_Qout_inputs1 = tf.gradients(Qout, inputs1)

# NOTE(review): tf.initialize_all_variables is deprecated in TF 1.x in favour
# of tf.global_variables_initializer; kept as-is because the TensorFlow
# version this file targets is not visible here — confirm before switching.
init = tf.initialize_all_variables()
sess = tf.Session()
-- Generator network, assembled layer by layer in an nn.Sequential container.
model_G = nn.Sequential()
-- The input is a table of tensors; JoinTable concatenates them along
-- dimension 2 (second argument: each sample is expected to have 2 dims,
-- per the Torch nn JoinTable signature — TODO confirm against the caller).
model_G:add(nn.JoinTable(2, 2))
-- NOTE(review): SpatialConvolutionUpsample is not defined in this view and is
-- presumably a project-specific layer. The first two arguments line up with
-- input/output feature counts (the output count matches the batch-norm size
-- that follows); confirm the meaning of the remaining arguments.
-- 3+1 input planes: presumably 3 image channels plus 1 extra plane — confirm.
model_G:add(cudnn.SpatialConvolutionUpsample(3+1, 64, 7, 7, 1, 1)):add(cudnn.ReLU(true))
-- Batch norm with default eps/momentum (nil, nil) and no learnable affine
-- parameters (last argument false).
model_G:add(nn.SpatialBatchNormalization(64, nil, nil, false))
model_G:add(cudnn.SpatialConvolutionUpsample(64, 368, 7, 7, 1, 4)):add(cudnn.ReLU(true))
model_G:add(nn.SpatialBatchNormalization(368, nil, nil, false))
-- Drop whole feature maps with probability 0.5.
model_G:add(nn.SpatialDropout(0.5))
model_G:add(cudnn.SpatialConvolutionUpsample(368, 128, 7, 7, 1, 4)):add(cudnn.ReLU(true))
model_G:add(nn.SpatialBatchNormalization(128, nil, nil, false))
-- Lp pooling across the feature dimension: width 2, stride 2, power 2,
-- batch mode enabled (argument order per the Torch nn FeatureLPPooling docs).
model_G:add(nn.FeatureLPPooling(2,2,2,true))
----------------------------------------------------------------------
-- CIFAR 8x8
opt.scale = 8
-- Input geometry as {channels, height, width} = {3, 8, 8}.
opt.geometry = {3, opt.scale, opt.scale}
-- Flattened input size: 3 * 8 * 8 = 192 values per sample.
local input_sz = opt.geometry[1] * opt.geometry[2] * opt.geometry[3]
-- Width of the hidden layer.
local numhid = 600
-- Discriminator network: flatten the input, then a fully connected layer
-- followed by a ReLU (any further layers lie outside this view).
model_D = nn.Sequential()
model_D:add(nn.Reshape(input_sz))
model_D:add(nn.Linear(input_sz, numhid))
model_D:add(nn.ReLU())