Anirban Santara Santara

## rollout.py
#!/usr/bin/env python

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import collections
import json
import os

## madras_experiments.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                Santara
                / madras_experiments.md
            
            
              Created
              January 16, 2020 12:53
            
          
    Experiment 1

Motivation

Train a single agent to drive three cars in Alpine
Details


Action space: steering-acceleration-brake
Car models:


## gym_madras_v2.py
"""
Gym Madras Env Wrapper.

This is an OpenAI gym environment wrapper for the MADRaS simulator. For more information on the OpenAI Gym interface, please refer to: https://gym.openai.com

Built on top of gym_torcs https://github.com/ugo-nama-kun/gym_torcs/blob/master/gym_torcs.py

The following enhancements were made for Multi-agent synchronization using exception handling:
- All the agents connect to the same TORCS engine through UDP ports
- If an agent fails to connect to the TORCS engine, it keeps on trying in a loop until successful

## gather_logs.py
import os
from glob import glob
import shutil

root_dir = '.'


def gather_logs(root_dir, log_name='log1.csv', dump_name='logdump'):
    """Collect the log file of every run directory into one dump directory.

    Each immediate child ``<root_dir>/<run>`` that contains a regular file
    called ``log_name`` has that file copied (with metadata, via
    ``shutil.copy2``) to ``<root_dir>/<dump_name>/<run>.csv``.

    Args:
        root_dir: Directory whose immediate children are scanned.
        log_name: File name to look for inside each child.
        dump_name: Name of the destination directory inside ``root_dir``.

    Returns:
        The list of destination paths that were written, in sorted order of
        the source directories.
    """
    dump_dir = os.path.join(root_dir, dump_name)
    copied = []
    for run_dir in sorted(glob(os.path.join(root_dir, '*'))):
        src = os.path.join(run_dir, log_name)
        # isfile (not exists): a directory that happens to be called
        # ``log_name`` cannot be copied with shutil.copy2.
        if not os.path.isfile(src):
            continue
        # Create the destination lazily so that a run with nothing to gather
        # leaves the tree untouched; copy2 does not create parent directories.
        if not os.path.isdir(dump_dir):
            os.makedirs(dump_dir)
        # basename is separator-agnostic, unlike splitting on '/'.
        dst = os.path.join(dump_dir, os.path.basename(run_dir) + '.csv')
        shutil.copy2(src, dst)
        copied.append(dst)
    return copied


# Same module-level effect as the original script: gather from root_dir.
gather_logs(root_dir)

## gym_torcs.py
"""
Gym interface to snakeoil3_gym.py.

Provides a gym interface to the traditional server-client model.
"""
import os
import subprocess
from mpi4py import MPI
import time

## ARS_linesearch_helpers.py
import numpy as np
import sys

def ssv2csv(tsv_filename):
    """Converts tab-separated variables into comma-separated variables.
    """
    f_in = open(tsv_filename, 'r')
    f_out = open(tsv_filename.split('.')[0]+'.csv', 'w')
    for line in f_in:
        csv_line = line.strip().split('\t')

## hcope.py
import numpy as np

def hcope_estimator(X, c, delta):
	"""
	X : float, size = (size_of_history, )
		importance weighted trajectory rewards from the behavior policy
	c : float, size = (size_of_history, )
					= scalar, if all samples from X have the same threshold
		thresholds for each random variable in x
	delta : float, size = scalar

## gradient tensorflow
import tensorflow as tf
import numpy as np

# Graph-mode TensorFlow example: one bias-free linear layer and the symbolic
# gradient of its output with respect to its input.

# Input placeholder: a single row of 16 float32 values, fed at run time.
inputs1 = tf.placeholder(shape=[1,16],dtype=tf.float32)
# Trainable weight matrix of shape [16, 4], initialised uniformly in [0, 0.01).
W = tf.Variable(tf.random_uniform([16,4],0,0.01))
# Matrix product of the input row and W; the result has shape [1, 4].
Qout = tf.matmul(inputs1,W)
# tf.gradients returns a list with one tensor per entry of its second
# argument; here a single tensor holding d(sum(Qout))/d(inputs1).
grad_Qout_inputs1 = tf.gradients(Qout, inputs1)

# Op that initialises W when it is run in a session.
# NOTE(review): tf.initialize_all_variables is deprecated in favour of
# tf.global_variables_initializer -- confirm the targeted TF version before
# switching.
init = tf.initialize_all_variables()
# Session used to execute the graph; nothing is run in the lines visible here.
sess = tf.Session()

## gist:ab6f5b18f5441fe7742e6ce2fe654549
   -- Sequential container; the modules below are appended in execution order.
   -- NOTE(review): the name suggests this is the generator network -- confirm.
   model_G = nn.Sequential()
   -- Concatenates the tensors of the input table into one tensor
   -- (arguments: dimension 2, nInputDims 2).
   model_G:add(nn.JoinTable(2, 2))
   -- Convolution/upsample stage followed by ReLU. Presumably 3+1 input planes
   -- -> 64 output planes with a 7x7 kernel; the meaning of the last two
   -- arguments is not visible here -- TODO confirm against the layer definition.
   model_G:add(cudnn.SpatialConvolutionUpsample(3+1, 64, 7, 7, 1, 1)):add(cudnn.ReLU(true))
   -- Batch normalisation sized to the 64 planes of the previous stage.
   model_G:add(nn.SpatialBatchNormalization(64, nil, nil, false))
   -- Second convolution/upsample stage: presumably 64 -> 368 planes.
   model_G:add(cudnn.SpatialConvolutionUpsample(64, 368, 7, 7, 1, 4)):add(cudnn.ReLU(true))
   model_G:add(nn.SpatialBatchNormalization(368, nil, nil, false))
   -- Dropout with probability 0.5.
   model_G:add(nn.SpatialDropout(0.5))
   -- Third convolution/upsample stage: presumably 368 -> 128 planes.
   model_G:add(cudnn.SpatialConvolutionUpsample(368, 128, 7, 7, 1, 4)):add(cudnn.ReLU(true))
   model_G:add(nn.SpatialBatchNormalization(128, nil, nil, false))
   -- NOTE(review): FeatureLPPooling argument semantics are not visible in this
   -- snippet -- verify against the module's documentation.
   model_G:add(nn.FeatureLPPooling(2,2,2,true))

## gist:02d80e7efdea153ed157dc0eef3671df
----------------------------------------------------------------------
-- CIFAR 8x8
-- Side length used for this configuration (8, matching the "8x8" header).
opt.scale = 8
-- Geometry table {3, scale, scale}; presumably channels x height x width.
opt.geometry = {3, opt.scale, opt.scale}
-- Total number of values per input: 3 * 8 * 8 = 192.
local input_sz = opt.geometry[1] * opt.geometry[2] * opt.geometry[3]
-- Width of the hidden layer.
local numhid = 600
-- Sequential container; the modules below are appended in execution order.
-- NOTE(review): the name suggests this is the discriminator network -- confirm.
model_D = nn.Sequential()
-- Reshape each input to input_sz values.
model_D:add(nn.Reshape(input_sz))
-- Fully connected layer mapping input_sz -> numhid, followed by ReLU.
model_D:add(nn.Linear(input_sz, numhid))
model_D:add(nn.ReLU())
	#!/usr/bin/env python

	from __future__ import absolute_import
	from __future__ import division
	from __future__ import print_function

	import argparse
	import collections
	import json
	import os
	"""
	Gym Madras Env Wrapper.

	This is an OpenAI gym environment wrapper for the MADRaS simulator. For more information on the OpenAI Gym interface, please refer to: https://gym.openai.com

	Built on top of gym_torcs https://github.com/ugo-nama-kun/gym_torcs/blob/master/gym_torcs.py

	The following enhancements were made for Multi-agent synchronization using exception handling:
	- All the agents connect to the same TORCS engine through UDP ports
	- If an agent fails to connect to the TORCS engine, it keeps on trying in a loop until successful
	import os
	from glob import glob
	import shutil

	root_dir = '.'
	for dir in glob(root_dir+'/*'):
	dir_name = dir.split('/')[-1]
	if os.path.exists(os.path.join(dir, 'log1.csv')):
	shutil.copy2(os.path.join(dir, 'log1.csv'), os.path.join(root_dir, 'logdump', dir_name+'.csv'))
	"""
	Gym interface to snakeoil3_gym.py.

	Provides a gym interface to the traditional server-client model.
	"""
	import os
	import subprocess
	from mpi4py import MPI
	import time
	import numpy as np
	import sys

	def ssv2csv(tsv_filename):
	"""Converts tab-separated variables into comma-separated variables.
	"""
	f_in = open(tsv_filename, 'r')
	f_out = open(tsv_filename.split('.')[0]+'.csv', 'w')
	for line in f_in:
	csv_line = line.strip().split('\t')
	import numpy as np

	def hcope_estimator(X, c, delta):
	"""
	X : float, size = (size_of_history, )
	importance weighted trajectory rewards from the behavior policy
	c : float, size = (size_of_history, )
	= scalar, if all samples from X have the same threshold
	thresholds for each random variable in x
	delta : float, size = scalar
	import tensorflow as tf
	import numpy as np

	inputs1 = tf.placeholder(shape=[1,16],dtype=tf.float32)
	W = tf.Variable(tf.random_uniform([16,4],0,0.01))
	Qout = tf.matmul(inputs1,W)
	grad_Qout_inputs1 = tf.gradients(Qout, inputs1)

	init = tf.initialize_all_variables()
	sess = tf.Session()
	model_G = nn.Sequential()
	model_G:add(nn.JoinTable(2, 2))
	model_G:add(cudnn.SpatialConvolutionUpsample(3+1, 64, 7, 7, 1, 1)):add(cudnn.ReLU(true))
	model_G:add(nn.SpatialBatchNormalization(64, nil, nil, false))
	model_G:add(cudnn.SpatialConvolutionUpsample(64, 368, 7, 7, 1, 4)):add(cudnn.ReLU(true))
	model_G:add(nn.SpatialBatchNormalization(368, nil, nil, false))
	model_G:add(nn.SpatialDropout(0.5))
	model_G:add(cudnn.SpatialConvolutionUpsample(368, 128, 7, 7, 1, 4)):add(cudnn.ReLU(true))
	model_G:add(nn.SpatialBatchNormalization(128, nil, nil, false))
	model_G:add(nn.FeatureLPPooling(2,2,2,true))
	----------------------------------------------------------------------
	-- CIFAR 8x8
	opt.scale = 8
	opt.geometry = {3, opt.scale, opt.scale}
	local input_sz = opt.geometry[1] * opt.geometry[2] * opt.geometry[3]
	local numhid = 600
	model_D = nn.Sequential()
	model_D:add(nn.Reshape(input_sz))
	model_D:add(nn.Linear(input_sz, numhid))
	model_D:add(nn.ReLU())