Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@gdb

gdb/README.md Secret

Last active July 31, 2017 06:40
Show Gist options
  • Star 5 You must be signed in to star a gist
  • Fork 4 You must be signed in to fork a gist
  • Save gdb/62d8d8f5e13270d4b116336ae61240db to your computer and use it in GitHub Desktop.
Save gdb/62d8d8f5e13270d4b116336ae61240db to your computer and use it in GitHub Desktop.
Random agent run on all environments

RandomAgent

If running headless, execute:

xvfb-run -s "-screen 0 1400x900x24" python random_agent.py -b /tmp/random-agent

And then run:

python uploader.py -b /tmp/random-agent -w https://gist.github.com/gdb/62d8d8f5e13270d4b116336ae61240db -a random-v3
# env_runner.py: fan training runs for many gym environments out to a
# multiprocessing worker pool.
import logging
import multiprocessing
import os
import shutil
import signal
import tempfile
import gym
from gym import monitoring
logger = logging.getLogger(__name__)
# Module-level worker pool shared by all EnvRunner instances; created
# lazily in EnvRunner.__init__ on first construction.
pool = None
class EnvRunner(object):
    """Run a training callable across a set of gym environment specs,
    dispatching one job per environment to a shared process pool.

    Environments whose recorded monitor results already satisfy
    ``complete_callable`` are skipped; stale partial runs are wiped and
    redone.
    """

    def __init__(self, algorithm_id, training_callable, complete_callable, base_dir=None, video_callable=None, processes=None, env_ids=None):
        global pool
        self.algorithm_id = algorithm_id
        self.training_callable = training_callable
        self.complete_callable = complete_callable
        self.video_callable = video_callable
        # Fall back to a throwaway temp directory when none is given.
        self.base_dir = base_dir or tempfile.mkdtemp()
        if env_ids is None:
            self.specs = gym.envs.registry.all()
        else:
            self.specs = [gym.spec(env_id) for env_id in env_ids]
        self.selected_specs = None
        # Leave one CPU free by default, but always use at least one worker.
        worker_count = processes or max(1, multiprocessing.cpu_count() - 1)
        if not pool:
            pool = multiprocessing.Pool(worker_count)

    def run(self):
        """Select the specs that still need work, then train them all."""
        self.select_specs()
        self.train()

    def train(self):
        """Fan one run_training job per selected spec out to the pool."""
        jobs = [(self, idx, spec, training_dir)
                for idx, (spec, training_dir) in enumerate(self.selected_specs)]
        try:
            pool.map(run_training, jobs)
        except KeyboardInterrupt:
            # Tear the pool down before propagating the interrupt.
            pool.terminate()
            pool.join()
            raise

    def select_specs(self):
        """Populate self.selected_specs with (spec, training_dir) pairs
        that are not yet complete, clearing any stale partial output."""
        remaining = []
        for spec in self.specs:
            training_dir = self.env_dir(spec.id)
            results = monitoring.load_results(training_dir)
            if results and self.complete_callable(results):
                logger.info('Skipping already-processed %s', spec.id)
                continue
            if os.path.exists(training_dir):
                # Incomplete previous run: start over from scratch.
                shutil.rmtree(training_dir)
            remaining.append((spec, training_dir))
        self.selected_specs = remaining

    def env_dir(self, id):
        """Return the per-environment training directory under base_dir."""
        return os.path.join(self.base_dir, id)
# Actually run the training (in the worker)
def run_training(args):
    """Worker entry point: train a single environment under the monitor.

    ``args`` is one (runner, index, spec, training_dir) tuple, exactly as
    submitted by EnvRunner.train via pool.map. The original signature used
    Python-2-only tuple-parameter unpacking (``def run_training((self, i,
    spec, training_dir))``), which is a syntax error on Python 3; unpacking
    inside the body preserves the call contract on both versions.
    """
    self, i, spec, training_dir = args
    # Workers ignore SIGINT so Ctrl-C is handled once, in the parent
    # (EnvRunner.train terminates the pool and re-raises).
    signal.signal(signal.SIGINT, signal.SIG_IGN)
    logger.info('i=%s id=%s total=%s', i, spec.id, len(self.selected_specs))
    env = spec.make()
    # NOTE(review): env.monitor was deprecated in gym on 12/23/2016; newer
    # gym versions require `env = gym.wrappers.Monitor(env, training_dir)`
    # plus `env.close()` — confirm against the installed gym version.
    env.monitor.start(training_dir,
                      video_callable=self.video_callable)
    self.training_callable(env)
    # Dump monitor info to disk
    env.monitor.close()
#!/usr/bin/env python
# random_agent.py: run a uniformly random agent on every gym environment
# via env_runner.EnvRunner.
import argparse
import logging
import sys
import gym
import env_runner
# In modules, use `logger = logging.getLogger(__name__)`
logger = logging.getLogger()
# Send log records to stderr.
logger.addHandler(logging.StreamHandler(sys.stderr))
class RandomAgent(object):
    """An agent that samples a uniformly random action on every step."""

    def __init__(self, action_space):
        self.action_space = action_space

    def act(self, observation, reward, done):
        # observation, reward and done are accepted for interface
        # compatibility but deliberately unused.
        return self.action_space.sample()
def complete(results):
    """Return True once the monitor results record the full 250-episode run.

    Uses ``>=`` rather than the original ``==``: a directory that somehow
    recorded more than 250 episodes is still a finished run, whereas the
    strict equality would wrongly mark it incomplete and trigger a redo.
    """
    return len(results['episode_lengths']) >= 250
def run_random(env):
    """Run a RandomAgent on ``env`` for a fixed 250 episodes.

    Each episode resets the environment and steps with random actions
    until the environment reports done. Nothing is returned; results are
    recorded by the gym monitor the caller attached to ``env``.
    """
    episode_count = 250
    agent = RandomAgent(env.action_space)
    # `range` instead of the Python-2-only `xrange`; identical behavior on
    # Python 2 for this small, fully-consumed loop, and valid on Python 3.
    for _ in range(episode_count):
        ob = env.reset()
        # No reward/done exists before the first step of an episode.
        reward = done = None
        while True:
            action = agent.act(ob, reward, done)
            ob, reward, done, _info = env.step(action)
            if done:
                break
def first_ten(id):
    """Video callable: record only the first ten episodes (id 0 through 9)."""
    recording_cutoff = 10
    return id < recording_cutoff
def main():
    """Parse CLI flags, configure logging, and launch the random-agent run.

    Returns 0 on success (used as the process exit code).
    """
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-b', '--base-dir', help='Set base dir.')
    parser.add_argument('-v', '--verbose', action='count', dest='verbosity', default=0, help='Set verbosity.')
    args = parser.parse_args()
    # Default to INFO; any -v bumps to DEBUG.
    level = logging.DEBUG if args.verbosity >= 1 else logging.INFO
    logger.setLevel(level)
    runner = env_runner.EnvRunner('random-v3', run_random, complete, base_dir=args.base_dir, video_callable=first_ten)
    runner.run()
    return 0


if __name__ == '__main__':
    sys.exit(main())
#!/usr/bin/env python
# uploader.py: upload the recorded per-environment training directories
# produced by random_agent.py.
import argparse
import logging
import os
import sys
import gym
import env_runner
# In modules, use `logger = logging.getLogger(__name__)`
logger = logging.getLogger()
# Send log records to stderr.
logger.addHandler(logging.StreamHandler(sys.stderr))
class Uploader(object):
    """Upload every per-environment training directory found under a base
    directory, tagging each upload with an algorithm id and writeup URL."""

    def __init__(self, base_dir, algorithm_id, writeup):
        self.base_dir = base_dir
        self.algorithm_id = algorithm_id
        self.writeup = writeup

    def run(self):
        """Walk base_dir and upload each subdirectory; skip plain files."""
        for name in os.listdir(self.base_dir):
            if name in ('.', '..'):
                continue
            training_dir = os.path.join(self.base_dir, name)
            if not os.path.isdir(training_dir):
                logger.info('Skipping: {}'.format(training_dir))
                continue
            gym.upload(training_dir, algorithm_id=self.algorithm_id, writeup=self.writeup)
def main():
    """Parse CLI flags, configure logging, and run the uploader.

    Returns 0 on success (used as the process exit code).
    """
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-b', '--base-dir', required=True, help='Set base dir.')
    parser.add_argument('-a', '--algorithm_id', required=True, help='Set the algorithm id.')
    parser.add_argument('-w', '--writeup', help='Writeup to attach.')
    parser.add_argument('-v', '--verbose', action='count', dest='verbosity', default=0, help='Set verbosity.')
    args = parser.parse_args()
    # Default to INFO; any -v bumps to DEBUG.
    level = logging.DEBUG if args.verbosity >= 1 else logging.INFO
    logger.setLevel(level)
    uploader = Uploader(base_dir=args.base_dir, algorithm_id=args.algorithm_id, writeup=args.writeup)
    uploader.run()
    return 0


if __name__ == '__main__':
    sys.exit(main())
Copy link

ghost commented Jan 11, 2017

Hello Greg,

My name is Igor Fedorov. I am trying to repeat your results and I have some troubles with it. On my computer I created a folder and copied three files to it. Then, I run xvfb-run -s "-screen 0 1400x900x24" python random_agent.py -b /tmp/random-agent . I get the following message, which I am not sure how to fix:

gym.error.Error: env.monitor has been deprecated as of 12/23/2016. Remove your call to env.monitor.start(directory) and instead wrap your env with env = gym.wrappers.Monitor(env, directory) to record data.
[2017-01-10 20:58:52,640] i=192 id=Carnival-ram-v0 total=759
i=192 id=Carnival-ram-v0 total=759

I tried to change lines 72–73 in env_runner.py (`env.monitor.start(training_dir, video_callable=self.video_callable)`) to `env = gym.wrappers.Monitor(env, training_dir)`, but it did not work.

Could you help me to fix this problem, please?

Sincerely,
Igor Fedorov

@falcondai
Copy link

i think there is some issue with your solution. i noticed from watching the recorded snippet on https://gym.openai.com/evaluations/eval_9ZnTspgvSYK6O0PCkEFYQ that your agent is not actually moving the pointer on the input tape suggesting that it relies (solely) on the simulations to pick the correct output instead of learning the algorithm. I wonder if you agree with that observation and if so any ideas to improve it. Thanks for the writeup and code!

@bdbabiak
Copy link

Fedorovigor — you also have to change the `env.monitor.close()` call (on line 74 or 75, right after the training call) to `env.close()`.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment