If running headless, execute:
xvfb-run -s "-screen 0 1400x900x24" python random_agent.py -b /tmp/random-agent
And then run:
python uploader.py -b /tmp/random-agent -w https://gist.github.com/gdb/62d8d8f5e13270d4b116336ae61240db -a random-v3
import logging | |
import multiprocessing | |
import os | |
import shutil | |
import signal | |
import tempfile | |
import gym | |
from gym import monitoring | |
# Module-level logger; the consuming script is expected to configure
# handlers and levels (see random_agent.py / uploader.py).
logger = logging.getLogger(__name__)

# Shared multiprocessing pool, created lazily by EnvRunner.__init__ so that
# every EnvRunner instance in this process reuses a single pool.
pool = None
class EnvRunner(object):
    """Fan a training callable out over a set of Gym environment specs.

    Work is dispatched through the module-level `pool` so that a single
    worker pool is shared by every runner in the process.
    """

    def __init__(self, algorithm_id, training_callable, complete_callable,
                 base_dir=None, video_callable=None, processes=None,
                 env_ids=None):
        global pool
        self.algorithm_id = algorithm_id
        self.training_callable = training_callable
        self.complete_callable = complete_callable
        self.video_callable = video_callable
        # Fall back to a throwaway scratch directory when none is given.
        self.base_dir = base_dir or tempfile.mkdtemp()
        if env_ids is None:
            self.specs = gym.envs.registry.all()
        else:
            self.specs = [gym.spec(env_id) for env_id in env_ids]
        self.selected_specs = None
        # Create the shared pool lazily, leaving one core free by default.
        worker_count = processes or max(1, multiprocessing.cpu_count() - 1)
        if not pool:
            pool = multiprocessing.Pool(worker_count)

    def run(self):
        """Select the specs that still need work, then train on them."""
        self.select_specs()
        self.train()

    def train(self):
        """Dispatch one training task per selected spec to the pool."""
        work = [(self, i, spec, training_dir)
                for i, (spec, training_dir) in enumerate(self.selected_specs)]
        try:
            pool.map(run_training, work)
        except KeyboardInterrupt:
            # Tear the pool down cleanly before propagating Ctrl-C.
            pool.terminate()
            pool.join()
            raise

    def select_specs(self):
        """Pick the specs whose recorded results are absent or incomplete."""
        chosen = []
        for spec in self.specs:
            training_dir = self.env_dir(spec.id)
            results = monitoring.load_results(training_dir)
            if results and self.complete_callable(results):
                logger.info('Skipping already-processed %s', spec.id)
                continue
            if os.path.exists(training_dir):
                # Stale partial run: restart this environment from scratch.
                shutil.rmtree(training_dir)
            chosen.append((spec, training_dir))
        self.selected_specs = chosen

    def env_dir(self, id):
        """Per-environment training directory under the base directory."""
        return os.path.join(self.base_dir, id)
# Actually run the training (in the worker)
def run_training(args):
    """Worker entry point: run the training callable on one environment.

    Args:
        args: tuple of (runner, i, spec, training_dir) as built by
            EnvRunner.train. Python 3 removed tuple parameter unpacking
            (PEP 3113), so the tuple is unpacked explicitly here instead
            of in the signature.
    """
    runner, i, spec, training_dir = args
    # Workers ignore SIGINT so that Ctrl-C reaches only the parent, which
    # terminates the pool from EnvRunner.train.
    signal.signal(signal.SIGINT, signal.SIG_IGN)
    logger.info('i=%s id=%s total=%s', i, spec.id, len(runner.selected_specs))
    env = spec.make()
    # env.monitor.start() was deprecated (as of 12/23/2016); wrap the env
    # with gym.wrappers.Monitor instead.
    env = gym.wrappers.Monitor(env, training_dir,
                               video_callable=runner.video_callable)
    runner.training_callable(env)
    # Dump monitor info to disk
    env.close()
#!/usr/bin/env python | |
import argparse | |
import logging | |
import sys | |
import gym | |
import env_runner | |
# In modules, use `logger = logging.getLogger(__name__)`
# This file is a script entry point, so configure the root logger directly
# and send log output to stderr.
logger = logging.getLogger()
logger.addHandler(logging.StreamHandler(sys.stderr))
class RandomAgent(object):
    """An agent that ignores its inputs and samples uniformly random actions."""

    def __init__(self, action_space):
        # The space to draw actions from; anything with a sample() method.
        self.action_space = action_space

    def act(self, observation, reward, done):
        """Return a randomly sampled action; all arguments are ignored."""
        return self.action_space.sample()
def complete(results):
    """Return True once exactly 250 episodes have been recorded in `results`."""
    recorded = len(results['episode_lengths'])
    return recorded == 250
def run_random(env):
    """Run 250 episodes of a random agent in `env`.

    Each episode resets the environment and steps with uniformly sampled
    actions until the environment reports done.
    """
    episode_count = 250
    agent = RandomAgent(env.action_space)
    # `range`, not the Python-2-only `xrange`, so this runs on Python 3 too.
    for i in range(episode_count):
        ob = env.reset()
        # No reward/done exists before the first step of an episode.
        reward = done = None
        while True:
            action = agent.act(ob, reward, done)
            ob, reward, done, _ = env.step(action)
            if done:
                break
def first_ten(id):
    """Return True when id < 10.

    Used as the monitor's video_callable, so presumably `id` is an episode
    index and only the first ten episodes are recorded -- verify against
    the monitor API.
    """
    return id < 10
def main():
    """Parse CLI arguments, configure logging, and run the random agent."""
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-b', '--base-dir', help='Set base dir.')
    parser.add_argument('-v', '--verbose', action='count', dest='verbosity',
                        default=0, help='Set verbosity.')
    args = parser.parse_args()

    # Any -v flag escalates logging from INFO to DEBUG.
    logger.setLevel(logging.DEBUG if args.verbosity >= 1 else logging.INFO)

    runner = env_runner.EnvRunner('random-v3', run_random, complete,
                                  base_dir=args.base_dir,
                                  video_callable=first_ten)
    runner.run()
    return 0
if __name__ == '__main__':
    # Propagate main()'s return value as the process exit status.
    sys.exit(main())
#!/usr/bin/env python | |
import argparse | |
import logging | |
import os | |
import sys | |
import gym | |
import env_runner | |
# In modules, use `logger = logging.getLogger(__name__)`
# This file is a script entry point, so configure the root logger directly
# and send log output to stderr.
logger = logging.getLogger()
logger.addHandler(logging.StreamHandler(sys.stderr))
class Uploader(object):
    """Uploads every per-environment training directory under a base dir."""

    def __init__(self, base_dir, algorithm_id, writeup):
        self.base_dir = base_dir
        self.algorithm_id = algorithm_id
        self.writeup = writeup

    def run(self):
        """Upload each subdirectory of base_dir as one training run."""
        for entry in os.listdir(self.base_dir):
            # Defensive skip of the special directory entries.
            if entry in ('.', '..'):
                continue
            candidate = os.path.join(self.base_dir, entry)
            if os.path.isdir(candidate):
                gym.upload(candidate, algorithm_id=self.algorithm_id,
                           writeup=self.writeup)
            else:
                logger.info('Skipping: {}'.format(candidate))
def main():
    """Parse CLI arguments, configure logging, and upload all training runs."""
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-b', '--base-dir', required=True, help='Set base dir.')
    parser.add_argument('-a', '--algorithm_id', required=True,
                        help='Set the algorithm id.')
    parser.add_argument('-w', '--writeup', help='Writeup to attach.')
    parser.add_argument('-v', '--verbose', action='count', dest='verbosity',
                        default=0, help='Set verbosity.')
    args = parser.parse_args()

    # Any -v flag escalates logging from INFO to DEBUG.
    logger.setLevel(logging.DEBUG if args.verbosity >= 1 else logging.INFO)

    uploader = Uploader(base_dir=args.base_dir,
                        algorithm_id=args.algorithm_id,
                        writeup=args.writeup)
    uploader.run()
    return 0
if __name__ == '__main__':
    # Propagate main()'s return value as the process exit status.
    sys.exit(main())
I think there is some issue with your solution. I noticed from watching the recorded snippet at https://gym.openai.com/evaluations/eval_9ZnTspgvSYK6O0PCkEFYQ that your agent is not actually moving the pointer on the input tape, suggesting that it relies (solely) on the simulations to pick the correct output instead of learning the algorithm. I wonder if you agree with that observation and, if so, whether you have any ideas to improve it. Thanks for the writeup and code!
Igor Fedorov — you also have to change the env.monitor.close call (on line 74, or line 75 right after it) to env.close as well.
Hello Greg,
My name is Igor Fedorov. I am trying to repeat your results and I have some troubles with it. On my computer I created a folder and copied three files to it. Then, I run xvfb-run -s "-screen 0 1400x900x24" python random_agent.py -b /tmp/random-agent . I get the following message, which I am not sure how to fix:
gym.error.Error: env.monitor has been deprecated as of 12/23/2016. Remove your call to
env.monitor.start(directory)
and instead wrap your env with env = gym.wrappers.Monitor(env, directory)
to record data. [2017-01-10 20:58:52,640] i=192 id=Carnival-ram-v0 total=759
i=192 id=Carnival-ram-v0 total=759
I tried to change lines 72-73 in env_runner.py (env.monitor.start(training_dir, video_callable=self.video_callable)) to env = gym.wrappers.Monitor(env, training_dir), but it did not work.
Could you help me to fix this problem, please?
Sincerely,
Igor Fedorov