If running headless, execute:
xvfb-run -s "-screen 0 1400x900x24" python random_agent.py -b /tmp/random-agent
And then run:
python uploader.py -b /tmp/random-agent -w https://gist.github.com/gdb/62d8d8f5e13270d4b116336ae61240db -a random-v3
import logging | |
import multiprocessing | |
import os | |
import shutil | |
import signal | |
import tempfile | |
import gym | |
from gym import monitoring | |
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Worker pool shared by every EnvRunner in this process. It stays None until
# the first EnvRunner is constructed.
pool = None


class EnvRunner(object):
    """Run a training callable over a set of gym environments in parallel.

    Every environment gets its own directory under ``base_dir``. Environments
    whose saved monitor results satisfy ``complete_callable`` are skipped; the
    rest are handed to the shared worker pool.
    """

    def __init__(self, algorithm_id, training_callable, complete_callable, base_dir=None, video_callable=None, processes=None, env_ids=None):
        """Store the configuration and make sure the shared pool exists.

        Args:
            algorithm_id: Identifier kept on the instance; this class does not
                read it again.
            training_callable: Called in the worker with the environment.
            complete_callable: Called with the loaded monitor results; a
                truthy return value marks the environment as already done.
            base_dir: Root directory for per-environment output. A new
                temporary directory is created when this is falsy.
            video_callable: Passed on to ``env.monitor.start`` in the worker.
            processes: Size of the worker pool. When falsy, one less than the
                CPU count is used, with a minimum of 1. Ignored if the pool
                already exists.
            env_ids: Ids of the environments to run. ``None`` selects every
                spec in the gym registry.
        """
        global pool

        self.algorithm_id = algorithm_id
        self.training_callable = training_callable
        self.complete_callable = complete_callable
        self.video_callable = video_callable
        self.base_dir = base_dir or tempfile.mkdtemp()

        if env_ids is None:
            self.specs = gym.envs.registry.all()
        else:
            self.specs = [gym.spec(env_id) for env_id in env_ids]
        # Filled in by select_specs() with (spec, training_dir) pairs.
        self.selected_specs = None

        worker_count = processes or max(1, multiprocessing.cpu_count() - 1)
        # The pool is a module global, so only the first runner creates it.
        if not pool:
            pool = multiprocessing.Pool(worker_count)

    def run(self):
        """Pick the environments that still need work, then train on them."""
        self.select_specs()
        self.train()

    def train(self):
        """Dispatch one ``run_training`` job per selected environment."""
        jobs = [
            (self, index, spec, training_dir)
            for index, (spec, training_dir) in enumerate(self.selected_specs)
        ]
        try:
            pool.map(run_training, jobs)
        except KeyboardInterrupt:
            # Stop the workers before letting the interrupt propagate.
            pool.terminate()
            pool.join()
            raise

    def select_specs(self):
        """Populate ``self.selected_specs`` with the environments left to run.

        An environment is skipped when results load from its directory and
        ``complete_callable`` accepts them. Otherwise any existing directory
        for it is deleted and the environment is selected.
        """
        pending = []
        for spec in self.specs:
            target_dir = self.env_dir(spec.id)
            loaded = monitoring.load_results(target_dir)
            if loaded and self.complete_callable(loaded):
                logger.info('Skipping already-processed %s', spec.id)
            else:
                # Leftovers from an incomplete run are discarded.
                if os.path.exists(target_dir):
                    shutil.rmtree(target_dir)
                pending.append((spec, target_dir))
        self.selected_specs = pending

    def env_dir(self, id):
        """Return the output directory for the environment with this id."""
        return os.path.join(self.base_dir, id)
# Actually run the training (in the worker)
def run_training(args):
    """Train on a single environment inside a pool worker.

    Args:
        args: A ``(runner, i, spec, training_dir)`` tuple as built by
            ``EnvRunner.train``. It arrives as one value because ``pool.map``
            passes each work item as a single positional argument.

    Raises:
        Whatever the runner's ``training_callable`` raises; the monitor is
        closed before the exception propagates.
    """
    # Unpack in the body: tuple parameters in a ``def`` are Python 2-only
    # syntax and a SyntaxError on Python 3.
    runner, i, spec, training_dir = args

    # Workers ignore SIGINT; the parent catches KeyboardInterrupt and
    # terminates the pool itself.
    signal.signal(signal.SIGINT, signal.SIG_IGN)

    logger.info('i=%s id=%s total=%s', i, spec.id, len(runner.selected_specs))
    env = spec.make()
    env.monitor.start(training_dir, video_callable=runner.video_callable)
    try:
        runner.training_callable(env)
    finally:
        # Dump monitor info to disk, even when training failed part-way.
        # NOTE(review): newer gym releases reportedly need env.close() here
        # instead of env.monitor.close() - confirm against the installed gym.
        env.monitor.close()
#!/usr/bin/env python | |
import argparse | |
import logging | |
import sys | |
import gym | |
import env_runner | |
# This file runs as a script, so it configures the root logger itself.
# In modules, use `logger = logging.getLogger(__name__)`
logger = logging.getLogger()

# Log records are written to stderr.
_stderr_handler = logging.StreamHandler(sys.stderr)
logger.addHandler(_stderr_handler)
class RandomAgent(object):
    """Agent that ignores its inputs and returns ``action_space.sample()``."""

    def __init__(self, action_space):
        """Keep a reference to the space that actions are sampled from."""
        self.action_space = action_space

    def act(self, observation, reward, done):
        """Return one sample from the stored action space.

        ``observation``, ``reward`` and ``done`` are accepted so the agent can
        be called with the usual step outputs, but none of them is used.
        """
        space = self.action_space
        return space.sample()
def complete(results, episode_count=250):
    """Report whether saved results cover a full run.

    Args:
        results: Mapping with an ``'episode_lengths'`` sequence.
        episode_count: Number of entries a finished run must have. The
            default of 250 matches the number of episodes ``run_random``
            plays by default.

    Returns:
        True when ``results['episode_lengths']`` has exactly
        ``episode_count`` entries, False otherwise.
    """
    return len(results['episode_lengths']) == episode_count
def run_random(env, episode_count=250):
    """Play ``episode_count`` episodes on ``env`` using a ``RandomAgent``.

    Args:
        env: Environment exposing ``action_space``, ``reset()`` and
            ``step(action)``; ``step`` must return a 4-tuple whose third
            item says whether the episode is over.
        episode_count: Number of episodes to play. Defaults to 250.
    """
    agent = RandomAgent(env.action_space)
    # `range` instead of `xrange`: `xrange` does not exist on Python 3, and
    # for a count this small the Python 2 list is negligible.
    for _episode in range(episode_count):
        ob = env.reset()
        # Before the first step there is no reward or termination flag yet.
        reward = done = None
        while not done:
            action = agent.act(ob, reward, done)
            ob, reward, done, _info = env.step(action)
def first_ten(id):
    """Return True when ``id`` is below 10, False otherwise."""
    is_below_ten = id < 10
    return is_below_ten
def main():
    """Parse the command line, set the log level and run the random agent.

    Returns:
        0, which the caller uses as the process exit status.
    """
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-b', '--base-dir', help='Set base dir.')
    parser.add_argument('-v', '--verbose', action='count', dest='verbosity', default=0, help='Set verbosity.')
    options = parser.parse_args()

    # No -v gives INFO; one or more -v flags give DEBUG.
    if options.verbosity >= 1:
        logger.setLevel(logging.DEBUG)
    elif options.verbosity == 0:
        logger.setLevel(logging.INFO)

    env_runner.EnvRunner(
        'random-v3',
        run_random,
        complete,
        base_dir=options.base_dir,
        video_callable=first_ten,
    ).run()

    return 0


if __name__ == '__main__':
    sys.exit(main())
#!/usr/bin/env python | |
import argparse | |
import logging | |
import os | |
import sys | |
import gym | |
import env_runner | |
# This file runs as a script, so it configures the root logger itself.
# In modules, use `logger = logging.getLogger(__name__)`
logger = logging.getLogger()

# Log records are written to stderr.
_stderr_handler = logging.StreamHandler(sys.stderr)
logger.addHandler(_stderr_handler)
class Uploader(object):
    """Upload every training directory found directly under a base directory."""

    def __init__(self, base_dir, algorithm_id, writeup):
        """Store the upload settings.

        Args:
            base_dir: Directory whose sub-directories are uploaded.
            algorithm_id: Passed to ``gym.upload`` as ``algorithm_id``.
            writeup: Passed to ``gym.upload`` as ``writeup``.
        """
        self.base_dir = base_dir
        self.algorithm_id = algorithm_id
        self.writeup = writeup

    def run(self):
        """Call ``gym.upload`` once for each sub-directory of ``base_dir``.

        Entries that are not directories are logged and skipped.
        """
        # os.listdir never returns '.' or '..', so they need no filtering.
        for entry in os.listdir(self.base_dir):
            training_dir = os.path.join(self.base_dir, entry)
            if not os.path.isdir(training_dir):
                # Lazy %-style arguments: formatted only if the record is emitted.
                logger.info('Skipping: %s', training_dir)
                continue
            gym.upload(training_dir, algorithm_id=self.algorithm_id, writeup=self.writeup)
def main():
    """Parse the command line, set the log level and upload the results.

    Returns:
        0, which the caller uses as the process exit status.
    """
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-b', '--base-dir', required=True, help='Set base dir.')
    parser.add_argument('-a', '--algorithm_id', required=True, help='Set the algorithm id.')
    parser.add_argument('-w', '--writeup', help='Writeup to attach.')
    parser.add_argument('-v', '--verbose', action='count', dest='verbosity', default=0, help='Set verbosity.')
    options = parser.parse_args()

    # No -v gives INFO; one or more -v flags give DEBUG.
    if options.verbosity >= 1:
        logger.setLevel(logging.DEBUG)
    elif options.verbosity == 0:
        logger.setLevel(logging.INFO)

    uploader = Uploader(
        base_dir=options.base_dir,
        algorithm_id=options.algorithm_id,
        writeup=options.writeup,
    )
    uploader.run()

    return 0


if __name__ == '__main__':
    sys.exit(main())
Fedorovigor - you also have to change the `env.monitor.close()` call (on line 74 or 75, right after) to `env.close()`.