cboettig/README.md

## README.md

      
    Raw
  

              README.md
            
          
    Reproducibility problems

Install the exact pinned dependencies:
pip install -r requirements.txt
Download the cached agent (alternatively, skip this and re-train the agent by uncommenting the learn and save steps in fishing_SAC_tuned.py):
wget https://minio.thelio.carlboettiger.info/shared-data/sac_tuned.zip

Run example
python fishing_SAC_tuned.py

Example output:
mean reward: 7.756014 std: 0.0 tuned_value: 7.755079
mean reward from sims: 1.327337048649788
Note that the mean reward reported by evaluate_policy() is a nearly perfect match to the
(claimed) tuned value, while the mean from env.simulate() is far lower.  (Also check out
the example simulation figures in the results dir, which show the fish stock crashing.)

  
## fishing_SAC_tuned.py
# Fishing using tuned SB3
#
# Loads a saved SAC agent from "sac_tuned", scores it with SB3's
# evaluate_policy(), scores it again with the environment's own simulate()
# helper, and writes two plots under results/.

import os

import numpy as np
import stable_baselines3 as sb3
import gym
# Not referenced by name below; presumably imported so that the "fishing-v1"
# environment id is registered with gym -- TODO confirm.
import gym_fishing

from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.evaluation import evaluate_policy

seed = 24
np.random.seed(seed)

# Number of episodes used by both evaluations. It is also the divisor of the
# simulated mean below, so it lives in one place to keep the three uses in sync.
n_reps = 100

# Reference value printed next to the evaluate_policy() result; the README
# calls it the (claimed) tuned value.
tuned_value = 7.755079

# Create a fishing environment
env = gym.make("fishing-v1")

# Create an agent
# Keyword arguments forwarded to the SAC constructor.
hyper = {
    "gamma": 0.95,
    "learning_rate": 0.000115,
    "batch_size": 128,
    "buffer_size": 100000,
    "learning_starts": 0,
    "train_freq": 128,
    "tau": 0.01,
    "policy_kwargs": {"log_std_init": 0.680754, "net_arch": [400, 300]},
}

# This freshly constructed agent is only trained if the two lines below are
# uncommented; otherwise it is replaced by the agent loaded from disk.
agent = sb3.SAC("MlpPolicy", env, seed=seed, **hyper)

# Train the agent
#agent.learn(total_timesteps=300000)
#agent.save("sac_tuned")


agent = sb3.SAC.load("sac_tuned")

# Evaluate the trained agent
eval_env = Monitor(gym.make("fishing-v1"))
# deterministic=True is evaluate_policy()'s default; it is spelled out because
# it matters when comparing against env.simulate() below.
mean_reward, std_reward = evaluate_policy(
    agent, eval_env, n_eval_episodes=n_reps, deterministic=True
)
print("mean reward:", mean_reward, "std:", std_reward, "tuned_value:", tuned_value)


# NOTE(review): simulate() and policyfn() are gym_fishing code not visible
# here. If simulate() calls agent.predict() with its default
# deterministic=False, it scores the stochastic policy rather than the
# deterministic one evaluated above, which could explain the gap between the
# two reported means -- TODO confirm against gym_fishing.
agent_sims = env.simulate(agent, reps=n_reps)
agent_policy = env.policyfn(agent, reps=5)

# Total of the "reward" column over all reps, divided by the number of reps.
print("mean reward from sims:", np.sum(agent_sims["reward"]) / n_reps)

# Plot results
# Both plots are written under results/; create the directory first so a fresh
# checkout does not fail on the save after all the evaluation work is done.
os.makedirs("results", exist_ok=True)
env.plot(agent_sims, "results/fishing_sac_tuned.png")
env.plot_policy(agent_policy, "results/fishing_sac_tuned_policy.png")

## requirements.txt
absl-py==0.12.0
alabaster==0.7.12
alembic==1.6.5
apipkg==1.5
appdirs==1.4.4
atari-py==0.2.6
attrs==21.2.0
Babel==2.9.1
black==21.5b2
box2d-py==2.3.8
cachetools==4.2.2
certifi==2021.5.30
chardet==4.0.0
click==8.0.1
cliff==3.8.0
cloudpickle==1.6.0
cmaes==0.8.2
cmd2==1.5.0
colorama==0.4.4
colorlog==5.0.1
coverage==5.5
cycler==0.10.0
DataProperty==0.50.1
decorator==4.4.2
docutils==0.17.1
execnet==1.8.1
flake8==3.9.2
flake8-bugbear==21.4.3
google-auth==1.30.1
google-auth-oauthlib==0.4.4
greenlet==1.1.0
grpcio==1.38.0
gym==0.18.3
gym-conservation==0.0.5
gym-fishing==0.0.9
gym-minigrid==1.0.2
idna==2.10
imagesize==1.2.0
importlab==0.6.1
iniconfig==1.1.1
isort==5.8.0
Jinja2==3.0.1
joblib==1.0.1
kiwisolver==1.3.1
livereload==2.6.3
Mako==1.1.4
Markdown==3.3.4
MarkupSafe==2.0.1
matplotlib==3.4.2
mbstrdecoder==1.0.1
mccabe==0.6.1
msgfy==0.1.0
mypy-extensions==0.4.3
networkx==2.5.1
ninja==1.10.0.post2
numpy==1.20.3
oauthlib==3.1.1
opencv-python==4.5.2.52
optuna==2.7.0
packaging==20.9
pandas==1.2.4
pathspec==0.8.1
pathvalidate==2.4.1
pbr==5.6.0
Pillow==8.2.0
pluggy==0.13.1
prettytable==2.1.0
protobuf==3.17.1
psutil==5.8.0
py==1.10.0
pyaml==20.4.0
pyasn1==0.4.8
pyasn1-modules==0.2.8
pybullet==3.1.7
pycodestyle==2.7.0
pyenchant==3.2.0
pyflakes==2.3.1
pyglet==1.5.15
Pygments==2.9.0
pyparsing==2.4.7
pyperclip==1.8.2
pytablewriter==0.60.0
pytest==6.2.4
pytest-cov==2.12.1
pytest-env==0.6.2
pytest-forked==1.3.0
pytest-xdist==2.2.1
python-dateutil==2.8.1
python-editor==1.0.4
pytype==2021.5.25
pytz==2021.1
PyYAML==5.4.1
regex==2021.4.4
requests==2.25.1
requests-oauthlib==1.3.0
rsa==4.7.2
sb3-contrib==1.1.0a7
scikit-learn==0.24.2
scikit-optimize==0.8.1
scipy==1.6.3
seaborn==0.11.1
six==1.16.0
snowballstemmer==2.1.0
Sphinx==4.0.2
sphinx-autobuild==2021.3.14
sphinx-autodoc-typehints==1.12.0
sphinx-rtd-theme==0.5.2
sphinxcontrib-applehelp==1.0.2
sphinxcontrib-devhelp==1.0.2
sphinxcontrib-htmlhelp==2.0.0
sphinxcontrib-jsmath==1.0.1
sphinxcontrib-qthelp==1.0.3
sphinxcontrib-serializinghtml==1.1.5
sphinxcontrib-spelling==7.2.1
SQLAlchemy==1.4.17
stable-baselines3==1.1.0a7
stevedore==3.3.0
tabledata==1.1.4
tcolorpy==0.0.9
tensorboard==2.5.0
tensorboard-data-server==0.6.1
tensorboard-plugin-wit==1.8.0
threadpoolctl==2.1.0
toml==0.10.2
torch==1.8.1
tornado==6.1
tqdm==4.61.0
typed-ast==1.4.3
typepy==1.1.5
typing-extensions==3.10.0.0
urllib3==1.26.5
wcwidth==0.2.5
Werkzeug==2.0.1
	# Fishing using tuned SB3
	#
	# Loads a saved SAC agent from "sac_tuned", scores it with SB3's
	# evaluate_policy(), scores it again with the environment's own simulate()
	# helper, and writes two plots under results/.

	import os

	import numpy as np
	import stable_baselines3 as sb3
	import gym
	# Not referenced by name below; presumably imported so that the "fishing-v1"
	# environment id is registered with gym -- TODO confirm.
	import gym_fishing

	from stable_baselines3.common.monitor import Monitor
	from stable_baselines3.common.evaluation import evaluate_policy

	seed = 24
	np.random.seed(seed)

	# Number of episodes used by both evaluations. It is also the divisor of the
	# simulated mean below, so it lives in one place to keep the three uses in sync.
	n_reps = 100

	# Reference value printed next to the evaluate_policy() result; the README
	# calls it the (claimed) tuned value.
	tuned_value = 7.755079

	# Create a fishing environment
	env = gym.make("fishing-v1")

	# Create an agent
	# Keyword arguments forwarded to the SAC constructor.
	hyper = {
		"gamma": 0.95,
		"learning_rate": 0.000115,
		"batch_size": 128,
		"buffer_size": 100000,
		"learning_starts": 0,
		"train_freq": 128,
		"tau": 0.01,
		"policy_kwargs": {"log_std_init": 0.680754, "net_arch": [400, 300]},
	}

	# This freshly constructed agent is only trained if the two lines below are
	# uncommented; otherwise it is replaced by the agent loaded from disk.
	agent = sb3.SAC("MlpPolicy", env, seed=seed, **hyper)

	# Train the agent
	#agent.learn(total_timesteps=300000)
	#agent.save("sac_tuned")


	agent = sb3.SAC.load("sac_tuned")

	# Evaluate the trained agent
	eval_env = Monitor(gym.make("fishing-v1"))
	# deterministic=True is evaluate_policy()'s default; it is spelled out because
	# it matters when comparing against env.simulate() below.
	mean_reward, std_reward = evaluate_policy(
		agent, eval_env, n_eval_episodes=n_reps, deterministic=True
	)
	print("mean reward:", mean_reward, "std:", std_reward, "tuned_value:", tuned_value)


	# NOTE(review): simulate() and policyfn() are gym_fishing code not visible
	# here. If simulate() calls agent.predict() with its default
	# deterministic=False, it scores the stochastic policy rather than the
	# deterministic one evaluated above, which could explain the gap between the
	# two reported means -- TODO confirm against gym_fishing.
	agent_sims = env.simulate(agent, reps=n_reps)
	agent_policy = env.policyfn(agent, reps=5)

	# Total of the "reward" column over all reps, divided by the number of reps.
	print("mean reward from sims:", np.sum(agent_sims["reward"]) / n_reps)

	# Plot results
	# Both plots are written under results/; create the directory first so a fresh
	# checkout does not fail on the save after all the evaluation work is done.
	os.makedirs("results", exist_ok=True)
	env.plot(agent_sims, "results/fishing_sac_tuned.png")
	env.plot_policy(agent_policy, "results/fishing_sac_tuned_policy.png")
	absl-py==0.12.0
	alabaster==0.7.12
	alembic==1.6.5
	apipkg==1.5
	appdirs==1.4.4
	atari-py==0.2.6
	attrs==21.2.0
	Babel==2.9.1
	black==21.5b2
	box2d-py==2.3.8
	cachetools==4.2.2
	certifi==2021.5.30
	chardet==4.0.0
	click==8.0.1
	cliff==3.8.0
	cloudpickle==1.6.0
	cmaes==0.8.2
	cmd2==1.5.0
	colorama==0.4.4
	colorlog==5.0.1
	coverage==5.5
	cycler==0.10.0
	DataProperty==0.50.1
	decorator==4.4.2
	docutils==0.17.1
	execnet==1.8.1
	flake8==3.9.2
	flake8-bugbear==21.4.3
	google-auth==1.30.1
	google-auth-oauthlib==0.4.4
	greenlet==1.1.0
	grpcio==1.38.0
	gym==0.18.3
	gym-conservation==0.0.5
	gym-fishing==0.0.9
	gym-minigrid==1.0.2
	idna==2.10
	imagesize==1.2.0
	importlab==0.6.1
	iniconfig==1.1.1
	isort==5.8.0
	Jinja2==3.0.1
	joblib==1.0.1
	kiwisolver==1.3.1
	livereload==2.6.3
	Mako==1.1.4
	Markdown==3.3.4
	MarkupSafe==2.0.1
	matplotlib==3.4.2
	mbstrdecoder==1.0.1
	mccabe==0.6.1
	msgfy==0.1.0
	mypy-extensions==0.4.3
	networkx==2.5.1
	ninja==1.10.0.post2
	numpy==1.20.3
	oauthlib==3.1.1
	opencv-python==4.5.2.52
	optuna==2.7.0
	packaging==20.9
	pandas==1.2.4
	pathspec==0.8.1
	pathvalidate==2.4.1
	pbr==5.6.0
	Pillow==8.2.0
	pluggy==0.13.1
	prettytable==2.1.0
	protobuf==3.17.1
	psutil==5.8.0
	py==1.10.0
	pyaml==20.4.0
	pyasn1==0.4.8
	pyasn1-modules==0.2.8
	pybullet==3.1.7
	pycodestyle==2.7.0
	pyenchant==3.2.0
	pyflakes==2.3.1
	pyglet==1.5.15
	Pygments==2.9.0
	pyparsing==2.4.7
	pyperclip==1.8.2
	pytablewriter==0.60.0
	pytest==6.2.4
	pytest-cov==2.12.1
	pytest-env==0.6.2
	pytest-forked==1.3.0
	pytest-xdist==2.2.1
	python-dateutil==2.8.1
	python-editor==1.0.4
	pytype==2021.5.25
	pytz==2021.1
	PyYAML==5.4.1
	regex==2021.4.4
	requests==2.25.1
	requests-oauthlib==1.3.0
	rsa==4.7.2
	sb3-contrib==1.1.0a7
	scikit-learn==0.24.2
	scikit-optimize==0.8.1
	scipy==1.6.3
	seaborn==0.11.1
	six==1.16.0
	snowballstemmer==2.1.0
	Sphinx==4.0.2
	sphinx-autobuild==2021.3.14
	sphinx-autodoc-typehints==1.12.0
	sphinx-rtd-theme==0.5.2
	sphinxcontrib-applehelp==1.0.2
	sphinxcontrib-devhelp==1.0.2
	sphinxcontrib-htmlhelp==2.0.0
	sphinxcontrib-jsmath==1.0.1
	sphinxcontrib-qthelp==1.0.3
	sphinxcontrib-serializinghtml==1.1.5
	sphinxcontrib-spelling==7.2.1
	SQLAlchemy==1.4.17
	stable-baselines3==1.1.0a7
	stevedore==3.3.0
	tabledata==1.1.4
	tcolorpy==0.0.9
	tensorboard==2.5.0
	tensorboard-data-server==0.6.1
	tensorboard-plugin-wit==1.8.0
	threadpoolctl==2.1.0
	toml==0.10.2
	torch==1.8.1
	tornado==6.1
	tqdm==4.61.0
	typed-ast==1.4.3
	typepy==1.1.5
	typing-extensions==3.10.0.0
	urllib3==1.26.5
	wcwidth==0.2.5
	Werkzeug==2.0.1