tensorforce tqdm progress callback
import numpy as np
from tqdm import tqdm_notebook, tqdm


class StepsProgressBarNotebook(object):
    """
    HTML5 progress bar for tensorforce, using tqdm_notebook (for jupyter-notebook).

    Usage:
    `runner.run(episodes=np.inf, episode_finished=StepsProgressBarNotebook(steps=1e9, print_every=1000))`
    """

    def __init__(self, steps, print_every=None, mean_of=100):
        """
        steps - stop after this many total timesteps
        print_every - print the mean metrics every `print_every` timesteps
        mean_of - the progress bar shows the reward as the mean of the last n episodes
        """
        self.print_every = print_every
        self.mean_of = mean_of
        self.steps = steps
        self.last_print = 0
        self.progbar = tqdm_notebook(
            desc='',
            total=steps,
            leave=True,
            # mininterval=1,
            unit='steps',
        )

    def __call__(self, r):
        # summarise the last `mean_of` episodes: mean reward plus [min, max] range
        desc = "reward: {reward: 2.4f} [{rewards_min: 2.0f}, {rewards_max: 2.0f}] episodes: {episodes}".format(
            reward=np.mean(r.episode_rewards[-self.mean_of:]),
            rewards_min=np.min(r.episode_rewards[-self.mean_of:]),
            rewards_max=np.max(r.episode_rewards[-self.mean_of:]),
            episodes=r.episode,
        )
        self.progbar.desc = desc
        # advance the bar by the length of the episode that just finished
        self.progbar.update(r.episode_lengths[-1])
        # total_timesteps rarely lands on an exact multiple of print_every,
        # so track the last printed step instead of using a modulo check
        if self.print_every and r.total_timesteps - self.last_print >= self.print_every:
            self.last_print = r.total_timesteps
            print(self.progbar.desc)
        # returning False tells the tensorforce Runner to stop training
        return r.total_timesteps < self.steps
class StepsProgressBar(object):
    """
    Console progress bar for tensorforce that stops training after N steps.

    Usage:
    `runner.run(episodes=np.inf, episode_finished=StepsProgressBar(steps=1e9, print_every=1000))`
    """

    def __init__(self, steps, print_every=None, mean_of=100):
        """
        steps - stop after this many total timesteps
        print_every - print the mean metrics every `print_every` timesteps
        mean_of - the progress bar shows the reward as the mean of the last n episodes
        """
        self.print_every = print_every
        self.mean_of = mean_of
        self.steps = steps
        self.last_print = 0
        self.progbar = tqdm(
            desc='',
            total=steps,
            leave=True,
            # mininterval=1,
            unit='steps',
        )

    def __call__(self, r):
        # summarise the last `mean_of` episodes: mean reward plus [min, max] range
        desc = "reward: {reward: 2.4f} [{rewards_min: 2.0f}, {rewards_max: 2.0f}] episodes: {episodes}".format(
            reward=np.mean(r.episode_rewards[-self.mean_of:]),
            rewards_min=np.min(r.episode_rewards[-self.mean_of:]),
            rewards_max=np.max(r.episode_rewards[-self.mean_of:]),
            episodes=r.episode,
        )
        self.progbar.desc = desc
        # advance the bar by the length of the episode that just finished
        self.progbar.update(r.episode_lengths[-1])
        # total_timesteps rarely lands on an exact multiple of print_every,
        # so track the last printed step instead of using a modulo check
        if self.print_every and r.total_timesteps - self.last_print >= self.print_every:
            self.last_print = r.total_timesteps
            print(self.progbar.desc)
        # returning False tells the tensorforce Runner to stop training
        return r.total_timesteps < self.steps
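
A minimal usage sketch, assuming a tensorforce Runner named `runner` has already been built around an agent and environment (their construction is omitted here and depends on your tensorforce version); the callback is passed via `episode_finished` exactly as in the docstrings above:

import numpy as np

# `runner` is assumed to be a pre-built tensorforce.execution.Runner
callback = StepsProgressBar(steps=1e9, print_every=1000)  # or StepsProgressBarNotebook inside jupyter
runner.run(episodes=np.inf, episode_finished=callback)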