Fernando Ribeiro ipsec

## stochastic_muzero.py
# Copyright 2022 DeepMind Technologies Limited.
# Licensed under the Apache License, Version 2.0 and CC BY 4.0.
# You may not use this file except in compliance with these licenses.
# Copies of the licenses can be found at https://www.apache.org/licenses/LICENSE-2.0
# and https://creativecommons.org/licenses/by/4.0/legalcode.

"""Pseudocode description of the Stochastic MuZero algorithm.

This pseudocode was adapted from the original MuZero pseudocode.
"""

## DQN_train.py
  def train(self, TargetNet):
        """Draw a random minibatch from the replay buffer and score its next states.

        Returns 0 immediately while the buffer holds fewer than
        ``self.min_experiences`` entries. Otherwise ``self.batch_size`` indices
        are drawn independently (so an index may repeat) and the matching
        entries stored under the 's', 'a', 'r', 's2' and 'done' keys of
        ``self.experience`` are stacked into arrays.

        Args:
            TargetNet: object whose ``predict`` is called on the sampled 's2'
                batch; the result is reduced with a max over axis 1
                (presumably one row per sample and one column per action --
                confirm against the network definition).
        """
        buffer = self.experience
        if not len(buffer['s']) >= self.min_experiences:
            # Not enough stored transitions yet to form a batch.
            return 0

        ids = np.random.randint(
            low=0,
            high=len(buffer['s']),
            size=self.batch_size,
        )

        def gather(key):
            # Stack the sampled entries of one buffer column, in `ids` order.
            column = buffer[key]
            return np.asarray([column[i] for i in ids])

        states = gather('s')
        actions = gather('a')
        rewards = gather('r')
        states_next = gather('s2')
        dones = gather('done')

        next_predictions = TargetNet.predict(states_next)
        value_next = np.max(next_predictions, axis=1)
        # NOTE(review): the visible body ends here, so this path returns None
        # implicitly -- confirm whether the remaining update step was cut off.

## atomic_counter.py
"""An atomic, thread-safe incrementing counter."""

import threading


class AtomicCounter:
    """An atomic, thread-safe incrementing counter.

    >>> counter = AtomicCounter()
    >>> counter.increment()
	# Copyright 2022 DeepMind Technologies Limited.
	# Licensed under the Apache License, Version 2.0 and CC BY 4.0.
	# You may not use this file except in compliance with these licenses.
	# Copies of the licenses can be found at https://www.apache.org/licenses/LICENSE-2.0
	# and https://creativecommons.org/licenses/by/4.0/legalcode.

	"""Pseudocode description of the Stochastic MuZero algorithm.

	This pseudocode was adapted from the original MuZero pseudocode.
	"""
	def train(self, TargetNet):
		"""Sample a minibatch from ``self.experience`` and evaluate its next states.

		While fewer than ``self.min_experiences`` entries are stored under the
		's' key, nothing is sampled and 0 is returned. Otherwise
		``self.batch_size`` indices are drawn with ``np.random.randint``
		(independent draws, so repeats are possible) and the entries under
		the 's', 'a', 'r', 's2' and 'done' keys are collected into arrays.

		Args:
			TargetNet: object providing ``predict``; it is called on the
				sampled 's2' batch and the result is max-reduced over axis 1
				(presumably per-action values for each sample -- confirm).
		"""
		if len(self.experience['s']) < self.min_experiences:
			# Too few stored transitions to build a batch.
			return 0
		ids = np.random.randint(low=0, high=len(self.experience['s']), size=self.batch_size)
		states = np.asarray([self.experience['s'][i] for i in ids])
		actions = np.asarray([self.experience['a'][i] for i in ids])
		rewards = np.asarray([self.experience['r'][i] for i in ids])
		states_next = np.asarray([self.experience['s2'][i] for i in ids])
		dones = np.asarray([self.experience['done'][i] for i in ids])
		value_next = np.max(TargetNet.predict(states_next), axis=1)
		# NOTE(review): nothing is returned after this point in the visible
		# code -- confirm whether the rest of the update step is missing.
	"""An atomic, thread-safe incrementing counter."""

	import threading


	class AtomicCounter:
	"""An atomic, thread-safe incrementing counter.

	>>> counter = AtomicCounter()
	>>> counter.increment()