piEsposito / imports.py
Created January 20, 2020 13:55
Imports the modules used in my PG-RL tutorial on Medium.
#VizDoom general import
from vizdoom import *
#Import auxiliary libs
import numpy as np
import matplotlib.pyplot as plt
from collections import deque
#Torch imports
import torch
import torch.nn as nn   #needed for the PolicyNetwork (nn.Module) defined below
piEsposito / device_steup.py
Last active January 21, 2020 00:10
Setting up the device for PyTorch
#In the tutorial we stick with the CPU (Intel MKL) because of its performance and availability on lower-end PCs.
#If you want, you can try CUDA instead by uncommenting the line below.
#use_cuda = torch.cuda.is_available()
use_cuda = False
device = torch.device('cuda' if use_cuda else 'cpu')
FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
LongTensor = torch.cuda.LongTensor if use_cuda else torch.LongTensor
DoubleTensor = torch.cuda.DoubleTensor if use_cuda else torch.DoubleTensor
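These type aliases let the rest of the code build tensors without checking the device each time; a tiny usage sketch (the array below is just a placeholder, not the tutorial's actual stacker output):
#convert a stacked-frame array into a network-ready tensor on the chosen device
example_stack = np.zeros((4, 84, 84), dtype=np.float32)   #placeholder array
example_tensor = torch.from_numpy(example_stack).type(FloatTensor).unsqueeze(0)
print(example_tensor.shape)   #torch.Size([1, 4, 84, 84])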
#Doom game assets: game env creation and frame resize function
#We are using the config and scenario paths from Thomas Simonini's tutorial.
#Those files are available in my GitHub repo linked from the post.
game = DoomGame()
game.load_config("health_gathering.cfg")
game.set_doom_scenario_path("health_gathering.wad")
game.set_seed(42)
game.init()
doom_actions = np.identity(3, dtype=int).tolist()
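#np.identity gives the three one-hot action vectors: [[1, 0, 0], [0, 1, 0], [0, 0, 1]];
#in the default health_gathering config these map to turn left, turn right and move forward.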
class FrameStacker:
    def __init__(self):
        """
        We can set the memory size here.
        Our memory is a deque and, on each stack, it concatenates the frames in memory along axis 0.
        We also have a transformer from torch that handles the resizing.
        """
        self.memory_size = 4
        self.memory = deque(maxlen=self.memory_size)
        self.reset()
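The class above stops inside __init__; below is a minimal sketch of how the reset and stack methods could look, assuming 84x84 grayscale resizing with torchvision (the resolution, the transform pipeline and the method names are my assumptions, not the gist's code).
import numpy as np
import torch
from collections import deque
from torchvision import transforms

class FrameStackerSketch:
    def __init__(self):
        self.memory_size = 4
        self.memory = deque(maxlen=self.memory_size)
        #resize (and grayscale) each raw screen buffer before it is stacked
        self.transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Grayscale(),
            transforms.Resize((84, 84)),
            transforms.ToTensor(),
        ])
        self.reset()

    def reset(self):
        #fill the deque with blank frames so the very first stack already holds 4 entries
        for _ in range(self.memory_size):
            self.memory.append(torch.zeros(1, 84, 84))

    def preprocess(self, frame):
        #assumes an RGB (channels, height, width) screen buffer; move channels last for PIL
        frame = np.moveaxis(frame, 0, -1).astype(np.uint8)
        return self.transform(frame)

    def stack(self, frame):
        #append the newest frame and concatenate the memory along axis 0 -> (4, 84, 84)
        self.memory.append(self.preprocess(frame))
        return torch.cat(list(self.memory), dim=0)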
class PolicyNetwork(nn.Module):
    def __init__(self, lr):
        """
        We've put Tanh as the activation in order to introduce variance into the learning
        by making the model more sensitive.
        I encourage you to try other architectures, optimizers and hyperparameters.
        """
        super(PolicyNetwork, self).__init__()
        self.num_actions = 3
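The layer definitions do not appear above; the following is one plausible sketch of a small convolutional policy over the 4x84x84 frame stack with a softmax over the 3 actions. The layer sizes, the Adam optimizer and the get_action helper are my assumptions, not the gist's architecture.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class PolicyNetworkSketch(nn.Module):
    def __init__(self, lr):
        super(PolicyNetworkSketch, self).__init__()
        self.num_actions = 3
        #convolutions over the 4 stacked 84x84 frames, with Tanh activations as in the docstring above
        self.conv = nn.Sequential(
            nn.Conv2d(4, 32, kernel_size=8, stride=4), nn.Tanh(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2), nn.Tanh(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1), nn.Tanh(),
        )
        self.fc = nn.Sequential(
            nn.Linear(64 * 7 * 7, 512), nn.Tanh(),
            nn.Linear(512, self.num_actions),
        )
        self.optimizer = optim.Adam(self.parameters(), lr=lr)

    def forward(self, x):
        #x: (batch, 4, 84, 84) stacked frames -> action probabilities
        x = self.conv(x)
        x = x.view(x.size(0), -1)
        return F.softmax(self.fc(x), dim=1)

    def get_action(self, state):
        #sample an action from the categorical policy and keep its log-probability for REINFORCE
        probs = self.forward(state)
        dist = torch.distributions.Categorical(probs)
        action = dist.sample()
        return action.item(), dist.log_prob(action)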
def update_policy(policy_network, rewards, log_probs):
    discounted_rewards = []
    for t in range(len(rewards)):
        Gt = 0
        pw = 0
        for r in rewards[t:]:
            Gt = Gt + GAMMA**pw * r
            pw += 1
        discounted_rewards.append(Gt)
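The update function above stops after computing the discounted returns; the sketch below shows how a REINFORCE step typically finishes, assuming the returns and log-probabilities gathered above and a network that carries its own optimizer (as in the PolicyNetworkSketch). The function name and the normalization are illustrative, not the gist's code.
import torch

def finish_policy_update(policy_network, discounted_rewards, log_probs):
    #normalize the returns to stabilize training; the epsilon avoids division by zero
    returns = torch.tensor(discounted_rewards)
    returns = (returns - returns.mean()) / (returns.std() + 1e-9)

    #REINFORCE loss: weight each action's log-probability by its normalized return
    policy_loss = torch.stack([-log_prob * Gt for log_prob, Gt in zip(log_probs, returns)]).sum()

    policy_network.optimizer.zero_grad()
    policy_loss.backward()
    policy_network.optimizer.step()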
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter(flush_secs = 40)
#here we set the global variables
GAMMA = .95
EPISODES = 5000
learning_rate = 0.01
#our net and frame-stacker
stacker = FrameStacker()
policy_net = PolicyNetwork(lr=learning_rate).to(device)
#some lists to write the values
for episode in range(EPISODES):
    game.new_episode()
    curr_health = game.get_state().game_variables[0]
    state = game.get_state().screen_buffer
    state = stacker.stack(state)
    log_probs = []
    rewards = []
    done = False
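    #Hypothetical continuation of the episode loop (not the gist's own code): act until the
    #episode ends, shape the reward from the health change, then update the policy.
    #get_action refers to the PolicyNetworkSketch above; the TensorBoard tag is illustrative.
    while not done:
        action, log_prob = policy_net.get_action(state.unsqueeze(0).type(FloatTensor))
        game.make_action(doom_actions[action])
        done = game.is_episode_finished()

        if done:
            reward = 0   #terminal step: no new state to read; reward left at 0 in this sketch
        else:
            new_health = game.get_state().game_variables[0]
            reward = new_health - curr_health   #reward shaped from the change in health
            curr_health = new_health
            state = stacker.stack(game.get_state().screen_buffer)

        log_probs.append(log_prob)
        rewards.append(reward)

    update_policy(policy_net, rewards, log_probs)
    writer.add_scalar("episode_reward", sum(rewards), episode)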