Wah Loon Keng kengz

## slm-lab-run-episode.py
def run_episode(self):
    '''
    Run one episode of agent-environment interaction.

    Ticks the env clock's 'epi' counter, resets the env and the agent, then repeats
    tick 't' -> act -> step -> update until the env reports done.
    Finishes by logging the agent body's summary. Returns nothing.
    '''
    env, agent = self.env, self.agent
    env.clock.tick('epi')
    # the reward returned by reset() is never consumed
    _, state, done = env.reset()
    agent.reset(state)
    while True:
        if done:
            break
        env.clock.tick('t')
        action = agent.act(state)
        step_reward, state, done = env.step(action)
        agent.update(action, step_reward, state, done)
    agent.body.log_summary()

## slm-lab-run.py
def run(self):
    '''
    Run episodes until the env clock's 'epi' count exceeds env.max_episode,
    then analyze the session, close it, and return the analysis data.

    The result of analysis.analyze_session(self) is stored on self.data before
    self.close() is called; self.data is what gets returned.
    '''
    while True:
        episode_count = self.env.clock.get('epi')
        if not episode_count <= self.env.max_episode:
            break
        self.run_episode()
    session_data = analysis.analyze_session(self)  # session fitness
    self.data = session_data
    self.close()
    return self.data

## slm-lab-pip-install-usage.py
'''
Demo: SLM Lab as pip module for lightweight use cases

Installation:

1. Clone SLM-Lab
```
git clone https://github.com/kengz/SLM-Lab.git
cd SLM-Lab
```

## sac_networks.py
    def init_nets(self, global_nets=None):
        '''
        Build the networks used by SAC.

        Documented set: net (actor/policy), q1_net, target_q1_net, q2_net, target_q2_net.
        All networks are separate and use the same hidden layer architectures and optim specs, so tuning is minimal.
        NOTE(review): only the actor network is constructed in this excerpt, and global_nets is not read here.
        '''
        # SAC does not share networks
        self.shared = False
        # the network class is looked up by name on the net module
        NetClass = getattr(net, self.net_spec['type'])
        in_dim = self.body.state_dim
        out_dim = net_util.get_out_dim(self.body)
        # main actor network
        self.net = NetClass(self.net_spec, in_dim, out_dim)
        self.net_names = ['net']

## sac_losses.py
    def calc_q(self, state, action, net):
        '''
        Forward-pass (state, action) through the given Q-network `net`
        and return its prediction flattened with view(-1).
        '''
        q_values = net(state, action)
        return q_values.view(-1)

    def calc_q_targets(self, batch):
        '''
        Q_tar = r + gamma * (target_Q(s', a') - alpha * log pi(a'|s'))

        batch: mapping that must provide the key 'next_states'.
        NOTE(review): this excerpt stops after computing the policy parameters for the next states;
        the rest of the target computation described by the formula is not shown here.
        '''
        next_states = batch['next_states']
        # everything inside this context runs without gradient tracking
        with torch.no_grad():
            # policy distribution parameters for the next states
            pdparams = self.calc_pdparam(next_states)

## sac_log_prob.py
    def calc_log_prob_action(self, action_pd, reparam=False):
        '''
        Calculate log_probs and actions with option to reparametrize from paper eq. 11

        action_pd: distribution object; this excerpt calls its sample(), rsample() and log_prob() methods.
        reparam: when truthy, draw with rsample() instead of sample().
        NOTE(review): the continuous branch's log-prob computation and the return statement are cut off in this excerpt.
        '''
        # choose the sampling method according to the reparam flag
        samples = action_pd.rsample() if reparam else action_pd.sample()
        if self.body.is_discrete:  # this is straightforward using GumbelSoftmax
            # discrete case: samples are used as actions directly
            actions = samples
            log_probs = action_pd.log_prob(actions)
        else:
            # continuous case: squash the raw samples with tanh, then pass them through self.scale_action
            mus = samples
            actions = self.scale_action(torch.tanh(mus))
            # paper Appendix C. Enforcing Action Bounds for continuous actions

## sac_training.py
    def train_alpha(self, alpha_loss):
        '''
        Run one optimization step on the alpha variable.

        Steps the alpha LR scheduler with the env clock's current frame, then performs
        zero_grad -> backward -> optimizer step on alpha_loss, and finally refreshes
        self.alpha as exp of the detached self.log_alpha.
        '''
        scheduler, optimizer = self.alpha_lr_scheduler, self.alpha_optim
        scheduler.step(epoch=self.body.env.clock.frame)
        optimizer.zero_grad()
        alpha_loss.backward()
        optimizer.step()
        # detach first so the stored alpha carries no gradient history
        detached_log_alpha = self.log_alpha.detach()
        self.alpha = detached_log_alpha.exp()

    def train(self):
        '''Train actor critic by computing the loss in batch efficiently'''
        # NOTE(review): the method body is not included in this excerpt.

## sac_benchmark.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                kengz
                / sac_benchmark.md
            
            
              Last active
              August 11, 2019 18:22
            
              
                SAC benchmark
              
          
    Roboschool (continuous control) Benchmark


Note that the Roboschool reward scales are different from MuJoCo's. All results were run with 4 sessions, each with a distinct random seed. mean_returns_ma is the moving average of returns over 100 checkpoints, averaged across the sessions.


| Env. \ SAC | mean_returns_ma | graph |
|---|---|---|
| RoboschoolAnt | 2451.55 | |
| RoboschoolHalfCheetah | 2004.27 | |
| RoboschoolHopper | 2090.52 | |
| RoboschoolWalker2d | 1711.92 | |


## History|-10f1514|1Scb.json
// Place your key bindings in this file to override the defaults
[
  {
    "key": "ctrl+cmd+down",
    "command": "editor.action.moveLinesDownAction",
    "when": "editorTextFocus && !editorReadonly"
  },
  {
    "key": "alt+down",
    "command": "-editor.action.moveLinesDownAction",

## ubuntu_gpu_server_setup.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                kengz
                / ubuntu_gpu_server_setup.md
            
            
              Last active
              June 7, 2020 05:09
            
              
                Ubuntu GPU server setup
              
          
    Ubuntu GPU Server Setup

Quick and smooth setup of Ubuntu GPU server, including proper installation of Nvidia driver.
Estimated time: < 1 hour

Download the “alternative” server image from Ubuntu: Alternative downloads | Ubuntu
Create a bootable USB stick on macOS | Ubuntu
Go to BIOS, disable secure boot. Then boot UEFI. Install Ubuntu, overwrite full partition, add SSH Server. Finish installation and login.
You can now ssh in with a password. Log in and install the Nvidia driver. Since secure boot is disabled, the Nvidia installation should go smoothly.
	def run_episode(self):
	    '''
	    Run one episode of agent-environment interaction.
	    Ticks the env clock's 'epi' counter, resets the env and the agent, then repeats
	    tick 't' -> act -> step -> update until the env reports done.
	    Finishes by logging the agent body's summary. Returns nothing.
	    '''
	    self.env.clock.tick('epi')
	    # the reward returned by reset() is not used before the first step overwrites it
	    reward, state, done = self.env.reset()
	    self.agent.reset(state)
	    while not done:
	        self.env.clock.tick('t')
	        action = self.agent.act(state)
	        reward, state, done = self.env.step(action)
	        self.agent.update(action, reward, state, done)
	    self.agent.body.log_summary()
	def run(self):
	    '''
	    Run episodes until the env clock's 'epi' count exceeds env.max_episode,
	    then analyze the session, close it, and return the analysis data.
	    The result of analysis.analyze_session(self) is stored on self.data before self.close() is called.
	    '''
	    while self.env.clock.get('epi') <= self.env.max_episode:
	        self.run_episode()
	    self.data = analysis.analyze_session(self)  # session fitness
	    self.close()
	    return self.data
	'''
	Demo: SLM Lab as pip module for lightweight use cases

	Installation:

	1. Clone SLM-Lab
	```
	git clone https://github.com/kengz/SLM-Lab.git
	cd SLM-Lab
	```
	def init_nets(self, global_nets=None):
	    '''
	    Networks: net(actor/policy), q1_net, target_q1_net, q2_net, target_q2_net
	    All networks are separate, and have the same hidden layer architectures and optim specs, so tuning is minimal
	    NOTE(review): only the actor network is constructed in this excerpt, and global_nets is not read here.
	    '''
	    self.shared = False  # SAC does not share networks
	    # the network class is looked up by name on the net module
	    NetClass = getattr(net, self.net_spec['type'])
	    # main actor network
	    self.net = NetClass(self.net_spec, self.body.state_dim, net_util.get_out_dim(self.body))
	    self.net_names = ['net']
	def calc_q(self, state, action, net):
	    '''
	    Forward-pass (state, action) through the given Q-network `net`
	    and return its prediction flattened with view(-1).
	    '''
	    q_pred = net(state, action).view(-1)
	    return q_pred

	def calc_q_targets(self, batch):
	    '''
	    Q_tar = r + gamma * (target_Q(s', a') - alpha * log pi(a'|s'))
	    batch: mapping that must provide the key 'next_states'.
	    NOTE(review): this excerpt stops after computing the policy parameters for the next states;
	    the rest of the target computation described by the formula is not shown here.
	    '''
	    next_states = batch['next_states']
	    # everything inside this context runs without gradient tracking
	    with torch.no_grad():
	        pdparams = self.calc_pdparam(next_states)
	def calc_log_prob_action(self, action_pd, reparam=False):
	    '''
	    Calculate log_probs and actions with option to reparametrize from paper eq. 11
	    action_pd: distribution object; this excerpt calls its sample(), rsample() and log_prob() methods.
	    reparam: when truthy, draw with rsample() instead of sample().
	    NOTE(review): the continuous branch's log-prob computation and the return statement are cut off in this excerpt.
	    '''
	    samples = action_pd.rsample() if reparam else action_pd.sample()
	    if self.body.is_discrete:  # this is straightforward using GumbelSoftmax
	        actions = samples
	        log_probs = action_pd.log_prob(actions)
	    else:
	        # continuous case: squash the raw samples with tanh, then pass them through self.scale_action
	        mus = samples
	        actions = self.scale_action(torch.tanh(mus))
	        # paper Appendix C. Enforcing Action Bounds for continuous actions
	def train_alpha(self, alpha_loss):
	    '''
	    Custom method to train the alpha variable
	    Steps the alpha LR scheduler with the env clock's current frame, then performs
	    zero_grad -> backward -> optimizer step on alpha_loss, and finally refreshes
	    self.alpha as exp of the detached self.log_alpha.
	    '''
	    self.alpha_lr_scheduler.step(epoch=self.body.env.clock.frame)
	    self.alpha_optim.zero_grad()
	    alpha_loss.backward()
	    self.alpha_optim.step()
	    # detach first so the stored alpha carries no gradient history
	    self.alpha = self.log_alpha.detach().exp()

	def train(self):
	    '''Train actor critic by computing the loss in batch efficiently'''
	    # NOTE(review): the method body is not included in this excerpt.
Env. \ SAC	`mean_returns_ma`	graph
RoboschoolAnt	2451.55
RoboschoolHalfCheetah	2004.27
RoboschoolHopper	2090.52
RoboschoolWalker2d	1711.92
	// Place your key bindings in this file to override the defaults
	[
	{
	"key": "ctrl+cmd+down",
	"command": "editor.action.moveLinesDownAction",
	"when": "editorTextFocus && !editorReadonly"
	},
	{
	"key": "alt+down",
	"command": "-editor.action.moveLinesDownAction",