Skip to content

Instantly share code, notes, and snippets.

View kengz's full-sized avatar

Wah Loon Keng kengz

View GitHub Profile
def run_episode(self):
    '''
    Run one episode of agent-environment interaction.
    Ticks the episode clock, resets the env and the agent, then repeats
    act -> env.step -> agent.update until the env reports done.
    '''
    self.env.clock.tick('epi')
    reward, state, done = self.env.reset()
    self.agent.reset(state)
    while not done:
        self.env.clock.tick('t')
        action = self.agent.act(state)
        reward, state, done = self.env.step(action)
        self.agent.update(action, reward, state, done)
    # NOTE(review): the source lost its indentation; logging once per episode (after the loop) is assumed - confirm
    self.agent.body.log_summary()
def run(self):
    '''
    Run episodes while the 'epi' clock value is <= env.max_episode,
    then analyze the session, close it, and return the analysis data.
    The analysis result is also stored on self.data.
    '''
    while self.env.clock.get('epi') <= self.env.max_episode:
        self.run_episode()
    self.data = analysis.analyze_session(self)  # session fitness
    self.close()
    return self.data
@kengz
kengz / slm-lab-pip-install-usage.py
Last active September 30, 2018 18:32
Demo: SLM Lab as pip module for lightweight usecases
'''
Demo: SLM Lab as pip module for lightweight usecases
Installation:
1. Clone SLM-Lab
```
git clone https://github.com/kengz/SLM-Lab.git
cd SLM-Lab
```
@kengz
kengz / sac_networks.py
Last active August 11, 2019 18:15
SAC networks
def init_nets(self, global_nets=None):
    '''
    Networks: net(actor/policy), q1_net, target_q1_net, q2_net, target_q2_net
    All networks are separate, and have the same hidden layer architectures and optim specs, so tuning is minimal
    '''
    self.shared = False  # SAC does not share networks
    # resolve the network class by the name given in the net spec
    NetClass = getattr(net, self.net_spec['type'])
    # main actor network
    self.net = NetClass(self.net_spec, self.body.state_dim, net_util.get_out_dim(self.body))
    self.net_names = ['net']
    # NOTE(review): the docstring lists q1/q2 and target nets, and global_nets is unused in the visible lines;
    # this excerpt appears to be cut off here - confirm against the full source
@kengz
kengz / sac_losses.py
Last active August 11, 2019 18:16
SAC loss functions
def calc_q(self, state, action, net):
    '''
    Forward-pass state and action through the given Q-network `net`
    and return the predicted state-action values flattened with view(-1).
    '''
    q_pred = net(state, action).view(-1)
    return q_pred
def calc_q_targets(self, batch):
    '''Q_tar = r + gamma * (target_Q(s', a') - alpha * log pi(a'|s'))'''
    next_states = batch['next_states']
    # targets are computed without tracking gradients
    with torch.no_grad():
        pdparams = self.calc_pdparam(next_states)
        # NOTE(review): the visible excerpt ends here; the remaining target computation from the docstring formula is not shown
@kengz
kengz / sac_log_prob.py
Last active August 11, 2019 18:16
SAC log probs
def calc_log_prob_action(self, action_pd, reparam=False):
    '''Calculate log_probs and actions with option to reparametrize from paper eq. 11'''
    # rsample() when reparam is requested, plain sample() otherwise
    samples = action_pd.rsample() if reparam else action_pd.sample()
    if self.body.is_discrete:  # this is straightforward using GumbelSoftmax
        actions = samples
        log_probs = action_pd.log_prob(actions)
    else:
        mus = samples
        actions = self.scale_action(torch.tanh(mus))
        # paper Appendix C. Enforcing Action Bounds for continuous actions
        # NOTE(review): the visible excerpt ends here; the continuous-action log_probs and the return statement are not shown
@kengz
kengz / sac_training.py
Created August 10, 2019 06:50
SAC training loop
def train_alpha(self, alpha_loss):
    '''
    Custom method to train the alpha variable.
    Steps the alpha lr scheduler to the current env clock frame, runs one
    optimizer update on alpha_loss, then refreshes self.alpha as
    exp(log_alpha) taken from a detached log_alpha.
    '''
    self.alpha_lr_scheduler.step(epoch=self.body.env.clock.frame)
    self.alpha_optim.zero_grad()
    alpha_loss.backward()
    self.alpha_optim.step()
    # detach so the stored alpha carries no gradient history
    self.alpha = self.log_alpha.detach().exp()
def train(self):
    '''Train actor critic by computing the loss in batch efficiently'''
    # NOTE(review): only the signature and docstring are visible in this excerpt; the training body is not shown
@kengz
kengz / sac_benchmark.md
Last active August 11, 2019 18:22
SAC benchmark

Roboschool (continuous control) Benchmark

Note that the Roboschool reward scales are different from MuJoCo's. All results are run with 4 sessions with distinct random seeds. mean_returns_ma is the moving average of the returns over 100 checkpoints, averaged across the sessions.

Env. \ SAC mean_returns_ma graph
RoboschoolAnt 2451.55 sac
RoboschoolHalfCheetah 2004.27 sac
RoboschoolHopper 2090.52 sac
RoboschoolWalker2d 1711.92 sac
@kengz
kengz / History|-10f1514|1Scb.json
Last active July 19, 2022 17:16
Visual Studio Code Settings Sync Gist
// Place your key bindings in this file to override the defaults
[
{
"key": "ctrl+cmd+down",
"command": "editor.action.moveLinesDownAction",
"when": "editorTextFocus && !editorReadonly"
},
{
"key": "alt+down",
"command": "-editor.action.moveLinesDownAction",
@kengz
kengz / ubuntu_gpu_server_setup.md
Last active June 7, 2020 05:09
Ubuntu GPU server setup

Ubuntu GPU Server Setup

Quick and smooth setup of Ubuntu GPU server, including proper installation of Nvidia driver.

Estimated time: < 1 hour

  1. Download the “alternative” server image from Ubuntu: Alternative downloads | Ubuntu
  2. Create a bootable USB stick on macOS | Ubuntu
  3. Go to BIOS, disable secure boot. Then boot UEFI. Install Ubuntu, overwrite full partition, add SSH Server. Finish installation and login.
  4. You can now ssh in with a password. Log in and install the Nvidia driver. Since secure boot is disabled, the Nvidia installation should go smoothly.