franroldans/config_sac_her_pnp.yaml

## config_sac_her_pnp.yaml
# environment
info: Train_OfflineHER_FetchPickAndPlace
env: FetchPickAndPlace-v1
use_her: True
norm_obs: False
seed: 5
strategy: offline
curriculum: False

# buffer
use_PER: False
per_alpha: 0.7
per_beta: 0.5
buffer_size: 6400000
replay_k: 8

# network
hidden_sizes: [256, 256]

# train
actor_lr: 1.0e-3
critic_lr: 1.0e-3
start_timesteps: 64000 # warmup
epoch: 10 # number of epochs (= number of saves)
step_per_epoch: 64000 # save interval (50repeat)
step_per_collect: 6400 # (64 paths) collect this many steps, then update
update_per_step: 0.1 # update repetitions per collected step
estimation_step: 1 # look ahead time steps
batch_size: 4096

# parallel
device: cuda # cuda
training_num: 64
test_num: 8

# Algorithm
alpha: 0.2 # entropy regularization coefficient
auto_alpha: True
alpha_lr: 0.0003 # only used when auto_alpha is True
tau: 0.005
gamma: 0.95

# dir
logdir: log
resume_path: null

# render
watch_train: False # whether to show the demo directly
record_test: False
render: 0.00 # render time rate

## copy-of-sac_her_mujoco.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              copy-of-sac_her_mujoco.ipynb
            
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
	# environment
	info: Train_OfflineHER_FetchPickAndPlace
	env: FetchPickAndPlace-v1
	use_her: True
	norm_obs: False
	seed: 5
	strategy: offline
	curriculum: False

	# buffer
	use_PER: False
	per_alpha: 0.7
	per_beta: 0.5
	buffer_size: 6400000
	replay_k: 8

	# network
	hidden_sizes: [256, 256]

	# train
	actor_lr: 1.0e-3
	critic_lr: 1.0e-3
	start_timesteps: 64000 # warmup
	epoch: 10 # number of epochs (= number of saves)
	step_per_epoch: 64000 # save interval (50repeat)
	step_per_collect: 6400 # (64 paths) collect this many steps, then update
	update_per_step: 0.1 # update repetitions per collected step
	estimation_step: 1 # look ahead time steps
	batch_size: 4096

	# parallel
	device: cuda # cuda
	training_num: 64
	test_num: 8

	# Algorithm
	alpha: 0.2 # entropy regularization coefficient
	auto_alpha: True
	alpha_lr: 0.0003 # only used when auto_alpha is True
	tau: 0.005
	gamma: 0.95

	# dir
	logdir: log
	resume_path: null

	# render
	watch_train: False # whether to show the demo directly
	record_test: False
	render: 0.00 # render time rate