slm_lab/spec/experimental/misc/lunar_pg.json
{
"reinforce_mlp_lunar": {
"agent": [{
"name": "Reinforce",
"algorithm": {
"name": "Reinforce",
"action_pdtype": "default",
"action_policy": "default",
"explore_var_spec": null,
"gamma": 0.993,
"entropy_coef_spec": {
"name": "linear_decay",
"start_val": 0.01,
"end_val": 0.001,
"start_step": 30000,
"end_step": 40000
},
"training_frequency": 1
},
"memory": {
"name": "OnPolicyReplay"
},
"net": {
"type": "MLPNet",
"hid_layers": [400, 200],
"hid_layers_activation": "selu",
"clip_grad_val": null,
"loss_spec": {
"name": "MSELoss"
},
"optim_spec": {
"name": "Adam",
"lr": 0.0005
},
"lr_scheduler_spec": {
"name": "MultiStepLR",
"milestones": [80000],
"gamma": 0.9
},
"gpu": true
}
}],
"env": [{
"name": "LunarLander-v2",
"max_t": null,
"max_frame": 400000
}],
"body": {
"product": "outer",
"num": 1
},
"meta": {
"distributed": false,
"eval_frequency": 1000,
"max_session": 2,
"max_trial": 95,
"search": "RandomSearch"
},
"search": {
"agent": [{
"algorithm": {
"training_frequency__choice": [2, 3, 4],
"entropy_coef_spec": {
"start_val__uniform": [0.001, 0.05],
"end_val__uniform": [0.0001, 0.001],
"start_step__choice": [90000, 100000, 110000, 120000],
"end_step__choice": [150000, 160000, 170000, 180000]
}
},
"net": {
"lr_scheduler_spec": {
"milestones__choice": [
[60000],
[70000],
[80000],
[60000, 80000],
[70000, 90000],
[80000, 100000]
],
"gamma__choice": [0.1, 0.25, 0.5, 0.75]
}
}
}]
}
},
"reinforce_rnn_lunar": {
"agent": [{
"name": "Reinforce",
"algorithm": {
"name": "Reinforce",
"action_pdtype": "default",
"action_policy": "default",
"explore_var_spec": null,
"gamma": 0.993,
"entropy_coef_spec": {
"name": "linear_decay",
"start_val": 0.01,
"end_val": 0.001,
"start_step": 30000,
"end_step": 40000
},
"training_frequency": 1
},
"memory": {
"name": "OnPolicyReplay"
},
"net": {
"type": "RecurrentNet",
"cell_type": "GRU",
"fc_hid_layers": [256, 128],
"hid_layers_activation": "relu",
"rnn_hidden_size": 64,
"rnn_num_layers": 1,
"seq_len": 4,
"clip_grad_val": null,
"loss_spec": {
"name": "MSELoss"
},
"optim_spec": {
"name": "Adam",
"lr": 0.008
},
"lr_scheduler_spec": {
"name": "MultiStepLR",
"milestones": [80000],
"gamma": 0.9
},
"gpu": true
}
}],
"env": [{
"name": "LunarLander-v2",
"max_t": null,
"max_frame": 400000
}],
"body": {
"product": "outer",
"num": 1
},
"meta": {
"distributed": false,
"eval_frequency": 1000,
"max_session": 2,
"max_trial": 95,
"search": "RandomSearch"
},
"search": {
"agent": [{
"algorithm": {
"training_frequency__choice": [2, 3, 4],
"entropy_coef_spec": {
"start_val__uniform": [0.001, 0.05],
"end_val__uniform": [0.0001, 0.001],
"start_step__choice": [90000, 100000, 110000, 120000],
"end_step__choice": [150000, 160000, 170000, 180000]
}
},
"net": {
"lr_scheduler_spec": {
"milestones__choice": [
[60000],
[70000],
[80000],
[60000, 80000],
[70000, 90000],
[80000, 100000]
],
"gamma__choice": [0.1, 0.25, 0.5, 0.75]
},
"seq_len__choice": [2, 3, 4, 5, 6],
"rnn_hidden_size__choice": [32, 64, 128]
}
}]
}
},
"a2c_gae_mlp_separate_lunar": {
"agent": [{
"name": "A2C",
"algorithm": {
"name": "ActorCritic",
"action_pdtype": "default",
"action_policy": "default",
"explore_var_spec": null,
"gamma": 0.993,
"lam": 0.921,
"num_step_returns": null,
"entropy_coef_spec": {
"name": "linear_decay",
"start_val": 0.01,
"end_val": 0.0001,
"start_step": 30000,
"end_step": 40000
},
"policy_loss_coef": 1.0,
"val_loss_coef": 1.0,
"training_frequency": 3
},
"memory": {
"name": "OnPolicyReplay"
},
"net": {
"type": "MLPNet",
"shared": false,
"hid_layers": [400, 200],
"hid_layers_activation": "relu",
"clip_grad_val": null,
"use_same_optim": false,
"actor_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"critic_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"lr_scheduler_spec": {
"name": "MultiStepLR",
"milestones": [80000],
"gamma": 0.1
},
"gpu": true
}
}],
"env": [{
"name": "LunarLander-v2",
"max_t": null,
"max_frame": 400000
}],
"body": {
"product": "outer",
"num": 1
},
"meta": {
"distributed": false,
"eval_frequency": 1000,
"max_session": 2,
"max_trial": 95,
"search": "RandomSearch"
},
"search": {
"agent": [{
"algorithm": {
"training_frequency__choice": [2, 3, 4],
"entropy_coef_spec": {
"start_val__uniform": [0.001, 0.05],
"end_val__uniform": [0.0001, 0.001],
"start_step__choice": [90000, 100000, 110000, 120000],
"end_step__choice": [150000, 160000, 170000, 180000]
}
},
"net": {
"lr_scheduler_spec": {
"milestones__choice": [
[60000],
[70000],
[80000],
[60000, 80000],
[70000, 90000],
[80000, 100000]
],
"gamma__choice": [0.1, 0.25, 0.5, 0.75]
}
}
}]
}
},
"a2c_gae_rnn_separate_lunar": {
"agent": [{
"name": "A2C",
"algorithm": {
"name": "ActorCritic",
"action_pdtype": "default",
"action_policy": "default",
"explore_var_spec": null,
"gamma": 0.993,
"lam": 0.921,
"num_step_returns": null,
"entropy_coef_spec": {
"name": "linear_decay",
"start_val": 0.01,
"end_val": 0.001,
"start_step": 30000,
"end_step": 40000
},
"policy_loss_coef": 1.0,
"val_loss_coef": 1.0,
"training_frequency": 3
},
"memory": {
"name": "OnPolicyReplay"
},
"net": {
"type": "RecurrentNet",
"shared": false,
"cell_type": "GRU",
"fc_hid_layers": [400, 200],
"hid_layers_activation": "relu",
"rnn_hidden_size": 64,
"rnn_num_layers": 1,
"seq_len": 4,
"clip_grad_val": null,
"use_same_optim": true,
"actor_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"critic_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"lr_scheduler_spec": {
"name": "MultiStepLR",
"milestones": [80000],
"gamma": 0.9
},
"gpu": true
}
}],
"env": [{
"name": "LunarLander-v2",
"max_t": null,
"max_frame": 400000
}],
"body": {
"product": "outer",
"num": 1
},
"meta": {
"distributed": false,
"eval_frequency": 1000,
"max_session": 2,
"max_trial": 95,
"search": "RandomSearch"
},
"search": {
"agent": [{
"algorithm": {
"training_frequency__choice": [2, 3, 4],
"entropy_coef_spec": {
"start_val__uniform": [0.001, 0.05],
"end_val__uniform": [0.0001, 0.001],
"start_step__choice": [90000, 100000, 110000, 120000],
"end_step__choice": [150000, 160000, 170000, 180000]
}
},
"net": {
"lr_scheduler_spec": {
"milestones__choice": [
[60000],
[70000],
[80000],
[60000, 80000],
[70000, 90000],
[80000, 100000]
],
"gamma__choice": [0.1, 0.25, 0.5, 0.75]
},
"seq_len__choice": [2, 3, 4, 5, 6],
"rnn_hidden_size__choice": [32, 64, 128]
}
}]
}
},
"a2c_nstep_mlp_separate_lunar": {
"agent": [{
"name": "A2C",
"algorithm": {
"name": "ActorCritic",
"action_pdtype": "default",
"action_policy": "default",
"explore_var_spec": null,
"gamma": 0.993,
"lam": null,
"num_step_returns": 1,
"entropy_coef_spec": {
"name": "linear_decay",
"start_val": 0.01,
"end_val": 0.001,
"start_step": 30000,
"end_step": 40000
},
"policy_loss_coef": 1.0,
"val_loss_coef": 1.0,
"training_frequency": 1
},
"memory": {
"name": "OnPolicyReplay"
},
"net": {
"type": "MLPNet",
"shared": false,
"hid_layers": [400, 200],
"hid_layers_activation": "tanh",
"clip_grad_val": null,
"use_same_optim": false,
"actor_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"critic_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"lr_scheduler_spec": {
"name": "MultiStepLR",
"milestones": [80000],
"gamma": 0.9
},
"gpu": true
}
}],
"env": [{
"name": "LunarLander-v2",
"max_t": null,
"max_frame": 400000
}],
"body": {
"product": "outer",
"num": 1
},
"meta": {
"distributed": false,
"eval_frequency": 1000,
"max_session": 2,
"max_trial": 95,
"search": "RandomSearch"
},
"search": {
"agent": [{
"algorithm": {
"training_frequency__choice": [2, 3, 4],
"num_step_returns__choice": [1, 2, 5, 10],
"entropy_coef_spec": {
"start_val__uniform": [0.001, 0.05],
"end_val__uniform": [0.0001, 0.001],
"start_step__choice": [90000, 100000, 110000, 120000],
"end_step__choice": [150000, 160000, 170000, 180000]
}
},
"net": {
"lr_scheduler_spec": {
"milestones__choice": [
[60000],
[70000],
[80000],
[60000, 80000],
[70000, 90000],
[80000, 100000]
],
"gamma__choice": [0.1, 0.25, 0.5, 0.75]
}
}
}]
}
},
"a2c_nstep_rnn_separate_lunar": {
"agent": [{
"name": "A2C",
"algorithm": {
"name": "ActorCritic",
"action_pdtype": "default",
"action_policy": "default",
"explore_var_spec": null,
"gamma": 0.993,
"lam": null,
"num_step_returns": 1,
"entropy_coef_spec": {
"name": "linear_decay",
"start_val": 0.01,
"end_val": 0.001,
"start_step": 30000,
"end_step": 40000
},
"policy_loss_coef": 1.0,
"val_loss_coef": 0.01,
"training_frequency": 1
},
"memory": {
"name": "OnPolicyReplay"
},
"net": {
"type": "RecurrentNet",
"shared": false,
"cell_type": "GRU",
"fc_hid_layers": [400, 200],
"hid_layers_activation": "tanh",
"rnn_hidden_size": 64,
"rnn_num_layers": 1,
"seq_len": 4,
"clip_grad_val": null,
"use_same_optim": false,
"actor_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"critic_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"lr_scheduler_spec": {
"name": "MultiStepLR",
"milestones": [80000],
"gamma": 0.9
},
"gpu": true
}
}],
"env": [{
"name": "LunarLander-v2",
"max_t": null,
"max_frame": 400000
}],
"body": {
"product": "outer",
"num": 1
},
"meta": {
"distributed": false,
"eval_frequency": 1000,
"max_session": 2,
"max_trial": 95,
"search": "RandomSearch"
},
"search": {
"agent": [{
"algorithm": {
"training_frequency__choice": [2, 3, 4],
"num_step_returns__choice": [1, 2, 5, 10],
"entropy_coef_spec": {
"start_val__uniform": [0.001, 0.05],
"end_val__uniform": [0.0001, 0.001],
"start_step__choice": [90000, 100000, 110000, 120000],
"end_step__choice": [150000, 160000, 170000, 180000]
}
},
"net": {
"lr_scheduler_spec": {
"milestones__choice": [
[60000],
[70000],
[80000],
[60000, 80000],
[70000, 90000],
[80000, 100000]
],
"gamma__choice": [0.1, 0.25, 0.5, 0.75]
},
"seq_len__choice": [2, 3, 4, 5, 6],
"rnn_hidden_size__choice": [32, 64, 128]
}
}]
}
},
"ppo_mlp_separate_lunar": {
"agent": [{
"name": "PPO",
"algorithm": {
"name": "PPO",
"action_pdtype": "default",
"action_policy": "default",
"explore_var_spec": null,
"gamma": 0.993,
"lam": 0.921,
"clip_eps_spec": {
"name": "linear_decay",
"start_val": 0.10,
"end_val": 0.01,
"start_step": 30000,
"end_step": 40000
},
"entropy_coef_spec": {
"name": "linear_decay",
"start_val": 0.01,
"end_val": 0.001,
"start_step": 30000,
"end_step": 40000
},
"val_loss_coef": 0.1,
"training_frequency": 1,
"training_epoch": 8
},
"memory": {
"name": "OnPolicyReplay"
},
"net": {
"type": "MLPNet",
"shared": false,
"hid_layers": [256, 128],
"hid_layers_activation": "relu",
"clip_grad_val": null,
"use_same_optim": true,
"actor_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"critic_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"lr_scheduler_spec": {
"name": "MultiStepLR",
"milestones": [80000],
"gamma": 0.9
},
"gpu": true
}
}],
"env": [{
"name": "LunarLander-v2",
"max_t": null,
"max_frame": 400000
}],
"body": {
"product": "outer",
"num": 1
},
"meta": {
"distributed": false,
"eval_frequency": 1000,
"max_session": 2,
"max_trial": 95,
"search": "RandomSearch"
},
"search": {
"agent": [{
"algorithm": {
"clip_eps__uniform": [0.1, 1.0],
"entropy_coef_spec": {
"start_val__uniform": [0.01, 1.0]
},
"val_loss_coef__uniform": [0.01, 1.0]
},
"net": {
"hid_layers_activation__choice": ["tanh", "relu", "selu"],
"actor_optim_spec": {
"lr__uniform": [0.00001, 0.01]
},
"critic_optim_spec": {
"lr__uniform": [0.00001, 0.01]
}
}
}]
}
},
"ppo_rnn_separate_lunar": {
"agent": [{
"name": "PPO",
"algorithm": {
"name": "PPO",
"action_pdtype": "default",
"action_policy": "default",
"explore_var_spec": null,
"gamma": 0.993,
"lam": 0.921,
"clip_eps_spec": {
"name": "linear_decay",
"start_val": 0.10,
"end_val": 0.01,
"start_step": 30000,
"end_step": 40000
},
"entropy_coef_spec": {
"name": "linear_decay",
"start_val": 0.01,
"end_val": 0.001,
"start_step": 30000,
"end_step": 40000
},
"val_loss_coef": 0.1,
"training_frequency": 1,
"training_epoch": 8
},
"memory": {
"name": "OnPolicyReplay"
},
"net": {
"type": "RecurrentNet",
"shared": false,
"cell_type": "GRU",
"fc_hid_layers": [256, 128],
"hid_layers_activation": "relu",
"rnn_hidden_size": 64,
"rnn_num_layers": 1,
"seq_len": 4,
"clip_grad_val": null,
"use_same_optim": true,
"actor_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"critic_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"lr_scheduler_spec": {
"name": "MultiStepLR",
"milestones": [80000],
"gamma": 0.9
},
"gpu": true
}
}],
"env": [{
"name": "LunarLander-v2",
"max_t": null,
"max_frame": 400000
}],
"body": {
"product": "outer",
"num": 1
},
"meta": {
"distributed": false,
"eval_frequency": 1000,
"max_session": 2,
"max_trial": 95,
"search": "RandomSearch"
},
"search": {
"agent": [{
"algorithm": {
"clip_eps__uniform": [0.1, 1.0],
"entropy_coef_spec": {
"start_val__uniform": [0.01, 1.0]
},
"val_loss_coef__uniform": [0.01, 1.0]
},
"net": {
"hid_layers_activation__choice": ["tanh", "relu", "selu"],
"rnn_hidden_size__choice": [16, 32, 64],
"actor_optim_spec": {
"lr__uniform": [0.00001, 0.01]
},
"critic_optim_spec": {
"lr__uniform": [0.00001, 0.01]
}
}
}]
}
},
"a2c_gae_mlp_shared_lunar": {
"agent": [{
"name": "A2C",
"algorithm": {
"name": "ActorCritic",
"action_pdtype": "default",
"action_policy": "default",
"explore_var_spec": null,
"gamma": 0.993,
"lam": 0.921,
"num_step_returns": null,
"entropy_coef_spec": {
"name": "linear_decay",
"start_val": 0.01,
"end_val": 0.001,
"start_step": 30000,
"end_step": 40000
},
"policy_loss_coef": 1.0,
"val_loss_coef": 0.01,
"training_frequency": 1
},
"memory": {
"name": "OnPolicyReplay"
},
"net": {
"type": "MLPNet",
"shared": true,
"hid_layers": [400, 200],
"hid_layers_activation": "relu",
"clip_grad_val": null,
"use_same_optim": true,
"actor_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"critic_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"lr_scheduler_spec": {
"name": "MultiStepLR",
"milestones": [80000],
"gamma": 0.9
},
"gpu": true
}
}],
"env": [{
"name": "LunarLander-v2",
"max_t": null,
"max_frame": 400000
}],
"body": {
"product": "outer",
"num": 1
},
"meta": {
"distributed": false,
"eval_frequency": 1000,
"max_session": 2,
"max_trial": 95,
"search": "RandomSearch"
},
"search": {
"agent": [{
"algorithm": {
"entropy_coef_spec": {
"start_val__uniform": [0.001, 1.0]
},
"val_loss_coef__uniform": [0.01, 1.0],
"lam__uniform": [0.9, 1.0]
},
"net": {
"lr_decay_frequency__choice": [50000, 60000, 80000, 100000],
"hid_layers_activation__choice": ["tanh", "relu", "selu"],
"actor_optim_spec": {
"lr__uniform": [0.0001, 0.01]
}
}
}]
}
},
"a2csil_gae_mlp_separate_lunar": {
"agent": [{
"name": "A2CSIL",
"algorithm": {
"name": "SIL",
"action_pdtype": "default",
"action_policy": "default",
"explore_var_spec": null,
"gamma": 0.993,
"lam": 0.921,
"num_step_returns": null,
"entropy_coef_spec": {
"name": "linear_decay",
"start_val": 0.01,
"end_val": 0.001,
"start_step": 30000,
"end_step": 40000
},
"policy_loss_coef": 1.0,
"val_loss_coef": 1.0,
"training_frequency": 1
},
"memory": {
"name": "OnPolicyReplay",
"sil_replay_name": "Replay",
"batch_size": 32,
"max_size": 50000,
"use_cer": false
},
"net": {
"type": "MLPNet",
"shared": false,
"hid_layers": [400, 200],
"hid_layers_activation": "relu",
"clip_grad_val": null,
"use_same_optim": false,
"actor_optim_spec": {
"name": "Adam",
"lr": 0.02
},
"critic_optim_spec": {
"name": "Adam",
"lr": 0.02
},
"lr_scheduler_spec": {
"name": "MultiStepLR",
"milestones": [80000],
"gamma": 0.9
},
"gpu": true
}
}],
"env": [{
"name": "LunarLander-v2",
"max_t": null,
"max_frame": 400000
}],
"body": {
"product": "outer",
"num": 1
},
"meta": {
"distributed": false,
"eval_frequency": 1000,
"max_session": 2,
"max_trial": 95,
"search": "RandomSearch"
},
"search": {
"agent": [{
"algorithm": {
"training_frequency__choice": [1, 2],
"entropy_coef__uniform": [0.04, 0.09],
"lam__uniform": [0.9, 1.0]
},
"net": {
"lr_decay_frequency__choice": [40000, 60000, 80000, 100000],
"lr_anneal_timestep__choice": [200000, 250000, 300000, 350000, 400000],
"actor_optim_spec": {
"lr__uniform": [0.004, 0.008]
},
"critic_optim_spec": {
"lr__uniform": [0.0001, 0.001]
}
},
"memory": {
"use_cer__choice": [true, false]
}
}]
}
},
"a2c_gae_rnn_shared_lunar": {
"agent": [{
"name": "A2C",
"algorithm": {
"name": "ActorCritic",
"action_pdtype": "default",
"action_policy": "default",
"explore_var_spec": null,
"gamma": 0.993,
"lam": 0.921,
"num_step_returns": null,
"entropy_coef_spec": {
"name": "linear_decay",
"start_val": 0.01,
"end_val": 0.001,
"start_step": 30000,
"end_step": 40000
},
"policy_loss_coef": 1.0,
"val_loss_coef": 0.01,
"training_frequency": 1
},
"memory": {
"name": "OnPolicyReplay"
},
"net": {
"type": "RecurrentNet",
"shared": true,
"cell_type": "GRU",
"fc_hid_layers": [256, 128],
"hid_layers_activation": "relu",
"rnn_hidden_size": 64,
"rnn_num_layers": 1,
"seq_len": 4,
"clip_grad_val": null,
"use_same_optim": true,
"actor_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"critic_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"lr_scheduler_spec": {
"name": "MultiStepLR",
"milestones": [80000],
"gamma": 0.9
},
"gpu": true
}
}],
"env": [{
"name": "LunarLander-v2",
"max_t": null,
"max_frame": 400000
}],
"body": {
"product": "outer",
"num": 1
},
"meta": {
"distributed": false,
"eval_frequency": 1000,
"max_session": 2,
"max_trial": 95,
"search": "RandomSearch"
},
"search": {
"agent": [{
"algorithm": {
"entropy_coef_spec": {
"start_val__uniform": [0.01, 1.0]
},
"val_loss_coef__uniform": [0.01, 1.0],
"lam__uniform": [0.9, 1.0]
},
"net": {
"hid_layers_activation__choice": ["tanh", "relu", "selu"],
"rnn_hidden_size__choice": [16, 32, 64],
"actor_optim_spec": {
"lr__uniform": [0.00001, 0.01]
}
}
}]
}
},
"a2c_nstep_mlp_shared_lunar": {
"agent": [{
"name": "A2C",
"algorithm": {
"name": "ActorCritic",
"action_pdtype": "default",
"action_policy": "default",
"explore_var_spec": null,
"gamma": 0.993,
"lam": null,
"num_step_returns": 1,
"entropy_coef_spec": {
"name": "linear_decay",
"start_val": 0.01,
"end_val": 0.001,
"start_step": 30000,
"end_step": 40000
},
"policy_loss_coef": 1.0,
"val_loss_coef": 1.0,
"training_frequency": 1
},
"memory": {
"name": "OnPolicyReplay"
},
"net": {
"type": "MLPNet",
"shared": true,
"hid_layers": [400, 200],
"hid_layers_activation": "relu",
"clip_grad_val": null,
"use_same_optim": true,
"actor_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"critic_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"lr_scheduler_spec": {
"name": "MultiStepLR",
"milestones": [80000],
"gamma": 0.9
},
"gpu": true
}
}],
"env": [{
"name": "LunarLander-v2",
"max_t": null,
"max_frame": 400000
}],
"body": {
"product": "outer",
"num": 1
},
"meta": {
"distributed": false,
"eval_frequency": 1000,
"max_session": 2,
"max_trial": 95,
"search": "RandomSearch"
},
"search": {
"agent": [{
"algorithm": {
"training_frequency__choice": [2, 3, 4],
"training_batch_iter__choice": [4, 6, 8, 10],
"entropy_coef_spec": {
"start_val__uniform": [0.001, 0.05]
},
"num_step_returns__choice": [1, 2, 3, 4, 5, 10]
},
"net": {
"use_same_optim__choice": [true, false],
"clip_grad__choice": [true, false],
"critic_optim_spec": {
"lr__uniform": [0.001, 0.01]
}
}
}]
}
},
"a2csil_nstep_mlp_separate_lunar": {
"agent": [{
"name": "A2CSIL",
"algorithm": {
"name": "SIL",
"action_pdtype": "default",
"action_policy": "default",
"explore_var_spec": null,
"gamma": 0.993,
"lam": null,
"num_step_returns": 1,
"entropy_coef_spec": {
"name": "linear_decay",
"start_val": 0.01,
"end_val": 0.001,
"start_step": 30000,
"end_step": 40000
},
"policy_loss_coef": 1.0,
"val_loss_coef": 1.0,
"training_frequency": 1
},
"memory": {
"name": "OnPolicyReplay",
"sil_replay_name": "Replay",
"batch_size": 32,
"max_size": 50000,
"use_cer": true
},
"net": {
"type": "MLPNet",
"shared": false,
"hid_layers": [400, 200],
"hid_layers_activation": "tanh",
"clip_grad_val": null,
"use_same_optim": false,
"actor_optim_spec": {
"name": "Adam",
"lr": 0.02
},
"critic_optim_spec": {
"name": "Adam",
"lr": 0.02
},
"lr_scheduler_spec": {
"name": "MultiStepLR",
"milestones": [80000],
"gamma": 0.9
},
"gpu": true
}
}],
"env": [{
"name": "LunarLander-v2",
"max_t": null,
"max_frame": 400000
}],
"body": {
"product": "outer",
"num": 1
},
"meta": {
"distributed": false,
"eval_frequency": 1000,
"max_session": 2,
"max_trial": 95,
"search": "RandomSearch"
},
"search": {
"agent": [{
"algorithm": {
"training_frequency__choice": [1, 2, 3],
"entropy_coef__uniform": [0.01, 0.1],
"num_step_returns__choice": [1, 4, 5]
},
"net": {
"lr_decay_frequency__choice": [40000, 60000, 80000, 100000],
"lr_anneal_timestep__choice": [200000, 250000, 300000, 350000, 400000],
"actor_optim_spec": {
"lr__uniform": [0.001, 0.008]
},
"critic_optim_spec": {
"lr__uniform": [0.0001, 0.001]
}
},
"memory": {
"use_cer__choice": [true, false]
}
}]
}
},
"a2c_nstep_rnn_shared_lunar": {
"agent": [{
"name": "A2C",
"algorithm": {
"name": "ActorCritic",
"action_pdtype": "default",
"action_policy": "default",
"explore_var_spec": null,
"gamma": 0.993,
"lam": null,
"num_step_returns": 1,
"entropy_coef_spec": {
"name": "linear_decay",
"start_val": 0.01,
"end_val": 0.001,
"start_step": 30000,
"end_step": 40000
},
"policy_loss_coef": 1.0,
"val_loss_coef": 0.01,
"training_frequency": 1
},
"memory": {
"name": "OnPolicyReplay"
},
"net": {
"type": "RecurrentNet",
"shared": true,
"cell_type": "GRU",
"fc_hid_layers": [256, 128],
"hid_layers_activation": "relu",
"rnn_hidden_size": 64,
"rnn_num_layers": 1,
"seq_len": 4,
"clip_grad_val": null,
"use_same_optim": true,
"actor_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"critic_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"lr_scheduler_spec": {
"name": "MultiStepLR",
"milestones": [80000],
"gamma": 0.9
},
"gpu": true
}
}],
"env": [{
"name": "LunarLander-v2",
"max_t": null,
"max_frame": 400000
}],
"body": {
"product": "outer",
"num": 1
},
"meta": {
"distributed": false,
"eval_frequency": 1000,
"max_session": 2,
"max_trial": 95,
"search": "RandomSearch"
},
"search": {
"agent": [{
"algorithm": {
"num_step_returns__choice": [1, 4, 8],
"entropy_coef_spec": {
"start_val__uniform": [0.01, 1.0]
},
"val_loss_coef__uniform": [0.01, 1.0]
},
"net": {
"hid_layers_activation__choice": ["tanh", "relu", "selu"],
"rnn_hidden_size__choice": [16, 32, 64],
"actor_optim_spec": {
"lr__uniform": [0.00001, 0.01]
}
}
}]
}
},
"ppo_mlp_shared_lunar": {
"agent": [{
"name": "PPO",
"algorithm": {
"name": "PPO",
"action_pdtype": "default",
"action_policy": "default",
"explore_var_spec": null,
"gamma": 0.993,
"lam": 0.921,
"clip_eps_spec": {
"name": "linear_decay",
"start_val": 0.10,
"end_val": 0.01,
"start_step": 30000,
"end_step": 40000
},
"entropy_coef_spec": {
"name": "linear_decay",
"start_val": 0.01,
"end_val": 0.001,
"start_step": 30000,
"end_step": 40000
},
"val_loss_coef": 0.1,
"training_frequency": 1,
"training_epoch": 8
},
"memory": {
"name": "OnPolicyReplay"
},
"net": {
"type": "MLPNet",
"shared": true,
"hid_layers": [400, 200],
"hid_layers_activation": "relu",
"clip_grad_val": null,
"use_same_optim": true,
"actor_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"critic_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"lr_scheduler_spec": {
"name": "MultiStepLR",
"milestones": [80000],
"gamma": 0.9
},
"gpu": true
}
}],
"env": [{
"name": "LunarLander-v2",
"max_t": null,
"max_frame": 400000
}],
"body": {
"product": "outer",
"num": 1
},
"meta": {
"distributed": false,
"eval_frequency": 1000,
"max_session": 2,
"max_trial": 95,
"search": "RandomSearch"
},
"search": {
"agent": [{
"algorithm": {
"clip_eps__uniform": [0.1, 1.0],
"entropy_coef_spec": {
"start_val__uniform": [0.01, 1.0]
},
"val_loss_coef__uniform": [0.01, 1.0]
},
"net": {
"hid_layers_activation__choice": ["tanh", "relu", "selu"],
"actor_optim_spec": {
"lr__uniform": [0.00001, 0.01]
}
}
}]
}
},
"ppo_rnn_shared_lunar": {
"agent": [{
"name": "PPO",
"algorithm": {
"name": "PPO",
"action_pdtype": "default",
"action_policy": "default",
"explore_var_spec": null,
"gamma": 0.993,
"lam": 0.921,
"clip_eps_spec": {
"name": "linear_decay",
"start_val": 0.10,
"end_val": 0.01,
"start_step": 30000,
"end_step": 40000
},
"entropy_coef_spec": {
"name": "linear_decay",
"start_val": 0.01,
"end_val": 0.001,
"start_step": 30000,
"end_step": 40000
},
"val_loss_coef": 0.1,
"training_frequency": 1,
"training_epoch": 8
},
"memory": {
"name": "OnPolicyReplay"
},
"net": {
"type": "RecurrentNet",
"shared": true,
"cell_type": "GRU",
"fc_hid_layers": [256, 128],
"hid_layers_activation": "relu",
"rnn_hidden_size": 64,
"rnn_num_layers": 1,
"seq_len": 4,
"clip_grad_val": null,
"use_same_optim": true,
"actor_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"critic_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"lr_scheduler_spec": {
"name": "MultiStepLR",
"milestones": [80000],
"gamma": 0.9
},
"gpu": true
}
}],
"env": [{
"name": "LunarLander-v2",
"max_t": null,
"max_frame": 400000
}],
"body": {
"product": "outer",
"num": 1
},
"meta": {
"distributed": false,
"eval_frequency": 1000,
"max_session": 2,
"max_trial": 95,
"search": "RandomSearch"
},
"search": {
"agent": [{
"algorithm": {
"clip_eps__uniform": [0.1, 1.0],
"entropy_coef_spec": {
"start_val__uniform": [0.01, 1.0]
},
"val_loss_coef__uniform": [0.01, 1.0]
},
"net": {
"hid_layers_activation__choice": ["tanh", "relu", "selu"],
"rnn_hidden_size__choice": [16, 32, 64],
"actor_optim_spec": {
"lr__uniform": [0.00001, 0.01]
}
}
}]
}
},
"a2c_sil_mlp_shared_lunar": {
"agent": [{
"name": "SIL",
"algorithm": {
"name": "SIL",
"action_pdtype": "default",
"action_policy": "default",
"explore_var_spec": null,
"gamma": 0.993,
"lam": 0.921,
"num_step_returns": null,
"entropy_coef_spec": {
"name": "linear_decay",
"start_val": 0.01,
"end_val": 0.001,
"start_step": 30000,
"end_step": 40000
},
"policy_loss_coef": 1.0,
"val_loss_coef": 0.01,
"sil_policy_loss_coef": 1.0,
"sil_val_loss_coef": 0.1,
"training_frequency": 1,
"training_batch_iter": 10
},
"memory": {
"name": "OnPolicyReplay",
"sil_replay_name": "Replay",
"batch_size": 64,
"max_size": 10000,
"use_cer": true
},
"net": {
"type": "MLPNet",
"shared": true,
"hid_layers": [400, 200],
"hid_layers_activation": "relu",
"clip_grad_val": null,
"use_same_optim": true,
"actor_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"critic_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"lr_scheduler_spec": {
"name": "MultiStepLR",
"milestones": [80000],
"gamma": 0.9
},
"gpu": true
}
}],
"env": [{
"name": "LunarLander-v2",
"max_t": null,
"max_frame": 400000
}],
"body": {
"product": "outer",
"num": 1
},
"meta": {
"distributed": false,
"eval_frequency": 1000,
"max_session": 2,
"max_trial": 95,
"search": "RandomSearch"
},
"search": {
"agent": [{
"algorithm": {
"entropy_coef_spec": {
"start_val__uniform": [0.01, 1.0]
},
"val_loss_coef__uniform": [0.01, 1.0],
"sil_policy_loss_coef__uniform": [0.01, 1.0],
"sil_val_loss_coef__uniform": [0.01, 1.0]
},
"net": {
"hid_layers_activation__choice": ["tanh", "relu", "selu"],
"actor_optim_spec": {
"lr__uniform": [0.00001, 0.01]
}
}
}]
}
},
"a2c_sil_mlp_separate_lunar": {
"agent": [{
"name": "SIL",
"algorithm": {
"name": "SIL",
"action_pdtype": "default",
"action_policy": "default",
"explore_var_spec": null,
"gamma": 0.993,
"lam": 0.921,
"num_step_returns": null,
"entropy_coef_spec": {
"name": "linear_decay",
"start_val": 0.01,
"end_val": 0.001,
"start_step": 30000,
"end_step": 40000
},
"policy_loss_coef": 1.0,
"val_loss_coef": 0.01,
"sil_policy_loss_coef": 1.0,
"sil_val_loss_coef": 0.1,
"training_frequency": 1,
"training_batch_iter": 10
},
"memory": {
"name": "OnPolicyReplay",
"sil_replay_name": "Replay",
"batch_size": 64,
"max_size": 10000,
"use_cer": true
},
"net": {
"type": "MLPNet",
"shared": false,
"hid_layers": [400, 200],
"hid_layers_activation": "relu",
"clip_grad_val": null,
"use_same_optim": true,
"actor_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"critic_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"lr_scheduler_spec": {
"name": "MultiStepLR",
"milestones": [80000],
"gamma": 0.9
},
"gpu": true
}
}],
"env": [{
"name": "LunarLander-v2",
"max_t": null,
"max_frame": 400000
}],
"body": {
"product": "outer",
"num": 1
},
"meta": {
"distributed": false,
"eval_frequency": 1000,
"max_session": 2,
"max_trial": 95,
"search": "RandomSearch"
},
"search": {
"agent": [{
"algorithm": {
"entropy_coef_spec": {
"start_val__uniform": [0.01, 1.0]
},
"val_loss_coef__uniform": [0.01, 1.0],
"sil_policy_loss_coef__uniform": [0.01, 1.0],
"sil_val_loss_coef__uniform": [0.01, 1.0]
},
"net": {
"hid_layers_activation__choice": ["tanh", "relu", "selu"],
"actor_optim_spec": {
"lr__uniform": [0.00001, 0.01]
},
"critic_optim_spec": {
"lr__uniform": [0.00001, 0.01]
}
}
}]
}
},
"a2c_sil_rnn_shared_lunar": {
"agent": [{
"name": "SIL",
"algorithm": {
"name": "SIL",
"action_pdtype": "default",
"action_policy": "default",
"explore_var_spec": null,
"gamma": 0.993,
"lam": 0.921,
"num_step_returns": null,
"entropy_coef_spec": {
"name": "linear_decay",
"start_val": 0.01,
"end_val": 0.001,
"start_step": 30000,
"end_step": 40000
},
"policy_loss_coef": 1.0,
"val_loss_coef": 0.01,
"sil_policy_loss_coef": 1.0,
"sil_val_loss_coef": 0.1,
"training_frequency": 1,
"training_batch_iter": 10
},
"memory": {
"name": "OnPolicyReplay",
"sil_replay_name": "Replay",
"batch_size": 64,
"max_size": 10000,
"use_cer": true
},
"net": {
"type": "RecurrentNet",
"shared": true,
"cell_type": "GRU",
"fc_hid_layers": [256, 128],
"hid_layers_activation": "relu",
"rnn_hidden_size": 64,
"rnn_num_layers": 1,
"seq_len": 4,
"clip_grad_val": null,
"use_same_optim": true,
"actor_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"critic_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"lr_scheduler_spec": {
"name": "MultiStepLR",
"milestones": [80000],
"gamma": 0.9
},
"gpu": true
}
}],
"env": [{
"name": "LunarLander-v2",
"max_t": null,
"max_frame": 400000
}],
"body": {
"product": "outer",
"num": 1
},
"meta": {
"distributed": false,
"eval_frequency": 1000,
"max_session": 2,
"max_trial": 95,
"search": "RandomSearch"
},
"search": {
"agent": [{
"algorithm": {
"entropy_coef_spec": {
"start_val__uniform": [0.01, 1.0]
},
"val_loss_coef__uniform": [0.01, 1.0],
"sil_policy_loss_coef__uniform": [0.01, 1.0],
"sil_val_loss_coef__uniform": [0.01, 1.0]
},
"net": {
"hid_layers_activation__choice": ["tanh", "relu", "selu"],
"rnn_hidden_size__choice": [16, 32, 64],
"actor_optim_spec": {
"lr__uniform": [0.00001, 0.01]
}
}
}]
}
},
"a2c_sil_rnn_separate_lunar": {
"agent": [{
"name": "SIL",
"algorithm": {
"name": "SIL",
"action_pdtype": "default",
"action_policy": "default",
"explore_var_spec": null,
"gamma": 0.993,
"lam": 0.921,
"num_step_returns": null,
"entropy_coef_spec": {
"name": "linear_decay",
"start_val": 0.01,
"end_val": 0.001,
"start_step": 30000,
"end_step": 40000
},
"policy_loss_coef": 1.0,
"val_loss_coef": 0.01,
"sil_policy_loss_coef": 1.0,
"sil_val_loss_coef": 0.1,
"training_frequency": 1,
"training_batch_iter": 10
},
"memory": {
"name": "OnPolicyReplay",
"sil_replay_name": "Replay",
"batch_size": 64,
"max_size": 10000,
"use_cer": true
},
"net": {
"type": "RecurrentNet",
"shared": false,
"cell_type": "GRU",
"fc_hid_layers": [256, 128],
"hid_layers_activation": "relu",
"rnn_hidden_size": 64,
"rnn_num_layers": 1,
"seq_len": 4,
"clip_grad_val": null,
"use_same_optim": true,
"actor_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"critic_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"lr_scheduler_spec": {
"name": "MultiStepLR",
"milestones": [80000],
"gamma": 0.9
},
"gpu": true
}
}],
"env": [{
"name": "LunarLander-v2",
"max_t": null,
"max_frame": 400000
}],
"body": {
"product": "outer",
"num": 1
},
"meta": {
"distributed": false,
"eval_frequency": 1000,
"max_session": 2,
"max_trial": 95,
"search": "RandomSearch"
},
"search": {
"agent": [{
"algorithm": {
"entropy_coef_spec": {
"start_val__uniform": [0.01, 1.0]
},
"val_loss_coef__uniform": [0.01, 1.0],
"sil_policy_loss_coef__uniform": [0.01, 1.0],
"sil_val_loss_coef__uniform": [0.01, 1.0]
},
"net": {
"hid_layers_activation__choice": ["tanh", "relu", "selu"],
"rnn_hidden_size__choice": [16, 32, 64],
"actor_optim_spec": {
"lr__uniform": [0.00001, 0.01]
},
"critic_optim_spec": {
"lr__uniform": [0.00001, 0.01]
}
}
}]
}
},
"ppo_sil_mlp_shared_lunar": {
"agent": [{
"name": "PPO",
"algorithm": {
"name": "PPO",
"action_pdtype": "default",
"action_policy": "default",
"explore_var_spec": null,
"gamma": 0.993,
"lam": 0.921,
"clip_eps_spec": {
"name": "linear_decay",
"start_val": 0.10,
"end_val": 0.01,
"start_step": 30000,
"end_step": 40000
},
"entropy_coef_spec": {
"name": "linear_decay",
"start_val": 0.01,
"end_val": 0.001,
"start_step": 30000,
"end_step": 40000
},
"val_loss_coef": 0.1,
"sil_policy_loss_coef": 1.0,
"sil_val_loss_coef": 0.1,
"training_frequency": 1,
"training_batch_iter": 10,
"training_epoch": 8
},
"memory": {
"name": "OnPolicyReplay"
},
"net": {
"type": "MLPNet",
"shared": true,
"hid_layers": [400, 200],
"hid_layers_activation": "relu",
"clip_grad_val": null,
"use_same_optim": true,
"actor_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"critic_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"lr_scheduler_spec": {
"name": "MultiStepLR",
"milestones": [80000],
"gamma": 0.9
},
"gpu": true
}
}],
"env": [{
"name": "LunarLander-v2",
"max_t": null,
"max_frame": 400000
}],
"body": {
"product": "outer",
"num": 1
},
"meta": {
"distributed": false,
"eval_frequency": 1000,
"max_session": 2,
"max_trial": 95,
"search": "RandomSearch"
},
"search": {
"agent": [{
"algorithm": {
"clip_eps__uniform": [0.1, 1.0],
"entropy_coef_spec": {
"start_val__uniform": [0.01, 1.0]
},
"val_loss_coef__uniform": [0.01, 1.0],
"sil_policy_loss_coef__uniform": [0.01, 1.0],
"sil_val_loss_coef__uniform": [0.01, 1.0]
},
"net": {
"hid_layers_activation__choice": ["tanh", "relu", "selu"],
"actor_optim_spec": {
"lr__uniform": [0.00001, 0.01]
}
}
}]
}
},
"ppo_sil_mlp_separate_lunar": {
"agent": [{
"name": "PPO",
"algorithm": {
"name": "PPO",
"action_pdtype": "default",
"action_policy": "default",
"explore_var_spec": null,
"gamma": 0.993,
"lam": 0.921,
"clip_eps_spec": {
"name": "linear_decay",
"start_val": 0.10,
"end_val": 0.01,
"start_step": 30000,
"end_step": 40000
},
"entropy_coef_spec": {
"name": "linear_decay",
"start_val": 0.01,
"end_val": 0.001,
"start_step": 30000,
"end_step": 40000
},
"val_loss_coef": 0.1,
"sil_policy_loss_coef": 1.0,
"sil_val_loss_coef": 0.1,
"training_frequency": 1,
"training_batch_iter": 10,
"training_epoch": 8
},
"memory": {
"name": "OnPolicyReplay"
},
"net": {
"type": "MLPNet",
"shared": false,
"hid_layers": [400, 200],
"hid_layers_activation": "relu",
"clip_grad_val": null,
"use_same_optim": true,
"actor_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"critic_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"lr_scheduler_spec": {
"name": "MultiStepLR",
"milestones": [80000],
"gamma": 0.9
},
"gpu": true
}
}],
"env": [{
"name": "LunarLander-v2",
"max_t": null,
"max_frame": 400000
}],
"body": {
"product": "outer",
"num": 1
},
"meta": {
"distributed": false,
"eval_frequency": 1000,
"max_session": 2,
"max_trial": 95,
"search": "RandomSearch"
},
"search": {
"agent": [{
"algorithm": {
"clip_eps__uniform": [0.1, 1.0],
"entropy_coef_spec": {
"start_val__uniform": [0.01, 1.0]
},
"val_loss_coef__uniform": [0.01, 1.0],
"sil_policy_loss_coef__uniform": [0.01, 1.0],
"sil_val_loss_coef__uniform": [0.01, 1.0]
},
"net": {
"hid_layers_activation__choice": ["tanh", "relu", "selu"],
"actor_optim_spec": {
"lr__uniform": [0.00001, 0.01]
},
"critic_optim_spec": {
"lr__uniform": [0.00001, 0.01]
}
}
}]
}
},
"ppo_sil_rnn_shared_lunar": {
"agent": [{
"name": "PPO",
"algorithm": {
"name": "PPO",
"action_pdtype": "default",
"action_policy": "default",
"explore_var_spec": null,
"gamma": 0.993,
"lam": 0.921,
"clip_eps_spec": {
"name": "linear_decay",
"start_val": 0.10,
"end_val": 0.01,
"start_step": 30000,
"end_step": 40000
},
"entropy_coef_spec": {
"name": "linear_decay",
"start_val": 0.01,
"end_val": 0.001,
"start_step": 30000,
"end_step": 40000
},
"val_loss_coef": 0.1,
"sil_policy_loss_coef": 1.0,
"sil_val_loss_coef": 0.1,
"training_frequency": 1,
"training_batch_iter": 10,
"training_epoch": 8
},
"memory": {
"name": "OnPolicyReplay",
"sil_replay_name": "Replay",
"batch_size": 64,
"max_size": 10000,
"use_cer": true
},
"net": {
"type": "RecurrentNet",
"shared": true,
"cell_type": "GRU",
"fc_hid_layers": [256, 128],
"hid_layers_activation": "relu",
"rnn_hidden_size": 64,
"rnn_num_layers": 1,
"seq_len": 4,
"clip_grad_val": null,
"use_same_optim": true,
"actor_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"critic_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"lr_scheduler_spec": {
"name": "MultiStepLR",
"milestones": [80000],
"gamma": 0.9
},
"gpu": true
}
}],
"env": [{
"name": "LunarLander-v2",
"max_t": null,
"max_frame": 400000
}],
"body": {
"product": "outer",
"num": 1
},
"meta": {
"distributed": false,
"eval_frequency": 1000,
"max_session": 2,
"max_trial": 95,
"search": "RandomSearch"
},
"search": {
"agent": [{
"algorithm": {
"clip_eps__uniform": [0.1, 1.0],
"entropy_coef_spec": {
"start_val__uniform": [0.01, 1.0]
},
"val_loss_coef__uniform": [0.01, 1.0],
"sil_policy_loss_coef__uniform": [0.01, 1.0],
"sil_val_loss_coef__uniform": [0.01, 1.0]
},
"net": {
"hid_layers_activation__choice": ["tanh", "relu", "selu"],
"rnn_hidden_size__choice": [16, 32, 64],
"actor_optim_spec": {
"lr__uniform": [0.00001, 0.01]
}
}
}]
}
},
"ppo_sil_rnn_separate_lunar": {
"agent": [{
"name": "PPO",
"algorithm": {
"name": "PPO",
"action_pdtype": "default",
"action_policy": "default",
"explore_var_spec": null,
"gamma": 0.993,
"lam": 0.921,
"clip_eps_spec": {
"name": "linear_decay",
"start_val": 0.10,
"end_val": 0.01,
"start_step": 30000,
"end_step": 40000
},
"entropy_coef_spec": {
"name": "linear_decay",
"start_val": 0.01,
"end_val": 0.001,
"start_step": 30000,
"end_step": 40000
},
"val_loss_coef": 0.1,
"sil_policy_loss_coef": 1.0,
"sil_val_loss_coef": 0.1,
"training_frequency": 1,
"training_batch_iter": 10,
"training_epoch": 8
},
"memory": {
"name": "OnPolicyReplay",
"sil_replay_name": "Replay",
"batch_size": 64,
"max_size": 10000,
"use_cer": true
},
"net": {
"type": "RecurrentNet",
"shared": false,
"cell_type": "GRU",
"fc_hid_layers": [256, 128],
"hid_layers_activation": "relu",
"rnn_hidden_size": 64,
"rnn_num_layers": 1,
"seq_len": 4,
"clip_grad_val": null,
"use_same_optim": true,
"actor_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"critic_optim_spec": {
"name": "Adam",
"lr": 0.008
},
"lr_scheduler_spec": {
"name": "MultiStepLR",
"milestones": [80000],
"gamma": 0.9
},
"gpu": true
}
}],
"env": [{
"name": "LunarLander-v2",
"max_t": null,
"max_frame": 400000
}],
"body": {
"product": "outer",
"num": 1
},
"meta": {
"distributed": false,
"eval_frequency": 1000,
"max_session": 2,
"max_trial": 95,
"search": "RandomSearch"
},
"search": {
"agent": [{
"algorithm": {
"clip_eps__uniform": [0.1, 1.0],
"entropy_coef_spec": {
"start_val__uniform": [0.01, 1.0]
},
"val_loss_coef__uniform": [0.01, 1.0],
"sil_policy_loss_coef__uniform": [0.01, 1.0],
"sil_val_loss_coef__uniform": [0.01, 1.0]
},
"net": {
"hid_layers_activation__choice": ["tanh", "relu", "selu"],
"rnn_hidden_size__choice": [16, 32, 64],
"actor_optim_spec": {
"lr__uniform": [0.00001, 0.01]
},
"critic_optim_spec": {
"lr__uniform": [0.00001, 0.01]
}
}
}]
}
}
}