{
"train": {
"project": "LunarLanderContinuous-v2-QGPO-VPSDE",
"device": "cuda",
"wandb": {
"project": "IQL-LunarLanderContinuous-v2-QGPO-VPSDE"
},
"simulator": {
"type": "GymEnvSimulator",
"args": {
"env_id": "LunarLanderContinuous-v2"
}
},
"model": {
"QGPOPolicy": {
"device": "cuda",
"critic": {
"device": "cuda",
"q_alpha": 1.0,
"DoubleQNetwork": {
"backbone": {
"type": "ConcatenateMLP",
"args": {
"hidden_sizes": [
10,
256,
256
],
"output_size": 1,
"activation": "relu"
}
}
}
},
"diffusion_model": {
"device": "cuda",
"x_size": 2,
"alpha": 1.0,
"solver": {
"type": "DPMSolver",
"args": {
"order": 2,
"device": "cuda",
"steps": 17
}
},
"path": {
"type": "linear_vp_sde",
"beta_0": 0.1,
"beta_1": 20.0
},
"reverse_path": {
"type": "linear_vp_sde",
"beta_0": 0.1,
"beta_1": 20.0
},
"model": {
"type": "noise_function",
"args": {
"t_encoder": {
"type": "GaussianFourierProjectionTimeEncoder",
"args": {
"embed_dim": 32,
"scale": 30.0
}
},
"backbone": {
"type": "TemporalSpatialResidualNet",
"args": {
"hidden_sizes": [
512,
256,
128
],
"output_dim": 2,
"t_dim": 32,
"condition_dim": 8,
"condition_hidden_dim": 32,
"t_condition_hidden_dim": 128
}
}
}
},
"energy_guidance": {
"t_encoder": {
"type": "GaussianFourierProjectionTimeEncoder",
"args": {
"embed_dim": 32,
"scale": 30.0
}
},
"backbone": {
"type": "ConcatenateMLP",
"args": {
"hidden_sizes": [
42,
256,
256
],
"output_size": 1,
"activation": "silu"
}
}
}
}
}
},
"parameter": {
"behaviour_policy": {
"batch_size": 1024,
"learning_rate": 0.0001,
"epochs": 500
},
"action_augment_num": 16,
"fake_data_t_span": null,
"energy_guided_policy": {
"batch_size": 256
},
"critic": {
"stop_training_epochs": 500,
"learning_rate": 0.0001,
"discount_factor": 0.99,
"update_momentum": 0.005
},
"energy_guidance": {
"epochs": 1000,
"learning_rate": 0.0001
},
"evaluation": {
"evaluation_interval": 50,
"guidance_scale": [
0.0,
1.0,
2.0
]
},
"checkpoint_path": "./LunarLanderContinuous-v2-QGPO"
}
},
"deploy": {
"device": "cuda",
"env": {
"env_id": "LunarLanderContinuous-v2",
"seed": 0
},
"num_deploy_steps": 1000,
"t_span": null
}
}