{
  "train": {
    "project": "LunarLanderContinuous-v2-QGPO-VPSDE",
    "device": "cuda",
    "wandb": {
      "project": "IQL-LunarLanderContinuous-v2-QGPO-VPSDE"
    },
    "simulator": {
      "type": "GymEnvSimulator",
      "args": {
        "env_id": "LunarLanderContinuous-v2"
      }
    },
    "model": {
      "QGPOPolicy": {
        "device": "cuda",
        "critic": {
          "device": "cuda",
          "q_alpha": 1.0,
          "DoubleQNetwork": {
            "backbone": {
              "type": "ConcatenateMLP",
              "args": {
                "hidden_sizes": [
                  10,
                  256,
                  256
                ],
                "output_size": 1,
                "activation": "relu"
              }
            }
          }
        },
        "diffusion_model": {
          "device": "cuda",
          "x_size": 2,
          "alpha": 1.0,
          "solver": {
            "type": "DPMSolver",
            "args": {
              "order": 2,
              "device": "cuda",
              "steps": 17
            }
          },
          "path": {
            "type": "linear_vp_sde",
            "beta_0": 0.1,
            "beta_1": 20.0
          },
          "reverse_path": {
            "type": "linear_vp_sde",
            "beta_0": 0.1,
            "beta_1": 20.0
          },
          "model": {
            "type": "noise_function",
            "args": {
              "t_encoder": {
                "type": "GaussianFourierProjectionTimeEncoder",
                "args": {
                  "embed_dim": 32,
                  "scale": 30.0
                }
              },
              "backbone": {
                "type": "TemporalSpatialResidualNet",
                "args": {
                  "hidden_sizes": [
                    512,
                    256,
                    128
                  ],
                  "output_dim": 2,
                  "t_dim": 32,
                  "condition_dim": 8,
                  "condition_hidden_dim": 32,
                  "t_condition_hidden_dim": 128
                }
              }
            }
          },
          "energy_guidance": {
            "t_encoder": {
              "type": "GaussianFourierProjectionTimeEncoder",
              "args": {
                "embed_dim": 32,
                "scale": 30.0
              }
            },
            "backbone": {
              "type": "ConcatenateMLP",
              "args": {
                "hidden_sizes": [
                  42,
                  256,
                  256
                ],
                "output_size": 1,
                "activation": "silu"
              }
            }
          }
        }
      }
    },
    "parameter": {
      "behaviour_policy": {
        "batch_size": 1024,
        "learning_rate": 0.0001,
        "epochs": 500
      },
      "action_augment_num": 16,
      "fake_data_t_span": null,
      "energy_guided_policy": {
        "batch_size": 256
      },
      "critic": {
        "stop_training_epochs": 500,
        "learning_rate": 0.0001,
        "discount_factor": 0.99,
        "update_momentum": 0.005
      },
      "energy_guidance": {
        "epochs": 1000,
        "learning_rate": 0.0001
      },
      "evaluation": {
        "evaluation_interval": 50,
        "guidance_scale": [
          0.0,
          1.0,
          2.0
        ]
      },
      "checkpoint_path": "./LunarLanderContinuous-v2-QGPO"
    }
  },
  "deploy": {
    "device": "cuda",
    "env": {
      "env_id": "LunarLanderContinuous-v2",
      "seed": 0
    },
    "num_deploy_steps": 1000,
    "t_span": null
  }
}