|
data_cfgs: |
|
eval_data_files: null |
|
eval_datasets: null |
|
eval_optional_args: [] |
|
eval_size: null |
|
eval_split: null |
|
eval_subset: null |
|
eval_template: null |
|
ptx_data_files: extracted_panda.json |
|
ptx_datasets: /aifs4su/yaodong/datasets/ShareGPT4Video/extracted |
|
ptx_optional_args: [] |
|
ptx_size: 25000 |
|
ptx_split: train |
|
ptx_subset: null |
|
ptx_template: NExTQA |
|
train_data_files: extracted_preference_10k_washed.json |
|
train_datasets: /aifs4su/yaodong/datasets/aaa_dataset/TV2T-preference/extracted |
|
train_optional_args: [] |
|
train_size: null |
|
train_split: train |
|
train_subset: null |
|
train_template: NExTQA_preference |
|
logger_cfgs: |
|
cache_dir: null |
|
log_project: align-anything |
|
log_run_name: ppo |
|
log_type: wandb |
|
output_dir: ../outputs/ppo_qwen2vl_10k_baseline |
|
save_interval: 300.0 |
|
model_cfgs: |
|
actor_model_name_or_path: /aifs4su/yaodong/models/Qwen2-VL-7B-Instruct |
|
model_max_length: 2048 |
|
repetition_penalty: 1.0 |
|
reward_critic_model_name_or_path: /aifs4su/yaodong/projects/hantao/dev_cham/align-anything/outputs/rm_tiv2t_10k_baseline |
|
reward_model_name_or_path: /aifs4su/yaodong/projects/hantao/dev_cham/align-anything/outputs/rm_tiv2t_10k_baseline |
|
temperature: 1.0 |
|
top_p: 1.0 |
|
trust_remote_code: true |
|
special_tokens: null |
|
train_cfgs: |
|
actor_gradient_checkpointing: true |
|
actor_lr: 5.0e-07 |
|
actor_lr_scheduler_type: cosine |
|
actor_lr_warmup_ratio: 0.03 |
|
actor_weight_decay: 0.0 |
|
adam_betas: |
|
- 0.9 |
|
- 0.95 |
|
bf16: true |
|
clip_range_ratio: 0.2 |
|
clip_range_score: 50.0 |
|
clip_range_value: 5.0 |
|
critic_gradient_checkpointing: true |
|
critic_lr: 5.0e-07 |
|
critic_lr_scheduler_type: constant |
|
critic_lr_warmup_ratio: 0.03 |
|
critic_weight_decay: 0.0 |
|
ds_cfgs: ds_z3_config.json |
|
epochs: 3 |
|
eval_interval: 10 |
|
eval_strategy: epoch |
|
fp16: false |
|
freeze_language_model: false |
|
freeze_mm_proj: false |
|
freeze_vision_tower: true |
|
gae_lambda: 0.95 |
|
gamma: 1.0 |
|
gradient_accumulation_steps: 1 |
|
kl_coeff: 0.02 |
|
normalize_reward: false |
|
per_device_eval_batch_size: 2 |
|
per_device_prompt_batch_size: 2 |
|
per_device_train_batch_size: 2 |
|
ptx_coeff: 16.0 |
|
seed: 42 |
|
update_iters: 1 |
|
|