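# Web file-viewer header text (not YAML data), kept as comments so the
# document below can be parsed:
#   ppo_qwen2vl_10k_baseline / arguments.yaml
#   htlou's picture
#   Upload folder using huggingface_hub
#   04f245c verified
#   raw
#   history blame
#   2.12 kB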
# Dataset configuration: evaluation, PTX, and training data sources.
# The options are nested under `data_cfgs` so they load as one mapping rather
# than as unrelated top-level keys.
data_cfgs:
  # Every eval_* option is null or empty: no evaluation dataset is specified.
  eval_data_files: null
  eval_datasets: null
  eval_optional_args: []
  eval_size: null
  eval_split: null
  eval_subset: null
  eval_template: null

  # PTX data source.
  # NOTE(review): `ptx_datasets` is an absolute path, presumably specific to
  # the machine that produced this run; adjust when reproducing elsewhere.
  ptx_data_files: extracted_panda.json
  ptx_datasets: /aifs4su/yaodong/datasets/ShareGPT4Video/extracted
  ptx_optional_args: []
  # Presumably a cap on the number of PTX samples; confirm with the loader.
  ptx_size: 25000
  ptx_split: train
  ptx_subset: null
  ptx_template: NExTQA

  # Training data source; `train_size: null` sets no explicit size limit here.
  # NOTE(review): `train_datasets` is an absolute path; see the PTX note above.
  train_data_files: extracted_preference_10k_washed.json
  train_datasets: /aifs4su/yaodong/datasets/aaa_dataset/TV2T-preference/extracted
  train_optional_args: []
  train_size: null
  train_split: train
  train_subset: null
  train_template: NExTQA_preference
# Logging and checkpoint-output configuration.
# The options are nested under `logger_cfgs` so they load as one mapping rather
# than as unrelated top-level keys.
logger_cfgs:
  cache_dir: null
  # Project and run names reported to the logging backend named by `log_type`.
  log_project: align-anything
  log_run_name: ppo
  log_type: wandb
  # Relative path; it resolves against the launching process's working
  # directory.
  output_dir: ../outputs/ppo_qwen2vl_10k_baseline
  # Stored as a float (300.0). Presumably a save period in training steps;
  # confirm the unit with the consumer.
  save_interval: 300.0
# Model paths and generation settings.
# The options are nested under `model_cfgs` so they load as one mapping rather
# than as unrelated top-level keys.
model_cfgs:
  # NOTE(review): all three model paths are absolute, presumably specific to
  # the machine that produced this run; adjust when reproducing elsewhere.
  actor_model_name_or_path: /aifs4su/yaodong/models/Qwen2-VL-7B-Instruct
  model_max_length: 2048
  repetition_penalty: 1.0
  # The reward critic and the reward model point at the same checkpoint
  # directory.
  reward_critic_model_name_or_path: /aifs4su/yaodong/projects/hantao/dev_cham/align-anything/outputs/rm_tiv2t_10k_baseline
  reward_model_name_or_path: /aifs4su/yaodong/projects/hantao/dev_cham/align-anything/outputs/rm_tiv2t_10k_baseline
  temperature: 1.0
  top_p: 1.0
  # NOTE(review): `true` presumably lets the loader execute code shipped with
  # the checkpoint; only use with trusted model sources.
  trust_remote_code: true
# Top-level option; null means no value is configured here.
special_tokens: null
# Training hyperparameters for the PPO run.
# The options are nested under `train_cfgs` so they load as one mapping rather
# than as unrelated top-level keys, and the `adam_betas` items nest under
# their key.
train_cfgs:
  # Actor optimizer and schedule.
  actor_gradient_checkpointing: true
  actor_lr: 5.0e-07
  actor_lr_scheduler_type: cosine
  actor_lr_warmup_ratio: 0.03
  actor_weight_decay: 0.0
  # Two-element list; presumably the (beta1, beta2) pair for Adam. Confirm
  # with the optimizer setup.
  adam_betas:
    - 0.9
    - 0.95
  # bf16 is enabled and fp16 (further down) is disabled.
  bf16: true
  # Clipping ranges; the names suggest the PPO ratio, reward score, and value
  # function respectively. Confirm with the trainer.
  clip_range_ratio: 0.2
  clip_range_score: 50.0
  clip_range_value: 5.0
  # Critic optimizer and schedule. The learning rate matches the actor's, but
  # the scheduler is constant rather than cosine.
  critic_gradient_checkpointing: true
  critic_lr: 5.0e-07
  critic_lr_scheduler_type: constant
  critic_lr_warmup_ratio: 0.03
  critic_weight_decay: 0.0
  # Relative file name; where it is resolved from depends on the consumer.
  ds_cfgs: ds_z3_config.json
  epochs: 3
  # NOTE(review): `eval_strategy` is `epoch`, so `eval_interval` presumably
  # only matters for a step-based strategy; confirm with the trainer.
  eval_interval: 10
  eval_strategy: epoch
  fp16: false
  # Only the vision tower is frozen; the language model and mm_proj are not.
  freeze_language_model: false
  freeze_mm_proj: false
  freeze_vision_tower: true
  gae_lambda: 0.95
  gamma: 1.0
  gradient_accumulation_steps: 1
  kl_coeff: 0.02
  normalize_reward: false
  # Eval, prompt, and train per-device batch sizes are all 2.
  per_device_eval_batch_size: 2
  per_device_prompt_batch_size: 2
  per_device_train_batch_size: 2
  # Presumably the weight of the PTX loss term; confirm with the trainer.
  ptx_coeff: 16.0
  seed: 42
  update_iters: 1