{
  "rl_model_name": null,
  "rl_model_class": null,
  "num_rollouts": 64,
  "chunk_size": 16,
  "ppo_epochs": 4,
  "init_kl_coef": 0.05,
  "num_layers_unfrozen": 2,
  "rm_bits": 8,
  "rl_lora_rank": 8,
  "rl_lora_alpha": 32,
  "rl_lora_dropout": 0.1,
  "use_qlora_in_rl": false,
  "use_rl_peft_config": true,
  "lora_compute_dtype": "bfloat16",
  "steps_per_print": 10,
  "logdir": "aim-repo",
  "aim_repo": null,
  "experiment_name": "rag_dmf_lora_with_hallucination_citation_8b_instruct_1021_final_granite_v6",
  "stage": 2,
  "overlap_comm": false,
  "contiguous_gradients": false,
  "cpu_offload": false,
  "optimizer": {
    "optimizer_class": "FusedAdam",
    "lr": 1e-05,
    "weight_decay": 0.1,
    "betas": [0.9, 0.95],
    "eps": 1e-10
  },
  "lr_schedule": "linear",
  "warmup_steps": 200,
  "datasets": [
    {
      "data_class": "JSONLinesDatasetStructuredRAGChat",
      "data_name": "simulator-rag",
      "data_path": "data/rag_dmf_data/structured_1021_release_with_hallucination_citation",
      "data_sampling_proportion": 1,
      "max_input_tokens": 3600,
      "max_output_tokens": 800
    }
  ],
  "seed": 42,
  "training_inference_type": "lora_finetuning",
  "prompt_tuning_init": null,
  "prompt_tuning_init_text": null,
  "num_virtual_tokens": null,
  "load_path": null,
  "peft_num_layers": null,
  "peft_num_attention_heads": null,
  "model_name": "/dccstor/gma/shared/dmf_models/granite-3.0-8b-instruct-r241014a",
  "tokenizer_name": null,
  "model_class": "AutoModelForCausalLM",
  "gma_model_class": "Model",
  "dtype": "bfloat16",
  "trust_remote_code": false,
  "padding_side": null,
  "lora_rank": 8,
  "lora_alpha": 8,
  "lora_dropout": 0.1,
  "lora_target_modules": [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "up_proj",
    "down_proj",
    "gate_proj"
  ],
  "save_huggingface_checkpoint": true,
  "quantization_method": "fp4",
  "bnb_4bit_use_double_quant": false,
  "use_quantization_for_inference": false,
  "max_seq_len": 2048,
  "attention_implementation": "flash_attention_2",
  "num_labels": 1,
  "use_sdpa_attention": false,
  "save_path": "checkpoints/rag_dmf_lora_with_hallucination_citation_8b_instruct_1021_final_granite_v6",
  "ignore_sampling_proportion_for_validation": false,
  "num_training_steps": 200000,
  "gradient_accumulation_steps": 1,
  "eval_interval": 10000,
  "save_interval": 10000,
  "batch_size_per_gpu": 1,
  "coeff": 1.0,
  "eval_during_training": true,
  "smart_token_allocation": false,
  "max_new_tokens": 0,
  "gradient_checkpointing": true
}