{ "rl_model_name": null, "rl_model_class": null, "num_rollouts": 64, "chunk_size": 16, "ppo_epochs": 4, "init_kl_coef": 0.05, "num_layers_unfrozen": 2, "rm_bits": 8, "rl_lora_rank": 8, "rl_lora_alpha": 32, "rl_lora_dropout": 0.1, "use_qlora_in_rl": false, "use_rl_peft_config": true, "lora_compute_dtype": "bfloat16", "steps_per_print": 10, "logdir": "aim-repo", "aim_repo": null, "experiment_name": "rag_dmf_lora_with_hallucination_citation_8b_instruct_1021_final_granite_v6", "stage": 2, "overlap_comm": false, "contiguous_gradients": false, "cpu_offload": false, "optimizer": { "optimizer_class": "FusedAdam", "lr": 1e-05, "weight_decay": 0.1, "betas": [ 0.9, 0.95 ], "eps": 1e-10 }, "lr_schedule": "linear", "warmup_steps": 200, "datasets": [ { "data_class": "JSONLinesDatasetStructuredRAGChat", "data_name": "simulator-rag", "data_path": "data/rag_dmf_data/structured_1021_release_with_hallucination_citation", "data_sampling_proportion": 1, "max_input_tokens": 3600, "max_output_tokens": 800 } ], "seed": 42, "training_inference_type": "lora_finetuning", "prompt_tuning_init": null, "prompt_tuning_init_text": null, "num_virtual_tokens": null, "load_path": null, "peft_num_layers": null, "peft_num_attention_heads": null, "model_name": "/dccstor/gma/shared/dmf_models/granite-3.0-8b-instruct-r241014a", "tokenizer_name": null, "model_class": "AutoModelForCausalLM", "gma_model_class": "Model", "dtype": "bfloat16", "trust_remote_code": false, "padding_side": null, "lora_rank": 8, "lora_alpha": 8, "lora_dropout": 0.1, "lora_target_modules": [ "q_proj", "k_proj", "v_proj", "o_proj", "up_proj", "down_proj", "gate_proj" ], "save_huggingface_checkpoint": true, "quantization_method": "fp4", "bnb_4bit_use_double_quant": false, "use_quantization_for_inference": false, "max_seq_len": 2048, "attention_implementation": "flash_attention_2", "num_labels": 1, "use_sdpa_attention": false, "save_path": "checkpoints/rag_dmf_lora_with_hallucination_citation_8b_instruct_1021_final_granite_v6", "ignore_sampling_proportion_for_validation": false, "num_training_steps": 200000, "gradient_accumulation_steps": 1, "eval_interval": 10000, "save_interval": 10000, "batch_size_per_gpu": 1, "coeff": 1.0, "eval_during_training": true, "smart_token_allocation": false, "max_new_tokens": 0, "gradient_checkpointing": true }