{
  "project": "iterative preference learning",
  "exp_name": "qwen-2-7b-default_iter3",
  "cache_dir": ".cache",
  "result_dir": "results",
  "data": null,
  "data_path": null,
  "sample_size": null,
  "prompt_max_length": 1024,
  "max_length": 2048,
  "model_name_or_path": ".cache/qwen-2-7b-default_iter2",
  "ref_model_name_or_path": ".cache/qwen-2-7b-default-iter0",
  "beta": 0.1,
  "n_epochs": 1,
  "per_device_batch_size": 1,
  "gradient_accumulation_steps": 32,
  "lr": 5e-07,
  "warmup_ratio": 0.03,
  "max_grad_norm": 1,
  "open_port": 39067
}