{
  "project": "iterative preference learning",
  "exp_name": "qwen-2-7b-default_iter3",
  "cache_dir": ".cache",
  "result_dir": "results",
  "data": null,
  "data_path": null,
  "sample_size": null,
  "prompt_max_length": 1024,
  "max_length": 2048,
  "model_name_or_path": ".cache/qwen-2-7b-default_iter2",
  "ref_model_name_or_path": ".cache/qwen-2-7b-default-iter0",
  "beta": 0.1,
  "n_epochs": 1,
  "per_device_batch_size": 1,
  "gradient_accumulation_steps": 32,
  "lr": 5e-07,
  "warmup_ratio": 0.03,
  "max_grad_norm": 1,
  "open_port": 39067
}