Hyperparameters
# Fine-tune Llama-2-7b-chat-hf on the openassistant-guanaco dataset with
# DeepSpeed ZeRO-3 on local GPUs 0-3. Trainer flags are gathered in an
# array so each option stays on its own line without trailing backslashes.
train_args=(
  --model_name_or_path="/home/shenyl/cached_models/meta-llama/Llama-2-7b-chat-hf"
  --dataset_name="timdettmers/openassistant-guanaco"
  --dataset_text_field="text"
  --report_to="tensorboard"
  --learning_rate=1e-5
  --per_device_train_batch_size=6
  --gradient_accumulation_steps=8
  --output_dir="guanaco_Llama-2-7b-chat-hf"
  --logging_steps=1
  --num_train_epochs=15
  --max_steps=-1                 # -1: run by epochs, not a fixed step count
  --gradient_checkpointing       # trade compute for activation memory
  --save_steps=0.3               # fractional value — NOTE(review): presumably a ratio of total steps; confirm against trainer docs
)
deepspeed --include localhost:0,1,2,3 sft.py --deepspeed dp_zero3.json "${train_args[@]}"
Dataset
timdettmers/openassistant-guanaco
- Downloads last month: 0