robertmyers
/

sakura-3b

Text Generation

Model card Files Files and versions Community

sakura-3b / wandb /offline-run-20230725_192932-2k0486q8 /files /wandb-metadata.json

3v324v23's picture

first commit

cf05c06 over 1 year ago

history blame contribute delete

1.62 kB

	{
	"os": "Linux-5.4.0-147-generic-x86_64-with-glibc2.31",
	"python": "3.9.16",
	"heartbeatAt": "2023-07-25T19:29:32.638362",
	"startedAt": "2023-07-25T19:29:32.561287",
	"docker": null,
	"gpu": "NVIDIA A100-SXM4-80GB",
	"gpu_count": 8,
	"cpu_count": 64,
	"cuda": null,
	"args": [
	"--local_rank=0",
	"--train_datasets",
	"bt",
	"--model_name_or_path",
	"cerebras/btlm-3b-8k-base",
	"--max_length",
	"8092",
	"--trust_remote_code",
	"True",
	"--epochs",
	"3",
	"--per_device_train_batch_size",
	"2",
	"--per_device_eval_batch_size",
	"82",
	"--gradient_accumulation_steps",
	"1",
	"--gradient_checkpointing",
	"--learning_rate",
	"2e-5",
	"--lr_scheduler_type",
	"cosine",
	"--num_warmup_steps",
	"20",
	"--weight_decay",
	"0.0",
	"--seed",
	"42",
	"--output_dir",
	"/home/paperspace/safe-rlhf/output/sft",
	"--log_type",
	"wandb",
	"--log_project",
	"BT-Training",
	"--zero_stage",
	"3",
	"--bf16",
	"True",
	"--tf32",
	"True"
	],
	"state": "running",
	"program": "-m safe_rlhf.finetune.__main__",
	"git": {
	"remote": "https://github.com/robertalanm/safe-rlhf",
	"commit": "4b5266714a6d66aea11ec473fe29b6b57c48e40a"
	},
	"email": null,
	"root": "/home/paperspace/safe-rlhf",
	"host": "psiohuej6",
	"username": "paperspace",
	"executable": "/usr/bin/python3.9"
	}