mamba-2.8b-slimpj-OpenOrca_1ep / training_parameters.json
IggoOnCode
First version of the mamba-2.8b-slimpj-OpenOrca_1ep model and tokenizer (copy of EleutherAI/gpt-neox-20b).
b44e736
{
"trained_model_name": "mamba-2.8b-slimpj-OpenOrc_1ep",
"save_steps": 500000.0,
"micro_batch_size": 4,
"batch_size": 128,
"epochs": 1.0,
"learning_rate": "3e-4",
"lr_scheduler_type": "linear",
"cutoff_len": 256,
"dataset": "OpenOrca",
"eval_dataset": "None",
"format": "openorca-format",
"warmup_steps": 100.0,
"optimizer": "paged_adamw_8bit",
"hard_cut_string": "\\n\\n\\n",
"add_eos_token": false,
"min_chars": 0.0,
}
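
For context, a minimal sketch of how these saved hyperparameters might be consumed, assuming the file is read with Python's standard json module. The file path, the normalization of string/float-typed fields, and the derived gradient-accumulation count (batch_size / micro_batch_size) are illustrative assumptions, not part of the recorded config.

import json

# Minimal sketch: load the recorded hyperparameters (path is assumed).
with open("training_parameters.json") as f:
    params = json.load(f)

# Some fields are stored as strings or floats, so normalize before use.
micro_batch_size = int(params["micro_batch_size"])   # 4
batch_size = int(params["batch_size"])                # 128
learning_rate = float(params["learning_rate"])        # "3e-4" -> 0.0003
warmup_steps = int(params["warmup_steps"])            # 100

# Assumption: the effective batch size is reached via gradient accumulation,
# i.e. accumulation steps = batch_size / micro_batch_size.
grad_accum_steps = batch_size // micro_batch_size     # 32

print(f"lr={learning_rate}, warmup={warmup_steps}, grad_accum={grad_accum_steps}")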