Commit b44e736 (IggoOnCode): First version of the mamba-2.8b-slimpj-OpenOrca_1ep model and tokenizer (copy of EleutherAI/gpt-neox-20b). The training parameters recorded for the run:
{
    "trained_model_name": "mamba-2.8b-slimpj-OpenOrca_1ep",
    "save_steps": 500000.0,
    "micro_batch_size": 4,
    "batch_size": 128,
    "epochs": 1.0,
    "learning_rate": "3e-4",
    "lr_scheduler_type": "linear",
    "cutoff_len": 256,
    "dataset": "OpenOrca",
    "eval_dataset": "None",
    "format": "openorca-format",
    "warmup_steps": 100.0,
    "optimizer": "paged_adamw_8bit",
    "hard_cut_string": "\\n\\n\\n",
    "add_eos_token": false,
    "min_chars": 0.0
}
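
For context, batch_size and micro_batch_size together imply gradient accumulation: the effective batch of 128 sequences is built from micro-batches of 4. A minimal sketch of reading these parameters back in Python follows; the filename training_parameters.json is an assumption, since the section does not name the file.

import json

# Hypothetical filename; the repo may store this JSON under a different name.
with open("training_parameters.json") as f:
    params = json.load(f)

# Effective batch of 128 is reached by accumulating gradients over
# batch_size / micro_batch_size = 128 / 4 = 32 micro-batches per optimizer step.
accumulation_steps = params["batch_size"] // params["micro_batch_size"]
print(accumulation_steps)  # 32

# learning_rate is stored as a string ("3e-4"), so convert before use.
lr = float(params["learning_rate"])
print(lr)  # 0.0003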
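
Since the commit ships both the fine-tuned weights and a tokenizer copied from EleutherAI/gpt-neox-20b, a minimal loading sketch may also be useful. This assumes the checkpoint is consumed through the state-spaces mamba_ssm package rather than transformers' AutoModel classes, and the repo id is inferred from the author and model name above; neither is stated in the source.

import torch
from transformers import AutoTokenizer
from mamba_ssm.models.mixer_seq_simple import MambaLMHeadModel

# Assumed repo id, combining the committer name and the model name above.
repo = "IggoOnCode/mamba-2.8b-slimpj-OpenOrca_1ep"

# The bundled tokenizer is described as a copy of EleutherAI/gpt-neox-20b,
# so loading it from either location should be equivalent.
tokenizer = AutoTokenizer.from_pretrained(repo)

# mamba_ssm loads pure-Mamba checkpoints directly, outside transformers' AutoModel path.
model = MambaLMHeadModel.from_pretrained(repo, device="cuda", dtype=torch.bfloat16)

ids = tokenizer("What is a state space model?", return_tensors="pt").input_ids.to("cuda")
out = model.generate(ids, max_length=64)  # mamba_ssm's GenerationMixin; greedy by default (top_k=1)
print(tokenizer.decode(out[0]))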