# wordllama-indic / sarvam1_2b.toml
# Uploaded by aravindhank ("Upload 3 files", commit 3c15818, verified), 615 bytes.
# Describes the source model: a dimension, a vocabulary size, the Hugging Face
# model id it is loaded from, and the token string used for padding.
[model]
# NOTE(review): presumably the embedding width of the source model — confirm
# against the loader that reads this file.
dim = 2048
# NOTE(review): presumably must match the tokenizer/embedding-table size of
# hf_model_id — confirm.
n_vocab = 68096
hf_model_id = "sarvamai/sarvam-1"
# NOTE(review): "</s>" is conventionally an end-of-sequence token; presumably
# reused here for padding — confirm the tokenizer has no dedicated pad token.
pad_token = "</s>"
# Tokenizer options.
# NOTE(review): the key names match keyword arguments of a Hugging Face
# tokenizer call and are presumably forwarded to it unchanged — confirm.
[tokenizer]
# NOTE(review): "pt" presumably selects PyTorch tensors — confirm.
return_tensors = "pt"
return_attention_mask = true
# Length cap paired with truncation = true below.
# NOTE(review): presumably counted in tokens — confirm.
max_length = 256
# NOTE(review): "longest" presumably pads each batch to its longest sequence
# rather than to max_length — confirm.
padding = "longest"
truncation = true
# Special tokens are switched off for tokenization.
add_special_tokens = false
# Training-run settings: output location, schedule, batch size, evaluation and
# checkpoint cadence, precision, and optimizer learning rate.
# NOTE(review): most key names match Hugging Face / sentence-transformers
# training arguments and are presumably forwarded to them — confirm.
[training]
# Relative path; resolved against whatever working directory the consumer uses.
output_dir = "output/matryoshka_sarvam1"
num_train_epochs = 20
per_device_train_batch_size = 128
warmup_steps = 256
# Evaluation is step-based; eval_steps and save_steps share the same interval
# (2000), so evaluation and checkpointing fall on the same steps.
# NOTE(review): newer Transformers releases rename this key to `eval_strategy`
# — confirm which name the pinned version expects.
evaluation_strategy = "steps"
eval_steps = 2000
save_steps = 2000
# NOTE(review): presumably enables 16-bit mixed-precision training — confirm
# the target hardware supports it.
fp16 = true
include_num_input_tokens_seen = false
learning_rate = 3e-4
# NOTE(review): presumably samples from each dataset in proportion to its
# size — confirm against the sampler implementation.
multi_dataset_batch_sampler = "PROPORTIONAL"
# NOTE(review): "ste" presumably stands for straight-through estimator, with
# "tanh" naming the surrogate used for binarization — confirm.
binarizer_ste = "tanh"
# List of dimensions for the matryoshka setup.
[matryoshka]
# Listed in descending order, each half the previous one; all are smaller than
# model.dim (2048).
# NOTE(review): presumably the truncated embedding sizes trained or evaluated,
# and order may matter to the consumer — confirm.
dims = [1024, 512, 256, 128, 64]