# Base model description.
# NOTE(review): key meanings below are inferred from their names only —
# confirm against the code that loads this file.
[model]
# presumably the full embedding / hidden width of the base model
dim = 2048
# presumably the tokenizer vocabulary size
n_vocab = 68096
# presumably a Hugging Face Hub repository id
hf_model_id = "sarvamai/sarvam-1"
# NOTE(review): "</s>" is conventionally an end-of-sequence token; presumably
# reused here as the padding token — confirm this is intentional.
pad_token = "</s>"

# Tokenization options.
# NOTE(review): the key names match keyword arguments accepted by Hugging Face
# tokenizers; presumably they are forwarded unchanged — confirm in the loader.
[tokenizer]
return_tensors = "pt"
return_attention_mask = true
# presumably measured in tokens; longer inputs are cut because truncation = true
max_length = 256
padding = "longest"
truncation = true
add_special_tokens = false

# Training-run options.
# NOTE(review): most key names match Hugging Face / Sentence Transformers
# training-argument fields; presumably they are passed through — confirm.
[training]
output_dir = "output/matryoshka_sarvam1"
num_train_epochs = 20
per_device_train_batch_size = 128
warmup_steps = 256
# NOTE(review): newer `transformers` releases name this field `eval_strategy`;
# confirm which spelling the installed version expects before renaming.
evaluation_strategy = "steps"
# Evaluation and checkpointing share the same 2000-step interval.
eval_steps = 2000
save_steps = 2000
fp16 = true
include_num_input_tokens_seen = false
learning_rate = 3e-4
multi_dataset_batch_sampler = "PROPORTIONAL"
# NOTE(review): not a standard trainer field as far as can be told from here;
# presumably a project-specific option selecting the binarizer's
# straight-through estimator — confirm.
binarizer_ste = "tanh"

# Embedding sizes for the Matryoshka setup, listed largest first.
# NOTE(review): the [model] width of 2048 is not in this list — presumably the
# full dimension is handled separately; confirm it is not missing by accident.
[matryoshka]
dims = [1024, 512, 256, 128, 64]