{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9974554707379135,
"eval_steps": 500,
"global_step": 49,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.020356234096692113,
"grad_norm": 0.1697569042444229,
"learning_rate": 2e-05,
"loss": 1.8967,
"step": 1
},
{
"epoch": 0.04071246819338423,
"grad_norm": 0.16455122828483582,
"learning_rate": 4e-05,
"loss": 1.9742,
"step": 2
},
{
"epoch": 0.061068702290076333,
"grad_norm": 0.1334070861339569,
"learning_rate": 6e-05,
"loss": 1.8263,
"step": 3
},
{
"epoch": 0.08142493638676845,
"grad_norm": 0.19785284996032715,
"learning_rate": 8e-05,
"loss": 1.8359,
"step": 4
},
{
"epoch": 0.10178117048346055,
"grad_norm": 0.16298870742321014,
"learning_rate": 0.0001,
"loss": 1.8452,
"step": 5
},
{
"epoch": 0.12213740458015267,
"grad_norm": 0.1491611897945404,
"learning_rate": 0.00012,
"loss": 1.8554,
"step": 6
},
{
"epoch": 0.14249363867684478,
"grad_norm": 0.14379450678825378,
"learning_rate": 0.00014,
"loss": 1.7928,
"step": 7
},
{
"epoch": 0.1628498727735369,
"grad_norm": 0.11020653694868088,
"learning_rate": 0.00016,
"loss": 1.8638,
"step": 8
},
{
"epoch": 0.183206106870229,
"grad_norm": 0.20387399196624756,
"learning_rate": 0.00018,
"loss": 1.6861,
"step": 9
},
{
"epoch": 0.2035623409669211,
"grad_norm": 0.18219953775405884,
"learning_rate": 0.0002,
"loss": 1.7959,
"step": 10
},
{
"epoch": 0.22391857506361323,
"grad_norm": 0.15169978141784668,
"learning_rate": 0.00019967573081342103,
"loss": 1.827,
"step": 11
},
{
"epoch": 0.24427480916030533,
"grad_norm": 0.08840368688106537,
"learning_rate": 0.00019870502626379127,
"loss": 1.8324,
"step": 12
},
{
"epoch": 0.26463104325699743,
"grad_norm": 0.08657640218734741,
"learning_rate": 0.0001970941817426052,
"loss": 1.8853,
"step": 13
},
{
"epoch": 0.28498727735368956,
"grad_norm": 0.10179179906845093,
"learning_rate": 0.00019485364419471454,
"loss": 1.8181,
"step": 14
},
{
"epoch": 0.3053435114503817,
"grad_norm": 0.08559463918209076,
"learning_rate": 0.00019199794436588243,
"loss": 1.9153,
"step": 15
},
{
"epoch": 0.3256997455470738,
"grad_norm": 0.1002177819609642,
"learning_rate": 0.000188545602565321,
"loss": 1.869,
"step": 16
},
{
"epoch": 0.3460559796437659,
"grad_norm": 0.08648160099983215,
"learning_rate": 0.0001845190085543795,
"loss": 1.7103,
"step": 17
},
{
"epoch": 0.366412213740458,
"grad_norm": 0.0936996191740036,
"learning_rate": 0.00017994427634035015,
"loss": 1.906,
"step": 18
},
{
"epoch": 0.38676844783715014,
"grad_norm": 0.09296493977308273,
"learning_rate": 0.00017485107481711012,
"loss": 1.8598,
"step": 19
},
{
"epoch": 0.4071246819338422,
"grad_norm": 0.08220034092664719,
"learning_rate": 0.00016927243535095997,
"loss": 1.7883,
"step": 20
},
{
"epoch": 0.42748091603053434,
"grad_norm": 0.08138112723827362,
"learning_rate": 0.00016324453755953773,
"loss": 1.839,
"step": 21
},
{
"epoch": 0.44783715012722647,
"grad_norm": 0.089788056910038,
"learning_rate": 0.00015680647467311557,
"loss": 1.9511,
"step": 22
},
{
"epoch": 0.4681933842239186,
"grad_norm": 0.08236150443553925,
"learning_rate": 0.00015000000000000001,
"loss": 1.7003,
"step": 23
},
{
"epoch": 0.48854961832061067,
"grad_norm": 0.0797291249036789,
"learning_rate": 0.00014286925614030542,
"loss": 1.8304,
"step": 24
},
{
"epoch": 0.5089058524173028,
"grad_norm": 0.11056578904390335,
"learning_rate": 0.00013546048870425356,
"loss": 1.8363,
"step": 25
},
{
"epoch": 0.5292620865139949,
"grad_norm": 0.08862095326185226,
"learning_rate": 0.0001278217463916453,
"loss": 1.8398,
"step": 26
},
{
"epoch": 0.549618320610687,
"grad_norm": 0.07724796235561371,
"learning_rate": 0.00012000256937760445,
"loss": 1.8102,
"step": 27
},
{
"epoch": 0.5699745547073791,
"grad_norm": 0.08631068468093872,
"learning_rate": 0.0001120536680255323,
"loss": 1.7571,
"step": 28
},
{
"epoch": 0.5903307888040712,
"grad_norm": 0.0852205753326416,
"learning_rate": 0.00010402659401094152,
"loss": 1.7623,
"step": 29
},
{
"epoch": 0.6106870229007634,
"grad_norm": 0.09490972757339478,
"learning_rate": 9.597340598905852e-05,
"loss": 1.7686,
"step": 30
},
{
"epoch": 0.6310432569974554,
"grad_norm": 0.0830993726849556,
"learning_rate": 8.79463319744677e-05,
"loss": 1.8706,
"step": 31
},
{
"epoch": 0.6513994910941476,
"grad_norm": 0.1042318046092987,
"learning_rate": 7.999743062239557e-05,
"loss": 1.8053,
"step": 32
},
{
"epoch": 0.6717557251908397,
"grad_norm": 0.07309404015541077,
"learning_rate": 7.217825360835473e-05,
"loss": 1.7835,
"step": 33
},
{
"epoch": 0.6921119592875318,
"grad_norm": 0.07798247784376144,
"learning_rate": 6.453951129574644e-05,
"loss": 1.8351,
"step": 34
},
{
"epoch": 0.712468193384224,
"grad_norm": 0.10470892488956451,
"learning_rate": 5.713074385969457e-05,
"loss": 1.6869,
"step": 35
},
{
"epoch": 0.732824427480916,
"grad_norm": 0.062327221035957336,
"learning_rate": 5.000000000000002e-05,
"loss": 1.8249,
"step": 36
},
{
"epoch": 0.7531806615776081,
"grad_norm": 0.09498722851276398,
"learning_rate": 4.3193525326884435e-05,
"loss": 1.6566,
"step": 37
},
{
"epoch": 0.7735368956743003,
"grad_norm": 0.07665014266967773,
"learning_rate": 3.675546244046228e-05,
"loss": 1.766,
"step": 38
},
{
"epoch": 0.7938931297709924,
"grad_norm": 0.07265973836183548,
"learning_rate": 3.072756464904006e-05,
"loss": 1.789,
"step": 39
},
{
"epoch": 0.8142493638676844,
"grad_norm": 0.07454241812229156,
"learning_rate": 2.514892518288988e-05,
"loss": 1.831,
"step": 40
},
{
"epoch": 0.8346055979643766,
"grad_norm": 0.07916758209466934,
"learning_rate": 2.0055723659649904e-05,
"loss": 1.8673,
"step": 41
},
{
"epoch": 0.8549618320610687,
"grad_norm": 0.0858256071805954,
"learning_rate": 1.5480991445620542e-05,
"loss": 1.6331,
"step": 42
},
{
"epoch": 0.8753180661577609,
"grad_norm": 0.09055773913860321,
"learning_rate": 1.1454397434679021e-05,
"loss": 1.6856,
"step": 43
},
{
"epoch": 0.8956743002544529,
"grad_norm": 0.08801382780075073,
"learning_rate": 8.002055634117578e-06,
"loss": 1.7838,
"step": 44
},
{
"epoch": 0.916030534351145,
"grad_norm": 0.08115601539611816,
"learning_rate": 5.146355805285452e-06,
"loss": 1.7345,
"step": 45
},
{
"epoch": 0.9363867684478372,
"grad_norm": 0.07437321543693542,
"learning_rate": 2.905818257394799e-06,
"loss": 1.84,
"step": 46
},
{
"epoch": 0.9567430025445293,
"grad_norm": 0.08822711557149887,
"learning_rate": 1.2949737362087156e-06,
"loss": 1.6885,
"step": 47
},
{
"epoch": 0.9770992366412213,
"grad_norm": 0.09608830511569977,
"learning_rate": 3.2426918657900704e-07,
"loss": 1.6825,
"step": 48
},
{
"epoch": 0.9974554707379135,
"grad_norm": 0.07894308120012283,
"learning_rate": 0.0,
"loss": 1.6886,
"step": 49
}
],
"logging_steps": 1,
"max_steps": 49,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 9.32140082998018e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}