{
  "best_metric": 0.9707943925233645,
  "best_model_checkpoint": "beit-base-patch16-224-pt22k-ft22k-finetuned-lora-medmnistv2/checkpoint-1870",
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 1870,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.053475935828877004,
      "grad_norm": 4.650041580200195,
      "learning_rate": 0.004973262032085562,
      "loss": 1.5063,
      "step": 10
    },
    {
      "epoch": 0.10695187165775401,
      "grad_norm": 3.0658373832702637,
      "learning_rate": 0.004946524064171123,
      "loss": 0.8711,
      "step": 20
    },
    {
      "epoch": 0.16042780748663102,
      "grad_norm": 2.9676272869110107,
      "learning_rate": 0.004919786096256685,
      "loss": 0.8,
      "step": 30
    },
    {
      "epoch": 0.21390374331550802,
      "grad_norm": 2.5159189701080322,
      "learning_rate": 0.004893048128342246,
      "loss": 0.7794,
      "step": 40
    },
    {
      "epoch": 0.26737967914438504,
      "grad_norm": 2.4576735496520996,
      "learning_rate": 0.004868983957219251,
      "loss": 0.8748,
      "step": 50
    },
    {
      "epoch": 0.32085561497326204,
      "grad_norm": 1.9533675909042358,
      "learning_rate": 0.004842245989304813,
      "loss": 0.6213,
      "step": 60
    },
    {
      "epoch": 0.37433155080213903,
      "grad_norm": 3.91825795173645,
      "learning_rate": 0.004815508021390374,
      "loss": 0.6883,
      "step": 70
    },
    {
      "epoch": 0.42780748663101603,
      "grad_norm": 3.228422164916992,
      "learning_rate": 0.004788770053475936,
      "loss": 0.7019,
      "step": 80
    },
    {
      "epoch": 0.48128342245989303,
      "grad_norm": 4.45206356048584,
      "learning_rate": 0.004762032085561497,
      "loss": 0.5394,
      "step": 90
    },
    {
      "epoch": 0.5347593582887701,
      "grad_norm": 2.184957504272461,
      "learning_rate": 0.004735294117647059,
      "loss": 0.5543,
      "step": 100
    },
    {
      "epoch": 0.5882352941176471,
      "grad_norm": 2.246079206466675,
      "learning_rate": 0.00470855614973262,
      "loss": 0.5738,
      "step": 110
    },
    {
      "epoch": 0.6417112299465241,
      "grad_norm": 2.6914820671081543,
      "learning_rate": 0.004681818181818182,
      "loss": 0.6209,
      "step": 120
    },
    {
      "epoch": 0.6951871657754011,
      "grad_norm": 2.5458545684814453,
      "learning_rate": 0.0046550802139037435,
      "loss": 0.5597,
      "step": 130
    },
    {
      "epoch": 0.7486631016042781,
      "grad_norm": 2.676391363143921,
      "learning_rate": 0.004628342245989305,
      "loss": 0.5273,
      "step": 140
    },
    {
      "epoch": 0.8021390374331551,
      "grad_norm": 2.5059385299682617,
      "learning_rate": 0.0046016042780748665,
      "loss": 0.5199,
      "step": 150
    },
    {
      "epoch": 0.8556149732620321,
      "grad_norm": 1.451249122619629,
      "learning_rate": 0.004574866310160428,
      "loss": 0.5509,
      "step": 160
    },
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 2.5957276821136475,
      "learning_rate": 0.00454812834224599,
      "loss": 0.5336,
      "step": 170
    },
    {
      "epoch": 0.9625668449197861,
      "grad_norm": 2.4229955673217773,
      "learning_rate": 0.004521390374331551,
      "loss": 0.4657,
      "step": 180
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.9094626168224299,
      "eval_f1": 0.8972949130385568,
      "eval_loss": 0.2451503425836563,
      "eval_precision": 0.8964084875867973,
      "eval_recall": 0.9082806506629539,
      "eval_runtime": 10.2386,
      "eval_samples_per_second": 167.21,
      "eval_steps_per_second": 10.451,
      "step": 187
    },
    {
      "epoch": 1.0160427807486632,
      "grad_norm": 2.3994851112365723,
      "learning_rate": 0.004494652406417113,
      "loss": 0.4772,
      "step": 190
    },
    {
      "epoch": 1.0695187165775402,
      "grad_norm": 1.985571265220642,
      "learning_rate": 0.004467914438502674,
      "loss": 0.5995,
      "step": 200
    },
    {
      "epoch": 1.1229946524064172,
      "grad_norm": 2.3798632621765137,
      "learning_rate": 0.004441176470588235,
      "loss": 0.5686,
      "step": 210
    },
    {
      "epoch": 1.1764705882352942,
      "grad_norm": 3.1128406524658203,
      "learning_rate": 0.004414438502673797,
      "loss": 0.4984,
      "step": 220
    },
    {
      "epoch": 1.2299465240641712,
      "grad_norm": 2.8572049140930176,
      "learning_rate": 0.004387700534759359,
      "loss": 0.5027,
      "step": 230
    },
    {
      "epoch": 1.2834224598930482,
      "grad_norm": 5.178213119506836,
      "learning_rate": 0.00436096256684492,
      "loss": 0.4864,
      "step": 240
    },
    {
      "epoch": 1.3368983957219251,
      "grad_norm": 1.9515773057937622,
      "learning_rate": 0.004334224598930481,
      "loss": 0.4528,
      "step": 250
    },
    {
      "epoch": 1.3903743315508021,
      "grad_norm": 3.023959159851074,
      "learning_rate": 0.0043074866310160425,
      "loss": 0.5513,
      "step": 260
    },
    {
      "epoch": 1.4438502673796791,
      "grad_norm": 2.371218204498291,
      "learning_rate": 0.004280748663101605,
      "loss": 0.442,
      "step": 270
    },
    {
      "epoch": 1.4973262032085561,
      "grad_norm": 2.111191987991333,
      "learning_rate": 0.004254010695187166,
      "loss": 0.6163,
      "step": 280
    },
    {
      "epoch": 1.5508021390374331,
      "grad_norm": 2.123419761657715,
      "learning_rate": 0.004227272727272727,
      "loss": 0.5522,
      "step": 290
    },
    {
      "epoch": 1.6042780748663101,
      "grad_norm": 1.6425999402999878,
      "learning_rate": 0.004200534759358289,
      "loss": 0.4601,
      "step": 300
    },
    {
      "epoch": 1.6577540106951871,
      "grad_norm": 3.847395420074463,
      "learning_rate": 0.00417379679144385,
      "loss": 0.5434,
      "step": 310
    },
    {
      "epoch": 1.7112299465240641,
      "grad_norm": 1.8732799291610718,
      "learning_rate": 0.004147058823529412,
      "loss": 0.4952,
      "step": 320
    },
    {
      "epoch": 1.7647058823529411,
      "grad_norm": 1.4881893396377563,
      "learning_rate": 0.004120320855614973,
      "loss": 0.4926,
      "step": 330
    },
    {
      "epoch": 1.8181818181818183,
      "grad_norm": 1.9936500787734985,
      "learning_rate": 0.004093582887700535,
      "loss": 0.4582,
      "step": 340
    },
    {
      "epoch": 1.8716577540106951,
      "grad_norm": 4.784737586975098,
      "learning_rate": 0.004066844919786096,
      "loss": 0.4839,
      "step": 350
    },
    {
      "epoch": 1.9251336898395723,
      "grad_norm": 2.403982162475586,
      "learning_rate": 0.004040106951871658,
      "loss": 0.5868,
      "step": 360
    },
    {
      "epoch": 1.9786096256684491,
      "grad_norm": 1.7464922666549683,
      "learning_rate": 0.004013368983957219,
      "loss": 0.4327,
      "step": 370
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9182242990654206,
      "eval_f1": 0.9007413709436916,
      "eval_loss": 0.21109923720359802,
      "eval_precision": 0.9299210483133126,
      "eval_recall": 0.8921235393972065,
      "eval_runtime": 10.4332,
      "eval_samples_per_second": 164.091,
      "eval_steps_per_second": 10.256,
      "step": 374
    },
    {
      "epoch": 2.0320855614973263,
      "grad_norm": 1.444707989692688,
      "learning_rate": 0.003986631016042781,
      "loss": 0.478,
      "step": 380
    },
    {
      "epoch": 2.085561497326203,
      "grad_norm": 1.4123905897140503,
      "learning_rate": 0.003959893048128342,
      "loss": 0.5,
      "step": 390
    },
    {
      "epoch": 2.1390374331550803,
      "grad_norm": 2.96335768699646,
      "learning_rate": 0.003933155080213904,
      "loss": 0.5348,
      "step": 400
    },
    {
      "epoch": 2.192513368983957,
      "grad_norm": 1.4397529363632202,
      "learning_rate": 0.0039064171122994654,
      "loss": 0.4571,
      "step": 410
    },
    {
      "epoch": 2.2459893048128343,
      "grad_norm": 1.821366548538208,
      "learning_rate": 0.0038796791443850265,
      "loss": 0.4982,
      "step": 420
    },
    {
      "epoch": 2.299465240641711,
      "grad_norm": 2.112130641937256,
      "learning_rate": 0.0038529411764705885,
      "loss": 0.4343,
      "step": 430
    },
    {
      "epoch": 2.3529411764705883,
      "grad_norm": 1.942734956741333,
      "learning_rate": 0.00382620320855615,
      "loss": 0.5078,
      "step": 440
    },
    {
      "epoch": 2.406417112299465,
      "grad_norm": 2.774502754211426,
      "learning_rate": 0.003799465240641711,
      "loss": 0.4016,
      "step": 450
    },
    {
      "epoch": 2.4598930481283423,
      "grad_norm": 2.139463424682617,
      "learning_rate": 0.0037727272727272726,
      "loss": 0.5415,
      "step": 460
    },
    {
      "epoch": 2.5133689839572195,
      "grad_norm": 1.9148341417312622,
      "learning_rate": 0.003745989304812834,
      "loss": 0.4417,
      "step": 470
    },
    {
      "epoch": 2.5668449197860963,
      "grad_norm": 1.9109567403793335,
      "learning_rate": 0.003719251336898396,
      "loss": 0.4273,
      "step": 480
    },
    {
      "epoch": 2.620320855614973,
      "grad_norm": 2.2219059467315674,
      "learning_rate": 0.0036925133689839572,
      "loss": 0.5218,
      "step": 490
    },
    {
      "epoch": 2.6737967914438503,
      "grad_norm": 3.378606081008911,
      "learning_rate": 0.0036657754010695188,
      "loss": 0.4318,
      "step": 500
    },
    {
      "epoch": 2.7272727272727275,
      "grad_norm": 1.668760061264038,
      "learning_rate": 0.0036390374331550803,
      "loss": 0.4447,
      "step": 510
    },
    {
      "epoch": 2.7807486631016043,
      "grad_norm": 1.830342411994934,
      "learning_rate": 0.0036122994652406414,
      "loss": 0.4507,
      "step": 520
    },
    {
      "epoch": 2.834224598930481,
      "grad_norm": 2.2146425247192383,
      "learning_rate": 0.0035855614973262034,
      "loss": 0.4127,
      "step": 530
    },
    {
      "epoch": 2.8877005347593583,
      "grad_norm": 1.3959295749664307,
      "learning_rate": 0.003558823529411765,
      "loss": 0.4353,
      "step": 540
    },
    {
      "epoch": 2.9411764705882355,
      "grad_norm": 1.844604253768921,
      "learning_rate": 0.0035320855614973264,
      "loss": 0.3488,
      "step": 550
    },
    {
      "epoch": 2.9946524064171123,
      "grad_norm": 1.421885371208191,
      "learning_rate": 0.0035053475935828875,
      "loss": 0.3977,
      "step": 560
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9339953271028038,
      "eval_f1": 0.924420495312186,
      "eval_loss": 0.17427141964435577,
      "eval_precision": 0.9228598461246502,
      "eval_recall": 0.928247943129569,
      "eval_runtime": 9.981,
      "eval_samples_per_second": 171.527,
      "eval_steps_per_second": 10.72,
      "step": 561
    },
    {
      "epoch": 3.0481283422459895,
      "grad_norm": 2.2883894443511963,
      "learning_rate": 0.003478609625668449,
      "loss": 0.3909,
      "step": 570
    },
    {
      "epoch": 3.1016042780748663,
      "grad_norm": 2.4753079414367676,
      "learning_rate": 0.003451871657754011,
      "loss": 0.4352,
      "step": 580
    },
    {
      "epoch": 3.1550802139037435,
      "grad_norm": 2.298736572265625,
      "learning_rate": 0.0034251336898395725,
      "loss": 0.4641,
      "step": 590
    },
    {
      "epoch": 3.2085561497326203,
      "grad_norm": 1.4368634223937988,
      "learning_rate": 0.0033983957219251336,
      "loss": 0.4225,
      "step": 600
    },
    {
      "epoch": 3.2620320855614975,
      "grad_norm": 1.462842583656311,
      "learning_rate": 0.003371657754010695,
      "loss": 0.3958,
      "step": 610
    },
    {
      "epoch": 3.3155080213903743,
      "grad_norm": 2.449066638946533,
      "learning_rate": 0.0033449197860962567,
      "loss": 0.3784,
      "step": 620
    },
    {
      "epoch": 3.3689839572192515,
      "grad_norm": 1.5616710186004639,
      "learning_rate": 0.0033181818181818186,
      "loss": 0.4476,
      "step": 630
    },
    {
      "epoch": 3.4224598930481283,
      "grad_norm": 2.284454345703125,
      "learning_rate": 0.0032914438502673797,
      "loss": 0.3725,
      "step": 640
    },
    {
      "epoch": 3.4759358288770055,
      "grad_norm": 1.5143663883209229,
      "learning_rate": 0.0032647058823529413,
      "loss": 0.4597,
      "step": 650
    },
    {
      "epoch": 3.5294117647058822,
      "grad_norm": 1.6112128496170044,
      "learning_rate": 0.003237967914438503,
      "loss": 0.4198,
      "step": 660
    },
    {
      "epoch": 3.5828877005347595,
      "grad_norm": 1.2612804174423218,
      "learning_rate": 0.003211229946524064,
      "loss": 0.4785,
      "step": 670
    },
    {
      "epoch": 3.6363636363636362,
      "grad_norm": 2.0233500003814697,
      "learning_rate": 0.0031844919786096254,
      "loss": 0.4276,
      "step": 680
    },
    {
      "epoch": 3.6898395721925135,
      "grad_norm": 1.2161093950271606,
      "learning_rate": 0.0031577540106951874,
      "loss": 0.3865,
      "step": 690
    },
    {
      "epoch": 3.7433155080213902,
      "grad_norm": 1.835656762123108,
      "learning_rate": 0.003131016042780749,
      "loss": 0.3202,
      "step": 700
    },
    {
      "epoch": 3.7967914438502675,
      "grad_norm": 2.9908785820007324,
      "learning_rate": 0.00310427807486631,
      "loss": 0.3879,
      "step": 710
    },
    {
      "epoch": 3.8502673796791442,
      "grad_norm": 1.587223768234253,
      "learning_rate": 0.0030775401069518715,
      "loss": 0.3682,
      "step": 720
    },
    {
      "epoch": 3.9037433155080214,
      "grad_norm": 2.0039021968841553,
      "learning_rate": 0.003050802139037433,
      "loss": 0.4148,
      "step": 730
    },
    {
      "epoch": 3.9572192513368982,
      "grad_norm": 1.8037409782409668,
      "learning_rate": 0.003024064171122995,
      "loss": 0.3318,
      "step": 740
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.9351635514018691,
      "eval_f1": 0.928485806906975,
      "eval_loss": 0.17756415903568268,
      "eval_precision": 0.9248343621199285,
      "eval_recall": 0.9352570988138212,
      "eval_runtime": 10.1719,
      "eval_samples_per_second": 168.307,
      "eval_steps_per_second": 10.519,
      "step": 748
    },
    {
      "epoch": 4.010695187165775,
      "grad_norm": 2.230004072189331,
      "learning_rate": 0.002997326203208556,
      "loss": 0.4071,
      "step": 750
    },
    {
      "epoch": 4.064171122994653,
      "grad_norm": 2.1018853187561035,
      "learning_rate": 0.0029705882352941177,
      "loss": 0.3498,
      "step": 760
    },
    {
      "epoch": 4.117647058823529,
      "grad_norm": 1.6814857721328735,
      "learning_rate": 0.002943850267379679,
      "loss": 0.4085,
      "step": 770
    },
    {
      "epoch": 4.171122994652406,
      "grad_norm": 2.0869903564453125,
      "learning_rate": 0.0029171122994652403,
      "loss": 0.4481,
      "step": 780
    },
    {
      "epoch": 4.224598930481283,
      "grad_norm": 1.4043067693710327,
      "learning_rate": 0.0028903743315508022,
      "loss": 0.3234,
      "step": 790
    },
    {
      "epoch": 4.278074866310161,
      "grad_norm": 2.0766959190368652,
      "learning_rate": 0.0028636363636363638,
      "loss": 0.3719,
      "step": 800
    },
    {
      "epoch": 4.331550802139038,
      "grad_norm": 1.85934317111969,
      "learning_rate": 0.0028368983957219253,
      "loss": 0.4784,
      "step": 810
    },
    {
      "epoch": 4.385026737967914,
      "grad_norm": 2.3728232383728027,
      "learning_rate": 0.0028101604278074864,
      "loss": 0.3704,
      "step": 820
    },
    {
      "epoch": 4.438502673796791,
      "grad_norm": 1.2759883403778076,
      "learning_rate": 0.002783422459893048,
      "loss": 0.3283,
      "step": 830
    },
    {
      "epoch": 4.491978609625669,
      "grad_norm": 1.2006633281707764,
      "learning_rate": 0.00275668449197861,
      "loss": 0.3792,
      "step": 840
    },
    {
      "epoch": 4.545454545454545,
      "grad_norm": 2.0884652137756348,
      "learning_rate": 0.0027299465240641714,
      "loss": 0.4041,
      "step": 850
    },
    {
      "epoch": 4.598930481283422,
      "grad_norm": 1.281827688217163,
      "learning_rate": 0.0027032085561497325,
      "loss": 0.352,
      "step": 860
    },
    {
      "epoch": 4.652406417112299,
      "grad_norm": 1.7143138647079468,
      "learning_rate": 0.002676470588235294,
      "loss": 0.3896,
      "step": 870
    },
    {
      "epoch": 4.705882352941177,
      "grad_norm": 2.069678544998169,
      "learning_rate": 0.0026497326203208556,
      "loss": 0.335,
      "step": 880
    },
    {
      "epoch": 4.759358288770054,
      "grad_norm": 1.6988319158554077,
      "learning_rate": 0.0026229946524064175,
      "loss": 0.3693,
      "step": 890
    },
    {
      "epoch": 4.81283422459893,
      "grad_norm": 1.6188457012176514,
      "learning_rate": 0.0025962566844919786,
      "loss": 0.337,
      "step": 900
    },
    {
      "epoch": 4.866310160427807,
      "grad_norm": 2.0478222370147705,
      "learning_rate": 0.00256951871657754,
      "loss": 0.3156,
      "step": 910
    },
    {
      "epoch": 4.919786096256685,
      "grad_norm": 1.7088401317596436,
      "learning_rate": 0.0025427807486631017,
      "loss": 0.3414,
      "step": 920
    },
    {
      "epoch": 4.973262032085562,
      "grad_norm": 1.161230444908142,
      "learning_rate": 0.002516042780748663,
      "loss": 0.3461,
      "step": 930
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.9380841121495327,
      "eval_f1": 0.9304948103477649,
      "eval_loss": 0.17028363049030304,
      "eval_precision": 0.9311071354745837,
      "eval_recall": 0.9344001562456381,
      "eval_runtime": 10.2604,
      "eval_samples_per_second": 166.855,
      "eval_steps_per_second": 10.428,
      "step": 935
    },
    {
      "epoch": 5.026737967914438,
      "grad_norm": 1.723848819732666,
      "learning_rate": 0.0024893048128342248,
      "loss": 0.3622,
      "step": 940
    },
    {
      "epoch": 5.080213903743315,
      "grad_norm": 2.0140602588653564,
      "learning_rate": 0.002462566844919786,
      "loss": 0.3973,
      "step": 950
    },
    {
      "epoch": 5.133689839572193,
      "grad_norm": 1.5653032064437866,
      "learning_rate": 0.002435828877005348,
      "loss": 0.3106,
      "step": 960
    },
    {
      "epoch": 5.18716577540107,
      "grad_norm": 1.7829616069793701,
      "learning_rate": 0.002409090909090909,
      "loss": 0.3723,
      "step": 970
    },
    {
      "epoch": 5.240641711229946,
      "grad_norm": 0.9940521717071533,
      "learning_rate": 0.0023823529411764704,
      "loss": 0.3453,
      "step": 980
    },
    {
      "epoch": 5.294117647058823,
      "grad_norm": 1.1114059686660767,
      "learning_rate": 0.002355614973262032,
      "loss": 0.3769,
      "step": 990
    },
    {
      "epoch": 5.347593582887701,
      "grad_norm": 0.9444433450698853,
      "learning_rate": 0.0023288770053475935,
      "loss": 0.3489,
      "step": 1000
    },
    {
      "epoch": 5.401069518716578,
      "grad_norm": 2.0856947898864746,
      "learning_rate": 0.002302139037433155,
      "loss": 0.374,
      "step": 1010
    },
    {
      "epoch": 5.454545454545454,
      "grad_norm": 1.679477572441101,
      "learning_rate": 0.0022754010695187166,
      "loss": 0.3738,
      "step": 1020
    },
    {
      "epoch": 5.508021390374331,
      "grad_norm": 1.3019518852233887,
      "learning_rate": 0.002248663101604278,
      "loss": 0.3634,
      "step": 1030
    },
    {
      "epoch": 5.561497326203209,
      "grad_norm": 1.467846155166626,
      "learning_rate": 0.0022219251336898396,
      "loss": 0.3457,
      "step": 1040
    },
    {
      "epoch": 5.614973262032086,
      "grad_norm": 1.6348631381988525,
      "learning_rate": 0.002195187165775401,
      "loss": 0.3216,
      "step": 1050
    },
    {
      "epoch": 5.668449197860962,
      "grad_norm": 1.158215880393982,
      "learning_rate": 0.0021684491978609627,
      "loss": 0.3033,
      "step": 1060
    },
    {
      "epoch": 5.721925133689839,
      "grad_norm": 0.8872423768043518,
      "learning_rate": 0.002141711229946524,
      "loss": 0.2919,
      "step": 1070
    },
    {
      "epoch": 5.775401069518717,
      "grad_norm": 1.9146243333816528,
      "learning_rate": 0.0021149732620320857,
      "loss": 0.3228,
      "step": 1080
    },
    {
      "epoch": 5.828877005347594,
      "grad_norm": 1.7084169387817383,
      "learning_rate": 0.0020882352941176473,
      "loss": 0.2754,
      "step": 1090
    },
    {
      "epoch": 5.882352941176471,
      "grad_norm": 1.0626111030578613,
      "learning_rate": 0.0020614973262032084,
      "loss": 0.3165,
      "step": 1100
    },
    {
      "epoch": 5.935828877005347,
      "grad_norm": 1.8155293464660645,
      "learning_rate": 0.00203475935828877,
      "loss": 0.2815,
      "step": 1110
    },
    {
      "epoch": 5.989304812834225,
      "grad_norm": 1.8623782396316528,
      "learning_rate": 0.0020080213903743314,
      "loss": 0.3309,
      "step": 1120
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.9369158878504673,
      "eval_f1": 0.9334719219156348,
      "eval_loss": 0.19556888937950134,
      "eval_precision": 0.9335706750233659,
      "eval_recall": 0.9396740716392903,
      "eval_runtime": 10.2767,
      "eval_samples_per_second": 166.591,
      "eval_steps_per_second": 10.412,
      "step": 1122
    },
    {
      "epoch": 6.042780748663102,
      "grad_norm": 1.1055293083190918,
      "learning_rate": 0.001981283422459893,
      "loss": 0.3202,
      "step": 1130
    },
    {
      "epoch": 6.096256684491979,
      "grad_norm": 1.7265422344207764,
      "learning_rate": 0.0019545454545454545,
      "loss": 0.2973,
      "step": 1140
    },
    {
      "epoch": 6.149732620320855,
      "grad_norm": 2.0242912769317627,
      "learning_rate": 0.001927807486631016,
      "loss": 0.302,
      "step": 1150
    },
    {
      "epoch": 6.2032085561497325,
      "grad_norm": 1.0210644006729126,
      "learning_rate": 0.0019010695187165775,
      "loss": 0.2785,
      "step": 1160
    },
    {
      "epoch": 6.25668449197861,
      "grad_norm": 1.5111178159713745,
      "learning_rate": 0.001874331550802139,
      "loss": 0.2873,
      "step": 1170
    },
    {
      "epoch": 6.310160427807487,
      "grad_norm": 1.060488224029541,
      "learning_rate": 0.0018475935828877006,
      "loss": 0.321,
      "step": 1180
    },
    {
      "epoch": 6.363636363636363,
      "grad_norm": 1.0627189874649048,
      "learning_rate": 0.0018208556149732621,
      "loss": 0.2682,
      "step": 1190
    },
    {
      "epoch": 6.4171122994652405,
      "grad_norm": 1.1237576007843018,
      "learning_rate": 0.0017941176470588236,
      "loss": 0.2383,
      "step": 1200
    },
    {
      "epoch": 6.470588235294118,
      "grad_norm": 1.6101592779159546,
      "learning_rate": 0.001767379679144385,
      "loss": 0.3197,
      "step": 1210
    },
    {
      "epoch": 6.524064171122995,
      "grad_norm": 0.6864691972732544,
      "learning_rate": 0.0017406417112299467,
      "loss": 0.2307,
      "step": 1220
    },
    {
      "epoch": 6.577540106951871,
      "grad_norm": 1.339308500289917,
      "learning_rate": 0.001713903743315508,
      "loss": 0.2534,
      "step": 1230
    },
    {
      "epoch": 6.6310160427807485,
      "grad_norm": 1.3319642543792725,
      "learning_rate": 0.0016871657754010698,
      "loss": 0.32,
      "step": 1240
    },
    {
      "epoch": 6.684491978609626,
      "grad_norm": 1.4089816808700562,
      "learning_rate": 0.001660427807486631,
      "loss": 0.285,
      "step": 1250
    },
    {
      "epoch": 6.737967914438503,
      "grad_norm": 1.212084174156189,
      "learning_rate": 0.0016336898395721924,
      "loss": 0.2217,
      "step": 1260
    },
    {
      "epoch": 6.791443850267379,
      "grad_norm": 1.6609482765197754,
      "learning_rate": 0.0016069518716577541,
      "loss": 0.2952,
      "step": 1270
    },
    {
      "epoch": 6.8449197860962565,
      "grad_norm": 1.060892105102539,
      "learning_rate": 0.0015802139037433154,
      "loss": 0.2524,
      "step": 1280
    },
    {
      "epoch": 6.898395721925134,
      "grad_norm": 1.3365124464035034,
      "learning_rate": 0.001553475935828877,
      "loss": 0.2694,
      "step": 1290
    },
    {
      "epoch": 6.951871657754011,
      "grad_norm": 1.1521918773651123,
      "learning_rate": 0.0015267379679144385,
      "loss": 0.3088,
      "step": 1300
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.9532710280373832,
      "eval_f1": 0.9461125894090557,
      "eval_loss": 0.11792106181383133,
      "eval_precision": 0.9426583892398479,
      "eval_recall": 0.952515495389921,
      "eval_runtime": 10.3853,
      "eval_samples_per_second": 164.849,
      "eval_steps_per_second": 10.303,
      "step": 1309
    },
    {
      "epoch": 7.005347593582887,
      "grad_norm": 0.8682220578193665,
      "learning_rate": 0.0015,
      "loss": 0.2627,
      "step": 1310
    },
    {
      "epoch": 7.0588235294117645,
      "grad_norm": 2.279827356338501,
      "learning_rate": 0.0014732620320855616,
      "loss": 0.2796,
      "step": 1320
    },
    {
      "epoch": 7.112299465240642,
      "grad_norm": 1.3697049617767334,
      "learning_rate": 0.001446524064171123,
      "loss": 0.2369,
      "step": 1330
    },
    {
      "epoch": 7.165775401069519,
      "grad_norm": 0.8857790231704712,
      "learning_rate": 0.0014197860962566844,
      "loss": 0.2648,
      "step": 1340
    },
    {
      "epoch": 7.219251336898395,
      "grad_norm": 2.053224802017212,
      "learning_rate": 0.0013930481283422461,
      "loss": 0.212,
      "step": 1350
    },
    {
      "epoch": 7.2727272727272725,
      "grad_norm": 1.619578242301941,
      "learning_rate": 0.0013663101604278075,
      "loss": 0.2229,
      "step": 1360
    },
    {
      "epoch": 7.32620320855615,
      "grad_norm": 1.3765966892242432,
      "learning_rate": 0.0013395721925133692,
      "loss": 0.2311,
      "step": 1370
    },
    {
      "epoch": 7.379679144385027,
      "grad_norm": 1.2967066764831543,
      "learning_rate": 0.0013128342245989305,
      "loss": 0.2402,
      "step": 1380
    },
    {
      "epoch": 7.433155080213904,
      "grad_norm": 1.2961163520812988,
      "learning_rate": 0.0012860962566844918,
      "loss": 0.2318,
      "step": 1390
    },
    {
      "epoch": 7.4866310160427805,
      "grad_norm": 1.6240290403366089,
      "learning_rate": 0.0012593582887700536,
      "loss": 0.2669,
      "step": 1400
    },
    {
      "epoch": 7.540106951871658,
      "grad_norm": 1.1457808017730713,
      "learning_rate": 0.0012326203208556149,
      "loss": 0.2887,
      "step": 1410
    },
    {
      "epoch": 7.593582887700535,
      "grad_norm": 1.303931474685669,
      "learning_rate": 0.0012058823529411764,
      "loss": 0.2862,
      "step": 1420
    },
    {
      "epoch": 7.647058823529412,
      "grad_norm": 0.9429693222045898,
      "learning_rate": 0.001179144385026738,
      "loss": 0.2282,
      "step": 1430
    },
    {
      "epoch": 7.7005347593582885,
      "grad_norm": 1.349269986152649,
      "learning_rate": 0.0011524064171122995,
      "loss": 0.2414,
      "step": 1440
    },
    {
      "epoch": 7.754010695187166,
      "grad_norm": 1.185160517692566,
      "learning_rate": 0.001125668449197861,
      "loss": 0.219,
      "step": 1450
    },
    {
      "epoch": 7.807486631016043,
      "grad_norm": 1.5935460329055786,
      "learning_rate": 0.0010989304812834225,
      "loss": 0.2109,
      "step": 1460
    },
    {
      "epoch": 7.86096256684492,
      "grad_norm": 1.4563795328140259,
      "learning_rate": 0.001072192513368984,
      "loss": 0.2943,
      "step": 1470
    },
    {
      "epoch": 7.9144385026737964,
      "grad_norm": 1.2570650577545166,
      "learning_rate": 0.0010454545454545454,
      "loss": 0.2275,
      "step": 1480
    },
    {
      "epoch": 7.967914438502674,
      "grad_norm": 0.6930679082870483,
      "learning_rate": 0.001018716577540107,
      "loss": 0.2129,
      "step": 1490
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.9637850467289719,
      "eval_f1": 0.9610548371575116,
      "eval_loss": 0.09920904040336609,
      "eval_precision": 0.9569323583080014,
      "eval_recall": 0.9673920345290172,
      "eval_runtime": 10.543,
      "eval_samples_per_second": 162.382,
      "eval_steps_per_second": 10.149,
      "step": 1496
    },
    {
      "epoch": 8.02139037433155,
      "grad_norm": 1.4018137454986572,
      "learning_rate": 0.0009919786096256684,
      "loss": 0.2638,
      "step": 1500
    },
    {
      "epoch": 8.074866310160427,
      "grad_norm": 1.2713522911071777,
      "learning_rate": 0.00096524064171123,
      "loss": 0.2099,
      "step": 1510
    },
    {
      "epoch": 8.128342245989305,
      "grad_norm": 1.004296064376831,
      "learning_rate": 0.0009385026737967915,
      "loss": 0.1801,
      "step": 1520
    },
    {
      "epoch": 8.181818181818182,
      "grad_norm": 0.7041844129562378,
      "learning_rate": 0.0009117647058823529,
      "loss": 0.1829,
      "step": 1530
    },
    {
      "epoch": 8.235294117647058,
      "grad_norm": 1.3204301595687866,
      "learning_rate": 0.0008850267379679144,
      "loss": 0.2444,
      "step": 1540
    },
    {
      "epoch": 8.288770053475936,
      "grad_norm": 1.261974573135376,
      "learning_rate": 0.000858288770053476,
      "loss": 0.2431,
      "step": 1550
    },
    {
      "epoch": 8.342245989304812,
      "grad_norm": 0.9899649024009705,
      "learning_rate": 0.0008315508021390375,
      "loss": 0.1808,
      "step": 1560
    },
    {
      "epoch": 8.39572192513369,
      "grad_norm": 1.150225281715393,
      "learning_rate": 0.0008048128342245989,
      "loss": 0.2048,
      "step": 1570
    },
    {
      "epoch": 8.449197860962567,
      "grad_norm": 0.9454184770584106,
      "learning_rate": 0.0007780748663101605,
      "loss": 0.1919,
      "step": 1580
    },
    {
      "epoch": 8.502673796791443,
      "grad_norm": 1.26669442653656,
      "learning_rate": 0.000751336898395722,
      "loss": 0.1837,
      "step": 1590
    },
    {
      "epoch": 8.556149732620321,
      "grad_norm": 0.8547130823135376,
      "learning_rate": 0.0007245989304812835,
      "loss": 0.1774,
      "step": 1600
    },
    {
      "epoch": 8.609625668449198,
      "grad_norm": 1.8781049251556396,
      "learning_rate": 0.000697860962566845,
      "loss": 0.2202,
      "step": 1610
    },
    {
      "epoch": 8.663101604278076,
      "grad_norm": 0.7876987457275391,
      "learning_rate": 0.0006711229946524064,
      "loss": 0.1781,
      "step": 1620
    },
    {
      "epoch": 8.716577540106952,
      "grad_norm": 1.2137806415557861,
      "learning_rate": 0.0006443850267379679,
      "loss": 0.1722,
      "step": 1630
    },
    {
      "epoch": 8.770053475935828,
      "grad_norm": 1.6328903436660767,
      "learning_rate": 0.0006176470588235294,
      "loss": 0.2085,
      "step": 1640
    },
    {
      "epoch": 8.823529411764707,
      "grad_norm": 0.9435901641845703,
      "learning_rate": 0.0005909090909090909,
      "loss": 0.2335,
      "step": 1650
    },
    {
      "epoch": 8.877005347593583,
      "grad_norm": 1.1905876398086548,
      "learning_rate": 0.0005641711229946525,
      "loss": 0.2387,
      "step": 1660
    },
    {
      "epoch": 8.93048128342246,
      "grad_norm": 0.8758776783943176,
      "learning_rate": 0.0005374331550802139,
      "loss": 0.2265,
      "step": 1670
    },
    {
      "epoch": 8.983957219251337,
      "grad_norm": 1.3745719194412231,
      "learning_rate": 0.0005106951871657754,
      "loss": 0.2049,
      "step": 1680
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.967873831775701,
      "eval_f1": 0.9651132770824573,
      "eval_loss": 0.08469934016466141,
      "eval_precision": 0.9626628225985181,
      "eval_recall": 0.9683070024371949,
      "eval_runtime": 10.3829,
      "eval_samples_per_second": 164.887,
      "eval_steps_per_second": 10.305,
      "step": 1683
    },
    {
      "epoch": 9.037433155080214,
      "grad_norm": 0.9230683445930481,
      "learning_rate": 0.0004839572192513369,
      "loss": 0.1654,
      "step": 1690
    },
    {
      "epoch": 9.090909090909092,
      "grad_norm": 0.8362302184104919,
      "learning_rate": 0.0004572192513368984,
      "loss": 0.1918,
      "step": 1700
    },
    {
      "epoch": 9.144385026737968,
      "grad_norm": 1.3025470972061157,
      "learning_rate": 0.0004304812834224599,
      "loss": 0.1497,
      "step": 1710
    },
    {
      "epoch": 9.197860962566844,
      "grad_norm": 0.8339858055114746,
      "learning_rate": 0.00040374331550802143,
      "loss": 0.196,
      "step": 1720
    },
    {
      "epoch": 9.251336898395722,
      "grad_norm": 1.3273382186889648,
      "learning_rate": 0.00037700534759358285,
      "loss": 0.1912,
      "step": 1730
    },
    {
      "epoch": 9.304812834224599,
      "grad_norm": 0.5822441577911377,
      "learning_rate": 0.0003502673796791444,
      "loss": 0.1452,
      "step": 1740
    },
    {
      "epoch": 9.358288770053475,
      "grad_norm": 0.8451639413833618,
      "learning_rate": 0.0003235294117647059,
      "loss": 0.1877,
      "step": 1750
    },
    {
      "epoch": 9.411764705882353,
      "grad_norm": 1.0270066261291504,
      "learning_rate": 0.0002967914438502674,
      "loss": 0.1964,
      "step": 1760
    },
    {
      "epoch": 9.46524064171123,
      "grad_norm": 1.0621460676193237,
      "learning_rate": 0.00027005347593582886,
      "loss": 0.2015,
      "step": 1770
    },
    {
      "epoch": 9.518716577540108,
      "grad_norm": 0.9587564468383789,
      "learning_rate": 0.00024331550802139036,
      "loss": 0.1962,
      "step": 1780
    },
    {
      "epoch": 9.572192513368984,
      "grad_norm": 0.719536304473877,
      "learning_rate": 0.00021657754010695186,
      "loss": 0.1389,
      "step": 1790
    },
    {
      "epoch": 9.62566844919786,
      "grad_norm": 0.89113450050354,
      "learning_rate": 0.0001898395721925134,
      "loss": 0.1783,
      "step": 1800
    },
    {
      "epoch": 9.679144385026738,
      "grad_norm": 0.8831282258033752,
      "learning_rate": 0.0001631016042780749,
      "loss": 0.1871,
      "step": 1810
    },
    {
      "epoch": 9.732620320855615,
      "grad_norm": 0.6015557646751404,
      "learning_rate": 0.00013636363636363637,
      "loss": 0.1414,
      "step": 1820
    },
    {
      "epoch": 9.786096256684491,
      "grad_norm": 1.1582796573638916,
      "learning_rate": 0.00010962566844919787,
      "loss": 0.2408,
      "step": 1830
    },
    {
      "epoch": 9.83957219251337,
      "grad_norm": 0.7856789231300354,
      "learning_rate": 8.288770053475936e-05,
      "loss": 0.145,
      "step": 1840
    },
    {
      "epoch": 9.893048128342246,
      "grad_norm": 1.1010181903839111,
      "learning_rate": 5.614973262032086e-05,
      "loss": 0.1758,
      "step": 1850
    },
    {
      "epoch": 9.946524064171124,
      "grad_norm": 0.7676904797554016,
      "learning_rate": 2.9411764705882354e-05,
      "loss": 0.1683,
      "step": 1860
    },
    {
      "epoch": 10.0,
      "grad_norm": 1.4464507102966309,
      "learning_rate": 2.6737967914438504e-06,
      "loss": 0.2007,
      "step": 1870
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.9707943925233645,
      "eval_f1": 0.9697517307733657,
      "eval_loss": 0.07853860408067703,
      "eval_precision": 0.9668363312878312,
      "eval_recall": 0.9737482240908748,
      "eval_runtime": 10.3924,
      "eval_samples_per_second": 164.735,
      "eval_steps_per_second": 10.296,
      "step": 1870
    },
    {
      "epoch": 10.0,
      "step": 1870,
      "total_flos": 9.328175742872125e+18,
      "train_loss": 0.3662890907277398,
      "train_runtime": 1600.7009,
      "train_samples_per_second": 74.711,
      "train_steps_per_second": 1.168
    }
  ],
  "logging_steps": 10,
  "max_steps": 1870,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "total_flos": 9.328175742872125e+18,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}