|
{ |
|
"best_metric": 0.8063241106719368, |
|
"best_model_checkpoint": "distilbert-base-multilingual-cased-hyper-matt/run-pi33ffzs/checkpoint-800", |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 800, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.025, |
|
"grad_norm": 14.099751472473145, |
|
"learning_rate": 9.582405619536138e-05, |
|
"loss": 0.6743, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 4.672331809997559, |
|
"learning_rate": 9.522138917526476e-05, |
|
"loss": 0.6209, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.075, |
|
"grad_norm": 2.4487009048461914, |
|
"learning_rate": 9.461872215516814e-05, |
|
"loss": 0.608, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 5.88143253326416, |
|
"learning_rate": 9.401605513507153e-05, |
|
"loss": 0.5719, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.125, |
|
"grad_norm": 7.989026069641113, |
|
"learning_rate": 9.341338811497491e-05, |
|
"loss": 0.5645, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 3.12394380569458, |
|
"learning_rate": 9.281072109487831e-05, |
|
"loss": 0.4137, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.175, |
|
"grad_norm": 6.6350579261779785, |
|
"learning_rate": 9.22080540747817e-05, |
|
"loss": 0.5479, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.6092991828918457, |
|
"learning_rate": 9.160538705468508e-05, |
|
"loss": 0.5195, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.225, |
|
"grad_norm": 22.4118595123291, |
|
"learning_rate": 9.100272003458847e-05, |
|
"loss": 0.8602, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 3.869532585144043, |
|
"learning_rate": 9.040005301449186e-05, |
|
"loss": 0.9557, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.275, |
|
"grad_norm": 5.411820888519287, |
|
"learning_rate": 8.979738599439524e-05, |
|
"loss": 0.4593, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 2.3322949409484863, |
|
"learning_rate": 8.919471897429864e-05, |
|
"loss": 0.5916, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.325, |
|
"grad_norm": 2.0190422534942627, |
|
"learning_rate": 8.859205195420202e-05, |
|
"loss": 0.5737, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 6.8667893409729, |
|
"learning_rate": 8.798938493410541e-05, |
|
"loss": 0.4983, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.375, |
|
"grad_norm": 1.2699512243270874, |
|
"learning_rate": 8.738671791400879e-05, |
|
"loss": 0.5618, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 5.28828763961792, |
|
"learning_rate": 8.678405089391219e-05, |
|
"loss": 0.7923, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.425, |
|
"grad_norm": 9.879742622375488, |
|
"learning_rate": 8.618138387381557e-05, |
|
"loss": 0.8174, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.6045369505882263, |
|
"learning_rate": 8.557871685371895e-05, |
|
"loss": 0.284, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.475, |
|
"grad_norm": 0.8522588610649109, |
|
"learning_rate": 8.497604983362234e-05, |
|
"loss": 0.3718, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 9.719417572021484, |
|
"learning_rate": 8.437338281352574e-05, |
|
"loss": 1.0217, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.525, |
|
"grad_norm": 15.228802680969238, |
|
"learning_rate": 8.377071579342912e-05, |
|
"loss": 0.5518, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 6.107767105102539, |
|
"learning_rate": 8.316804877333251e-05, |
|
"loss": 0.5761, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.575, |
|
"grad_norm": 3.676274061203003, |
|
"learning_rate": 8.25653817532359e-05, |
|
"loss": 0.2779, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.597080647945404, |
|
"learning_rate": 8.196271473313928e-05, |
|
"loss": 0.2176, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 0.3770584762096405, |
|
"learning_rate": 8.136004771304267e-05, |
|
"loss": 0.6285, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 23.122947692871094, |
|
"learning_rate": 8.075738069294607e-05, |
|
"loss": 0.5774, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.675, |
|
"grad_norm": 0.3317395746707916, |
|
"learning_rate": 8.015471367284945e-05, |
|
"loss": 0.76, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.390415906906128, |
|
"learning_rate": 7.955204665275283e-05, |
|
"loss": 0.502, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.725, |
|
"grad_norm": 4.841302871704102, |
|
"learning_rate": 7.894937963265622e-05, |
|
"loss": 0.4339, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 7.026674747467041, |
|
"learning_rate": 7.83467126125596e-05, |
|
"loss": 0.6468, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.775, |
|
"grad_norm": 0.8100307583808899, |
|
"learning_rate": 7.7744045592463e-05, |
|
"loss": 0.6812, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 3.845278739929199, |
|
"learning_rate": 7.714137857236639e-05, |
|
"loss": 0.3495, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.825, |
|
"grad_norm": 22.603090286254883, |
|
"learning_rate": 7.653871155226977e-05, |
|
"loss": 0.5564, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.4318823218345642, |
|
"learning_rate": 7.593604453217316e-05, |
|
"loss": 0.5139, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.875, |
|
"grad_norm": 12.046862602233887, |
|
"learning_rate": 7.533337751207655e-05, |
|
"loss": 0.7725, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 15.319775581359863, |
|
"learning_rate": 7.473071049197993e-05, |
|
"loss": 0.4356, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.925, |
|
"grad_norm": 25.348968505859375, |
|
"learning_rate": 7.412804347188333e-05, |
|
"loss": 0.4134, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 21.941987991333008, |
|
"learning_rate": 7.35253764517867e-05, |
|
"loss": 0.8063, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.975, |
|
"grad_norm": 4.438033103942871, |
|
"learning_rate": 7.29227094316901e-05, |
|
"loss": 0.5669, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 4.872718811035156, |
|
"learning_rate": 7.232004241159348e-05, |
|
"loss": 0.5456, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8075, |
|
"eval_f1": 0.5792349726775956, |
|
"eval_loss": 0.5038720369338989, |
|
"eval_precision": 0.8833333333333333, |
|
"eval_recall": 0.43089430894308944, |
|
"eval_runtime": 1.5184, |
|
"eval_samples_per_second": 263.438, |
|
"eval_steps_per_second": 16.465, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.025, |
|
"grad_norm": 0.6440209150314331, |
|
"learning_rate": 7.171737539149688e-05, |
|
"loss": 0.3559, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 10.962855339050293, |
|
"learning_rate": 7.111470837140027e-05, |
|
"loss": 0.2384, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.075, |
|
"grad_norm": 0.5640668869018555, |
|
"learning_rate": 7.051204135130364e-05, |
|
"loss": 0.5156, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.4815268814563751, |
|
"learning_rate": 6.990937433120703e-05, |
|
"loss": 0.2444, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.125, |
|
"grad_norm": 6.384509086608887, |
|
"learning_rate": 6.930670731111043e-05, |
|
"loss": 0.2974, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.1514960080385208, |
|
"learning_rate": 6.870404029101381e-05, |
|
"loss": 0.3388, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.175, |
|
"grad_norm": 8.39075756072998, |
|
"learning_rate": 6.81013732709172e-05, |
|
"loss": 0.7306, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.8284034132957458, |
|
"learning_rate": 6.749870625082059e-05, |
|
"loss": 0.5125, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.225, |
|
"grad_norm": 7.400186538696289, |
|
"learning_rate": 6.689603923072397e-05, |
|
"loss": 0.2146, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 0.23411034047603607, |
|
"learning_rate": 6.629337221062736e-05, |
|
"loss": 0.2123, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.275, |
|
"grad_norm": 4.417282581329346, |
|
"learning_rate": 6.569070519053076e-05, |
|
"loss": 0.5131, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 5.702905178070068, |
|
"learning_rate": 6.508803817043414e-05, |
|
"loss": 0.794, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.325, |
|
"grad_norm": 0.25387826561927795, |
|
"learning_rate": 6.448537115033752e-05, |
|
"loss": 0.268, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 4.115130424499512, |
|
"learning_rate": 6.388270413024091e-05, |
|
"loss": 0.5022, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.375, |
|
"grad_norm": 1.3796603679656982, |
|
"learning_rate": 6.32800371101443e-05, |
|
"loss": 0.4664, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 9.150830268859863, |
|
"learning_rate": 6.267737009004769e-05, |
|
"loss": 0.496, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.425, |
|
"grad_norm": 0.4542056918144226, |
|
"learning_rate": 6.207470306995108e-05, |
|
"loss": 0.2549, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 29.90502166748047, |
|
"learning_rate": 6.147203604985446e-05, |
|
"loss": 0.6464, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.475, |
|
"grad_norm": 1.0268361568450928, |
|
"learning_rate": 6.0869369029757845e-05, |
|
"loss": 0.2584, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.35682424902915955, |
|
"learning_rate": 6.026670200966124e-05, |
|
"loss": 0.5534, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.525, |
|
"grad_norm": 8.86542797088623, |
|
"learning_rate": 5.966403498956463e-05, |
|
"loss": 0.5656, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 8.581369400024414, |
|
"learning_rate": 5.9061367969468016e-05, |
|
"loss": 0.5376, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.575, |
|
"grad_norm": 4.579167366027832, |
|
"learning_rate": 5.84587009493714e-05, |
|
"loss": 0.3394, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.4072767198085785, |
|
"learning_rate": 5.7856033929274785e-05, |
|
"loss": 0.8387, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.625, |
|
"grad_norm": 0.8675380945205688, |
|
"learning_rate": 5.725336690917818e-05, |
|
"loss": 0.4963, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 5.402355670928955, |
|
"learning_rate": 5.665069988908157e-05, |
|
"loss": 0.6346, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.675, |
|
"grad_norm": 0.9370933175086975, |
|
"learning_rate": 5.6048032868984955e-05, |
|
"loss": 0.3953, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 5.052032947540283, |
|
"learning_rate": 5.5445365848888336e-05, |
|
"loss": 0.4608, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.725, |
|
"grad_norm": 0.40091681480407715, |
|
"learning_rate": 5.4842698828791724e-05, |
|
"loss": 0.4279, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 0.348732590675354, |
|
"learning_rate": 5.424003180869511e-05, |
|
"loss": 0.0545, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.775, |
|
"grad_norm": 0.14755772054195404, |
|
"learning_rate": 5.3637364788598506e-05, |
|
"loss": 0.5262, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 11.312381744384766, |
|
"learning_rate": 5.3034697768501894e-05, |
|
"loss": 0.5846, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.825, |
|
"grad_norm": 0.31017163395881653, |
|
"learning_rate": 5.2432030748405275e-05, |
|
"loss": 0.4936, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 0.26326823234558105, |
|
"learning_rate": 5.182936372830866e-05, |
|
"loss": 0.1889, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.875, |
|
"grad_norm": 0.2871868312358856, |
|
"learning_rate": 5.122669670821205e-05, |
|
"loss": 0.1512, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 0.174193874001503, |
|
"learning_rate": 5.062402968811544e-05, |
|
"loss": 0.1859, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.925, |
|
"grad_norm": 5.3941330909729, |
|
"learning_rate": 5.0021362668018834e-05, |
|
"loss": 0.8953, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 5.847171783447266, |
|
"learning_rate": 4.941869564792221e-05, |
|
"loss": 0.4321, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.975, |
|
"grad_norm": 5.563133716583252, |
|
"learning_rate": 4.88160286278256e-05, |
|
"loss": 0.61, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 2.7091472148895264, |
|
"learning_rate": 4.821336160772899e-05, |
|
"loss": 0.3231, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8775, |
|
"eval_f1": 0.8063241106719368, |
|
"eval_loss": 0.375855952501297, |
|
"eval_precision": 0.7846153846153846, |
|
"eval_recall": 0.8292682926829268, |
|
"eval_runtime": 1.5214, |
|
"eval_samples_per_second": 262.914, |
|
"eval_steps_per_second": 16.432, |
|
"step": 800 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1600, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 423630740901888.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": { |
|
"_wandb": {}, |
|
"assignments": {}, |
|
"learning_rate": 9.642672321545798e-05, |
|
"metric": "eval/loss", |
|
"num_train_epochs": 4, |
|
"per_device_train_batch_size": 4, |
|
"seed": 12 |
|
} |
|
} |
|
|