{ "best_global_step": 2550, "best_metric": 0.08080464601516724, "best_model_checkpoint": "./turkish-toxic-bert-full\\checkpoint-2550", "epoch": 0.749173705471906, "eval_steps": 850, "global_step": 2550, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.049944913698127064, "grad_norm": 10.951066970825195, "learning_rate": 9.941176470588236e-06, "loss": 0.6808, "step": 170 }, { "epoch": 0.09988982739625413, "grad_norm": 2.395719051361084, "learning_rate": 1.9941176470588238e-05, "loss": 0.2552, "step": 340 }, { "epoch": 0.1498347410943812, "grad_norm": 31.97089385986328, "learning_rate": 1.9657617504051864e-05, "loss": 0.1784, "step": 510 }, { "epoch": 0.19977965479250825, "grad_norm": 3.0786185264587402, "learning_rate": 1.931320907617504e-05, "loss": 0.1626, "step": 680 }, { "epoch": 0.24972456849063532, "grad_norm": 36.20637893676758, "learning_rate": 1.896880064829822e-05, "loss": 0.163, "step": 850 }, { "epoch": 0.24972456849063532, "eval_accuracy": 0.9607969151670951, "eval_loss": 0.12955638766288757, "eval_runtime": 632.0606, "eval_samples_per_second": 12.309, "eval_steps_per_second": 3.077, "step": 850 }, { "epoch": 0.2996694821887624, "grad_norm": 13.24213695526123, "learning_rate": 1.8624392220421394e-05, "loss": 0.1494, "step": 1020 }, { "epoch": 0.3496143958868895, "grad_norm": 7.672886371612549, "learning_rate": 1.8279983792544573e-05, "loss": 0.1403, "step": 1190 }, { "epoch": 0.3995593095850165, "grad_norm": 1.0008550882339478, "learning_rate": 1.7935575364667748e-05, "loss": 0.133, "step": 1360 }, { "epoch": 0.4495042232831436, "grad_norm": 15.496179580688477, "learning_rate": 1.7591166936790924e-05, "loss": 0.1059, "step": 1530 }, { "epoch": 0.49944913698127064, "grad_norm": 13.099618911743164, "learning_rate": 1.7246758508914102e-05, "loss": 0.1281, "step": 1700 }, { "epoch": 0.49944913698127064, "eval_accuracy": 0.9701799485861182, "eval_loss": 0.0947001576423645, "eval_runtime": 632.2141, "eval_samples_per_second": 12.306, "eval_steps_per_second": 3.076, "step": 1700 }, { "epoch": 0.5493940506793977, "grad_norm": 9.940011024475098, "learning_rate": 1.6902350081037278e-05, "loss": 0.1258, "step": 1870 }, { "epoch": 0.5993389643775248, "grad_norm": 20.614500045776367, "learning_rate": 1.6557941653160453e-05, "loss": 0.0975, "step": 2040 }, { "epoch": 0.6492838780756518, "grad_norm": 12.049532890319824, "learning_rate": 1.6213533225283632e-05, "loss": 0.1297, "step": 2210 }, { "epoch": 0.699228791773779, "grad_norm": 2.572075843811035, "learning_rate": 1.5869124797406807e-05, "loss": 0.1329, "step": 2380 }, { "epoch": 0.749173705471906, "grad_norm": 7.409031391143799, "learning_rate": 1.5524716369529983e-05, "loss": 0.1162, "step": 2550 }, { "epoch": 0.749173705471906, "eval_accuracy": 0.9699228791773779, "eval_loss": 0.08080464601516724, "eval_runtime": 632.5405, "eval_samples_per_second": 12.3, "eval_steps_per_second": 3.075, "step": 2550 } ], "logging_steps": 170, "max_steps": 10212, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 2550, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5367465529344000.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }