{ "best_metric": 13.1948, "best_model_checkpoint": "/content/tst-translation/checkpoint-4000", "epoch": 20.0, "eval_steps": 400, "global_step": 4020, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.9900497512437811, "grad_norm": 2.333070993423462, "learning_rate": 0.00045024875621890546, "loss": 3.4257, "step": 400 }, { "epoch": 1.9900497512437811, "eval_bleu": 4.1008, "eval_gen_len": 77.8284, "eval_loss": 2.1086840629577637, "eval_runtime": 201.8226, "eval_samples_per_second": 1.992, "eval_steps_per_second": 0.129, "step": 400 }, { "epoch": 3.9800995024875623, "grad_norm": 2.2885234355926514, "learning_rate": 0.00040049751243781097, "loss": 1.8571, "step": 800 }, { "epoch": 3.9800995024875623, "eval_bleu": 8.6198, "eval_gen_len": 61.1418, "eval_loss": 1.9292024374008179, "eval_runtime": 149.5469, "eval_samples_per_second": 2.688, "eval_steps_per_second": 0.174, "step": 800 }, { "epoch": 5.970149253731344, "grad_norm": 1.9040518999099731, "learning_rate": 0.0003507462686567164, "loss": 1.2467, "step": 1200 }, { "epoch": 5.970149253731344, "eval_bleu": 10.7074, "eval_gen_len": 48.3184, "eval_loss": 1.9778931140899658, "eval_runtime": 82.3257, "eval_samples_per_second": 4.883, "eval_steps_per_second": 0.316, "step": 1200 }, { "epoch": 7.960199004975125, "grad_norm": 1.6420375108718872, "learning_rate": 0.00030099502487562194, "loss": 0.8749, "step": 1600 }, { "epoch": 7.960199004975125, "eval_bleu": 11.8538, "eval_gen_len": 49.3483, "eval_loss": 2.0539379119873047, "eval_runtime": 80.462, "eval_samples_per_second": 4.996, "eval_steps_per_second": 0.323, "step": 1600 }, { "epoch": 9.950248756218905, "grad_norm": 1.6268000602722168, "learning_rate": 0.0002512437810945274, "loss": 0.6141, "step": 2000 }, { "epoch": 9.950248756218905, "eval_bleu": 12.4452, "eval_gen_len": 51.1269, "eval_loss": 2.1948225498199463, "eval_runtime": 88.7102, "eval_samples_per_second": 4.532, "eval_steps_per_second": 0.293, "step": 2000 }, { "epoch": 11.940298507462687, "grad_norm": 1.412307858467102, "learning_rate": 0.00020149253731343284, "loss": 0.4446, "step": 2400 }, { "epoch": 11.940298507462687, "eval_bleu": 12.3052, "eval_gen_len": 48.0995, "eval_loss": 2.3901803493499756, "eval_runtime": 67.4373, "eval_samples_per_second": 5.961, "eval_steps_per_second": 0.386, "step": 2400 }, { "epoch": 13.930348258706468, "grad_norm": 1.7401809692382812, "learning_rate": 0.00015174129353233832, "loss": 0.3251, "step": 2800 }, { "epoch": 13.930348258706468, "eval_bleu": 12.5824, "eval_gen_len": 49.1244, "eval_loss": 2.5697591304779053, "eval_runtime": 74.1886, "eval_samples_per_second": 5.419, "eval_steps_per_second": 0.35, "step": 2800 }, { "epoch": 15.92039800995025, "grad_norm": 1.2750484943389893, "learning_rate": 0.00010199004975124378, "loss": 0.2501, "step": 3200 }, { "epoch": 15.92039800995025, "eval_bleu": 13.0619, "eval_gen_len": 50.6095, "eval_loss": 2.663144588470459, "eval_runtime": 80.5868, "eval_samples_per_second": 4.988, "eval_steps_per_second": 0.323, "step": 3200 }, { "epoch": 17.91044776119403, "grad_norm": 0.941927433013916, "learning_rate": 5.223880597014925e-05, "loss": 0.1986, "step": 3600 }, { "epoch": 17.91044776119403, "eval_bleu": 13.0557, "eval_gen_len": 51.1443, "eval_loss": 2.787724494934082, "eval_runtime": 83.6803, "eval_samples_per_second": 4.804, "eval_steps_per_second": 0.311, "step": 3600 }, { "epoch": 19.90049751243781, "grad_norm": 1.0183725357055664, "learning_rate": 2.4875621890547264e-06, "loss": 0.1692, "step": 4000 }, { "epoch": 19.90049751243781, "eval_bleu": 13.1948, "eval_gen_len": 49.9179, "eval_loss": 2.8421177864074707, "eval_runtime": 82.7516, "eval_samples_per_second": 4.858, "eval_steps_per_second": 0.314, "step": 4000 }, { "epoch": 20.0, "step": 4020, "total_flos": 4364211764689920.0, "train_loss": 0.9366569099141591, "train_runtime": 4542.9296, "train_samples_per_second": 7.075, "train_steps_per_second": 0.885 } ], "logging_steps": 400, "max_steps": 4020, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 400, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4364211764689920.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }