{ "best_metric": 0.7872340425531915, "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-62", "epoch": 3.237704918032787, "eval_steps": 500, "global_step": 244, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.040983606557377046, "grad_norm": 5.660297393798828, "learning_rate": 2e-05, "loss": 0.3242, "step": 10 }, { "epoch": 0.08196721311475409, "grad_norm": 31.36193084716797, "learning_rate": 4e-05, "loss": 0.2215, "step": 20 }, { "epoch": 0.12295081967213115, "grad_norm": 3.619921922683716, "learning_rate": 4.8858447488584476e-05, "loss": 0.3642, "step": 30 }, { "epoch": 0.16393442622950818, "grad_norm": 2.1614866256713867, "learning_rate": 4.657534246575342e-05, "loss": 1.1585, "step": 40 }, { "epoch": 0.20491803278688525, "grad_norm": 5.980413436889648, "learning_rate": 4.4292237442922375e-05, "loss": 0.5672, "step": 50 }, { "epoch": 0.2459016393442623, "grad_norm": 20.064746856689453, "learning_rate": 4.200913242009132e-05, "loss": 0.4643, "step": 60 }, { "epoch": 0.2540983606557377, "eval_accuracy": 0.7872340425531915, "eval_loss": 0.7600494027137756, "eval_runtime": 3.3948, "eval_samples_per_second": 13.845, "eval_steps_per_second": 3.535, "step": 62 }, { "epoch": 1.0327868852459017, "grad_norm": 32.691917419433594, "learning_rate": 3.9726027397260274e-05, "loss": 0.6524, "step": 70 }, { "epoch": 1.0737704918032787, "grad_norm": 27.927385330200195, "learning_rate": 3.744292237442922e-05, "loss": 0.4143, "step": 80 }, { "epoch": 1.1147540983606556, "grad_norm": 73.20063781738281, "learning_rate": 3.5159817351598174e-05, "loss": 0.5154, "step": 90 }, { "epoch": 1.1557377049180328, "grad_norm": 14.900442123413086, "learning_rate": 3.287671232876712e-05, "loss": 0.7259, "step": 100 }, { "epoch": 1.1967213114754098, "grad_norm": 15.045758247375488, "learning_rate": 3.059360730593607e-05, "loss": 0.2435, "step": 110 }, { "epoch": 1.2377049180327868, "grad_norm": 1.9350297451019287, "learning_rate": 2.8310502283105023e-05, "loss": 0.479, "step": 120 }, { "epoch": 1.2540983606557377, "eval_accuracy": 0.5957446808510638, "eval_loss": 1.3025503158569336, "eval_runtime": 3.3706, "eval_samples_per_second": 13.944, "eval_steps_per_second": 3.56, "step": 124 }, { "epoch": 2.0245901639344264, "grad_norm": 80.35542297363281, "learning_rate": 2.6027397260273973e-05, "loss": 0.4446, "step": 130 }, { "epoch": 2.0655737704918034, "grad_norm": 48.855430603027344, "learning_rate": 2.3744292237442922e-05, "loss": 0.3023, "step": 140 }, { "epoch": 2.1065573770491803, "grad_norm": 5.501039028167725, "learning_rate": 2.1461187214611872e-05, "loss": 0.0495, "step": 150 }, { "epoch": 2.1475409836065573, "grad_norm": 42.64323425292969, "learning_rate": 1.9178082191780822e-05, "loss": 0.2239, "step": 160 }, { "epoch": 2.1885245901639343, "grad_norm": 0.1286790668964386, "learning_rate": 1.689497716894977e-05, "loss": 0.2566, "step": 170 }, { "epoch": 2.2295081967213113, "grad_norm": 0.8700327277183533, "learning_rate": 1.4611872146118721e-05, "loss": 0.3048, "step": 180 }, { "epoch": 2.2540983606557377, "eval_accuracy": 0.6808510638297872, "eval_loss": 0.9859520196914673, "eval_runtime": 3.3691, "eval_samples_per_second": 13.95, "eval_steps_per_second": 3.562, "step": 186 }, { "epoch": 3.0163934426229506, "grad_norm": 10.654744148254395, "learning_rate": 1.2328767123287671e-05, "loss": 0.1189, "step": 190 }, { "epoch": 3.057377049180328, "grad_norm": 0.2807650864124298, "learning_rate": 1.004566210045662e-05, "loss": 0.1767, "step": 200 }, { "epoch": 3.098360655737705, "grad_norm": 29.071613311767578, "learning_rate": 7.76255707762557e-06, "loss": 0.233, "step": 210 }, { "epoch": 3.139344262295082, "grad_norm": 7.512803554534912, "learning_rate": 5.479452054794521e-06, "loss": 0.0168, "step": 220 }, { "epoch": 3.180327868852459, "grad_norm": 1.4460692405700684, "learning_rate": 3.19634703196347e-06, "loss": 0.1046, "step": 230 }, { "epoch": 3.221311475409836, "grad_norm": 0.054721295833587646, "learning_rate": 9.132420091324201e-07, "loss": 0.2324, "step": 240 }, { "epoch": 3.237704918032787, "eval_accuracy": 0.7659574468085106, "eval_loss": 0.98479163646698, "eval_runtime": 3.9531, "eval_samples_per_second": 11.89, "eval_steps_per_second": 3.036, "step": 244 }, { "epoch": 3.237704918032787, "step": 244, "total_flos": 1.2049981873389896e+18, "train_loss": 0.3545878718866677, "train_runtime": 283.1012, "train_samples_per_second": 3.448, "train_steps_per_second": 0.862 }, { "epoch": 3.237704918032787, "eval_accuracy": 0.7058823529411765, "eval_loss": 0.8797193765640259, "eval_runtime": 7.4733, "eval_samples_per_second": 13.649, "eval_steps_per_second": 3.479, "step": 244 }, { "epoch": 3.237704918032787, "eval_accuracy": 0.7058823529411765, "eval_loss": 0.8797194361686707, "eval_runtime": 7.5173, "eval_samples_per_second": 13.569, "eval_steps_per_second": 3.459, "step": 244 } ], "logging_steps": 10, "max_steps": 244, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.2049981873389896e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }