{ |
|
"best_metric": 0.28307044506073, |
|
"best_model_checkpoint": "./output/clip-finetuned-csu-p14-336-e3l57-l/checkpoint-16000", |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 16287, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.09211495946941783, |
|
"grad_norm": 3.071866512298584, |
|
"learning_rate": 4.84647506755097e-07, |
|
"loss": 0.3448, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09211495946941783, |
|
"eval_loss": 1.0838947296142578, |
|
"eval_runtime": 76.5304, |
|
"eval_samples_per_second": 15.772, |
|
"eval_steps_per_second": 1.973, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.18422991893883567, |
|
"grad_norm": 2.2681832313537598, |
|
"learning_rate": 4.69295013510194e-07, |
|
"loss": 0.2708, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.18422991893883567, |
|
"eval_loss": 0.8948286175727844, |
|
"eval_runtime": 76.7259, |
|
"eval_samples_per_second": 15.731, |
|
"eval_steps_per_second": 1.968, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2763448784082535, |
|
"grad_norm": 0.00790461990982294, |
|
"learning_rate": 4.5394252026529107e-07, |
|
"loss": 0.177, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.2763448784082535, |
|
"eval_loss": 0.8396429419517517, |
|
"eval_runtime": 78.5505, |
|
"eval_samples_per_second": 15.366, |
|
"eval_steps_per_second": 1.922, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.36845983787767134, |
|
"grad_norm": 0.29154184460639954, |
|
"learning_rate": 4.385900270203881e-07, |
|
"loss": 0.1831, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.36845983787767134, |
|
"eval_loss": 0.7720839977264404, |
|
"eval_runtime": 78.1799, |
|
"eval_samples_per_second": 15.439, |
|
"eval_steps_per_second": 1.931, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.46057479734708917, |
|
"grad_norm": 186.6424102783203, |
|
"learning_rate": 4.232375337754851e-07, |
|
"loss": 0.2038, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.46057479734708917, |
|
"eval_loss": 0.744644284248352, |
|
"eval_runtime": 76.5359, |
|
"eval_samples_per_second": 15.77, |
|
"eval_steps_per_second": 1.973, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.552689756816507, |
|
"grad_norm": 0.009510820731520653, |
|
"learning_rate": 4.0788504053058217e-07, |
|
"loss": 0.1309, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.552689756816507, |
|
"eval_loss": 0.7234784960746765, |
|
"eval_runtime": 76.6741, |
|
"eval_samples_per_second": 15.742, |
|
"eval_steps_per_second": 1.969, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.6448047162859248, |
|
"grad_norm": 542.5567626953125, |
|
"learning_rate": 3.925325472856792e-07, |
|
"loss": 0.1431, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.6448047162859248, |
|
"eval_loss": 0.6690872311592102, |
|
"eval_runtime": 76.4873, |
|
"eval_samples_per_second": 15.78, |
|
"eval_steps_per_second": 1.974, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.7369196757553427, |
|
"grad_norm": 3.6845791339874268, |
|
"learning_rate": 3.771800540407762e-07, |
|
"loss": 0.1411, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.7369196757553427, |
|
"eval_loss": 0.643679141998291, |
|
"eval_runtime": 76.4578, |
|
"eval_samples_per_second": 15.786, |
|
"eval_steps_per_second": 1.975, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.8290346352247605, |
|
"grad_norm": 0.005153482314199209, |
|
"learning_rate": 3.618275607958732e-07, |
|
"loss": 0.0849, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.8290346352247605, |
|
"eval_loss": 0.48050031065940857, |
|
"eval_runtime": 75.8694, |
|
"eval_samples_per_second": 15.909, |
|
"eval_steps_per_second": 1.99, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.9211495946941783, |
|
"grad_norm": 2.631653070449829, |
|
"learning_rate": 3.464750675509703e-07, |
|
"loss": 0.1026, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.9211495946941783, |
|
"eval_loss": 0.4800306260585785, |
|
"eval_runtime": 75.365, |
|
"eval_samples_per_second": 16.015, |
|
"eval_steps_per_second": 2.004, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.013264554163596, |
|
"grad_norm": 1.8651682694326155e-05, |
|
"learning_rate": 3.311225743060673e-07, |
|
"loss": 0.1201, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.013264554163596, |
|
"eval_loss": 0.4810214042663574, |
|
"eval_runtime": 74.984, |
|
"eval_samples_per_second": 16.097, |
|
"eval_steps_per_second": 2.014, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.105379513633014, |
|
"grad_norm": 0.5627817511558533, |
|
"learning_rate": 3.157700810611643e-07, |
|
"loss": 0.0757, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.105379513633014, |
|
"eval_loss": 0.46917983889579773, |
|
"eval_runtime": 74.883, |
|
"eval_samples_per_second": 16.118, |
|
"eval_steps_per_second": 2.016, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.1974944731024317, |
|
"grad_norm": 0.0001344973425148055, |
|
"learning_rate": 3.0041758781626137e-07, |
|
"loss": 0.0696, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.1974944731024317, |
|
"eval_loss": 0.47636929154396057, |
|
"eval_runtime": 76.4445, |
|
"eval_samples_per_second": 15.789, |
|
"eval_steps_per_second": 1.975, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.2896094325718497, |
|
"grad_norm": 0.007305172737687826, |
|
"learning_rate": 2.8506509457135833e-07, |
|
"loss": 0.0911, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.2896094325718497, |
|
"eval_loss": 0.4601423740386963, |
|
"eval_runtime": 76.451, |
|
"eval_samples_per_second": 15.788, |
|
"eval_steps_per_second": 1.975, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.3817243920412676, |
|
"grad_norm": 9.710428905407298e-09, |
|
"learning_rate": 2.697126013264554e-07, |
|
"loss": 0.0806, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.3817243920412676, |
|
"eval_loss": 0.45901384949684143, |
|
"eval_runtime": 74.7174, |
|
"eval_samples_per_second": 16.154, |
|
"eval_steps_per_second": 2.021, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.4738393515106853, |
|
"grad_norm": 0.008983594365417957, |
|
"learning_rate": 2.5436010808155247e-07, |
|
"loss": 0.088, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.4738393515106853, |
|
"eval_loss": 0.46537238359451294, |
|
"eval_runtime": 74.5146, |
|
"eval_samples_per_second": 16.198, |
|
"eval_steps_per_second": 2.026, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.565954310980103, |
|
"grad_norm": 1.3851042240276001e-05, |
|
"learning_rate": 2.390076148366495e-07, |
|
"loss": 0.0878, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.565954310980103, |
|
"eval_loss": 0.4768938720226288, |
|
"eval_runtime": 74.5757, |
|
"eval_samples_per_second": 16.185, |
|
"eval_steps_per_second": 2.025, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.658069270449521, |
|
"grad_norm": 4.7310441004810855e-06, |
|
"learning_rate": 2.236551215917465e-07, |
|
"loss": 0.0369, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.658069270449521, |
|
"eval_loss": 0.4683688282966614, |
|
"eval_runtime": 74.5774, |
|
"eval_samples_per_second": 16.185, |
|
"eval_steps_per_second": 2.025, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.750184229918939, |
|
"grad_norm": 0.005283420439809561, |
|
"learning_rate": 2.083026283468435e-07, |
|
"loss": 0.1034, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.750184229918939, |
|
"eval_loss": 0.4715610444545746, |
|
"eval_runtime": 74.6527, |
|
"eval_samples_per_second": 16.168, |
|
"eval_steps_per_second": 2.023, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.8422991893883567, |
|
"grad_norm": 1.703993320465088, |
|
"learning_rate": 1.9295013510194055e-07, |
|
"loss": 0.0852, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.8422991893883567, |
|
"eval_loss": 0.47203630208969116, |
|
"eval_runtime": 74.6773, |
|
"eval_samples_per_second": 16.163, |
|
"eval_steps_per_second": 2.022, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.9344141488577744, |
|
"grad_norm": 8.647094205116446e-07, |
|
"learning_rate": 1.7759764185703757e-07, |
|
"loss": 0.0493, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.9344141488577744, |
|
"eval_loss": 0.47136709094047546, |
|
"eval_runtime": 74.7402, |
|
"eval_samples_per_second": 16.149, |
|
"eval_steps_per_second": 2.02, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.026529108327192, |
|
"grad_norm": 0.0002839279477484524, |
|
"learning_rate": 1.6224514861213458e-07, |
|
"loss": 0.0603, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.026529108327192, |
|
"eval_loss": 0.4660906493663788, |
|
"eval_runtime": 74.8543, |
|
"eval_samples_per_second": 16.125, |
|
"eval_steps_per_second": 2.017, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.1186440677966103, |
|
"grad_norm": 0.17852088809013367, |
|
"learning_rate": 1.4689265536723165e-07, |
|
"loss": 0.0547, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.1186440677966103, |
|
"eval_loss": 0.4668720066547394, |
|
"eval_runtime": 74.8609, |
|
"eval_samples_per_second": 16.123, |
|
"eval_steps_per_second": 2.017, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.210759027266028, |
|
"grad_norm": 4.607869868777925e-06, |
|
"learning_rate": 1.3154016212232866e-07, |
|
"loss": 0.0793, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.210759027266028, |
|
"eval_loss": 0.4664075970649719, |
|
"eval_runtime": 74.7262, |
|
"eval_samples_per_second": 16.152, |
|
"eval_steps_per_second": 2.021, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.302449806594216, |
|
"grad_norm": 0.00044986297143623233, |
|
"learning_rate": 1.1625836556763061e-07, |
|
"loss": 0.0415, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.302449806594216, |
|
"eval_loss": 0.2888467311859131, |
|
"eval_runtime": 78.2614, |
|
"eval_samples_per_second": 15.41, |
|
"eval_steps_per_second": 1.929, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.394547798857985, |
|
"grad_norm": 4.1875861001017256e-08, |
|
"learning_rate": 1.0090870019033584e-07, |
|
"loss": 0.0565, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.394547798857985, |
|
"eval_loss": 0.29098838567733765, |
|
"eval_runtime": 78.3993, |
|
"eval_samples_per_second": 15.383, |
|
"eval_steps_per_second": 1.926, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.4866457911217537, |
|
"grad_norm": 1.556837378302589e-05, |
|
"learning_rate": 8.555903481304106e-08, |
|
"loss": 0.0629, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.4866457911217537, |
|
"eval_loss": 0.2888970673084259, |
|
"eval_runtime": 76.7751, |
|
"eval_samples_per_second": 15.708, |
|
"eval_steps_per_second": 1.967, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.578743783385522, |
|
"grad_norm": 0.12299621850252151, |
|
"learning_rate": 7.020936943574628e-08, |
|
"loss": 0.0584, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.578743783385522, |
|
"eval_loss": 0.28743651509284973, |
|
"eval_runtime": 76.8684, |
|
"eval_samples_per_second": 15.689, |
|
"eval_steps_per_second": 1.964, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.670841775649291, |
|
"grad_norm": 0.25907593965530396, |
|
"learning_rate": 5.485970405845152e-08, |
|
"loss": 0.0582, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.670841775649291, |
|
"eval_loss": 0.2863319218158722, |
|
"eval_runtime": 77.0162, |
|
"eval_samples_per_second": 15.659, |
|
"eval_steps_per_second": 1.961, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.7629397679130596, |
|
"grad_norm": 1.9590417650761083e-05, |
|
"learning_rate": 3.951003868115675e-08, |
|
"loss": 0.052, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.7629397679130596, |
|
"eval_loss": 0.2845611274242401, |
|
"eval_runtime": 76.9043, |
|
"eval_samples_per_second": 15.682, |
|
"eval_steps_per_second": 1.963, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.8550377601768284, |
|
"grad_norm": 0.2627514600753784, |
|
"learning_rate": 2.4160373303861975e-08, |
|
"loss": 0.0402, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.8550377601768284, |
|
"eval_loss": 0.28350818157196045, |
|
"eval_runtime": 76.8887, |
|
"eval_samples_per_second": 15.685, |
|
"eval_steps_per_second": 1.964, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.947135752440597, |
|
"grad_norm": 0.00015808168973308057, |
|
"learning_rate": 8.8107079265672e-09, |
|
"loss": 0.0518, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.947135752440597, |
|
"eval_loss": 0.28307044506073, |
|
"eval_runtime": 76.733, |
|
"eval_samples_per_second": 15.717, |
|
"eval_steps_per_second": 1.968, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 16287, |
|
"total_flos": 5859923904498510.0, |
|
"train_loss": 0.014361034947702911, |
|
"train_runtime": 3221.5046, |
|
"train_samples_per_second": 10.11, |
|
"train_steps_per_second": 5.056 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 16287, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5859923904498510.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |