{ "best_metric": 1.388107180595398, "best_model_checkpoint": "./outputs/checkpoint-3500", "epoch": 2.550455373406193, "eval_steps": 100, "global_step": 3500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 0.0002, "loss": 2.3134, "step": 100 }, { "epoch": 0.07, "eval_loss": 2.1594152450561523, "eval_runtime": 144.083, "eval_samples_per_second": 43.544, "eval_steps_per_second": 5.448, "step": 100 }, { "epoch": 0.15, "learning_rate": 0.0002, "loss": 2.1084, "step": 200 }, { "epoch": 0.15, "eval_loss": 2.06351900100708, "eval_runtime": 143.9954, "eval_samples_per_second": 43.571, "eval_steps_per_second": 5.452, "step": 200 }, { "epoch": 0.22, "learning_rate": 0.0002, "loss": 2.0343, "step": 300 }, { "epoch": 0.22, "eval_loss": 2.0027661323547363, "eval_runtime": 144.0229, "eval_samples_per_second": 43.563, "eval_steps_per_second": 5.451, "step": 300 }, { "epoch": 0.29, "learning_rate": 0.0002, "loss": 1.9806, "step": 400 }, { "epoch": 0.29, "eval_loss": 1.9533618688583374, "eval_runtime": 143.9496, "eval_samples_per_second": 43.585, "eval_steps_per_second": 5.453, "step": 400 }, { "epoch": 0.36, "learning_rate": 0.0002, "loss": 1.926, "step": 500 }, { "epoch": 0.36, "eval_loss": 1.9100102186203003, "eval_runtime": 144.0297, "eval_samples_per_second": 43.56, "eval_steps_per_second": 5.45, "step": 500 }, { "epoch": 0.44, "learning_rate": 0.0002, "loss": 1.8969, "step": 600 }, { "epoch": 0.44, "eval_loss": 1.8807153701782227, "eval_runtime": 144.1421, "eval_samples_per_second": 43.526, "eval_steps_per_second": 5.446, "step": 600 }, { "epoch": 0.51, "learning_rate": 0.0002, "loss": 1.8568, "step": 700 }, { "epoch": 0.51, "eval_loss": 1.8459755182266235, "eval_runtime": 144.0267, "eval_samples_per_second": 43.561, "eval_steps_per_second": 5.45, "step": 700 }, { "epoch": 0.58, "learning_rate": 0.0002, "loss": 1.8329, "step": 800 }, { "epoch": 0.58, "eval_loss": 1.8178023099899292, "eval_runtime": 143.9854, "eval_samples_per_second": 43.574, "eval_steps_per_second": 5.452, "step": 800 }, { "epoch": 0.66, "learning_rate": 0.0002, "loss": 1.7961, "step": 900 }, { "epoch": 0.66, "eval_loss": 1.7928833961486816, "eval_runtime": 144.015, "eval_samples_per_second": 43.565, "eval_steps_per_second": 5.451, "step": 900 }, { "epoch": 0.73, "learning_rate": 0.0002, "loss": 1.7798, "step": 1000 }, { "epoch": 0.73, "eval_loss": 1.7671397924423218, "eval_runtime": 144.0009, "eval_samples_per_second": 43.569, "eval_steps_per_second": 5.451, "step": 1000 }, { "epoch": 0.8, "learning_rate": 0.0002, "loss": 1.7596, "step": 1100 }, { "epoch": 0.8, "eval_loss": 1.7436497211456299, "eval_runtime": 144.11, "eval_samples_per_second": 43.536, "eval_steps_per_second": 5.447, "step": 1100 }, { "epoch": 0.87, "learning_rate": 0.0002, "loss": 1.7215, "step": 1200 }, { "epoch": 0.87, "eval_loss": 1.7219586372375488, "eval_runtime": 144.0047, "eval_samples_per_second": 43.568, "eval_steps_per_second": 5.451, "step": 1200 }, { "epoch": 0.95, "learning_rate": 0.0002, "loss": 1.7295, "step": 1300 }, { "epoch": 0.95, "eval_loss": 1.700698733329773, "eval_runtime": 143.9971, "eval_samples_per_second": 43.57, "eval_steps_per_second": 5.451, "step": 1300 }, { "epoch": 1.02, "learning_rate": 0.0002, "loss": 1.6805, "step": 1400 }, { "epoch": 1.02, "eval_loss": 1.679167628288269, "eval_runtime": 144.0847, "eval_samples_per_second": 43.544, "eval_steps_per_second": 5.448, "step": 1400 }, { "epoch": 1.09, "learning_rate": 0.0002, "loss": 1.6505, "step": 1500 }, { "epoch": 1.09, "eval_loss": 1.6613633632659912, "eval_runtime": 144.175, "eval_samples_per_second": 43.517, "eval_steps_per_second": 5.445, "step": 1500 }, { "epoch": 1.17, "learning_rate": 0.0002, "loss": 1.6327, "step": 1600 }, { "epoch": 1.17, "eval_loss": 1.6426126956939697, "eval_runtime": 143.9637, "eval_samples_per_second": 43.58, "eval_steps_per_second": 5.453, "step": 1600 }, { "epoch": 1.24, "learning_rate": 0.0002, "loss": 1.629, "step": 1700 }, { "epoch": 1.24, "eval_loss": 1.6252305507659912, "eval_runtime": 144.0148, "eval_samples_per_second": 43.565, "eval_steps_per_second": 5.451, "step": 1700 }, { "epoch": 1.31, "learning_rate": 0.0002, "loss": 1.5993, "step": 1800 }, { "epoch": 1.31, "eval_loss": 1.6091866493225098, "eval_runtime": 144.0349, "eval_samples_per_second": 43.559, "eval_steps_per_second": 5.45, "step": 1800 }, { "epoch": 1.38, "learning_rate": 0.0002, "loss": 1.5907, "step": 1900 }, { "epoch": 1.38, "eval_loss": 1.5939242839813232, "eval_runtime": 151.8105, "eval_samples_per_second": 41.328, "eval_steps_per_second": 5.171, "step": 1900 }, { "epoch": 1.46, "learning_rate": 0.0002, "loss": 1.5726, "step": 2000 }, { "epoch": 1.46, "eval_loss": 1.578723669052124, "eval_runtime": 144.0665, "eval_samples_per_second": 43.549, "eval_steps_per_second": 5.449, "step": 2000 }, { "epoch": 1.53, "learning_rate": 0.0002, "loss": 1.5572, "step": 2100 }, { "epoch": 1.53, "eval_loss": 1.562243938446045, "eval_runtime": 144.0023, "eval_samples_per_second": 43.569, "eval_steps_per_second": 5.451, "step": 2100 }, { "epoch": 1.6, "learning_rate": 0.0002, "loss": 1.5788, "step": 2200 }, { "epoch": 1.6, "eval_loss": 1.5802664756774902, "eval_runtime": 133.2244, "eval_samples_per_second": 47.093, "eval_steps_per_second": 5.892, "step": 2200 }, { "epoch": 1.68, "learning_rate": 0.0002, "loss": 1.5406, "step": 2300 }, { "epoch": 1.68, "eval_loss": 1.5586791038513184, "eval_runtime": 133.3887, "eval_samples_per_second": 47.035, "eval_steps_per_second": 5.885, "step": 2300 }, { "epoch": 1.75, "learning_rate": 0.0002, "loss": 1.5397, "step": 2400 }, { "epoch": 1.75, "eval_loss": 1.5387272834777832, "eval_runtime": 133.2598, "eval_samples_per_second": 47.081, "eval_steps_per_second": 5.891, "step": 2400 }, { "epoch": 1.82, "learning_rate": 0.0002, "loss": 1.5072, "step": 2500 }, { "epoch": 1.82, "eval_loss": 1.5237348079681396, "eval_runtime": 133.2857, "eval_samples_per_second": 47.072, "eval_steps_per_second": 5.89, "step": 2500 }, { "epoch": 1.89, "learning_rate": 0.0002, "loss": 1.4925, "step": 2600 }, { "epoch": 1.89, "eval_loss": 1.5068706274032593, "eval_runtime": 133.2428, "eval_samples_per_second": 47.087, "eval_steps_per_second": 5.891, "step": 2600 }, { "epoch": 1.97, "learning_rate": 0.0002, "loss": 1.4832, "step": 2700 }, { "epoch": 1.97, "eval_loss": 1.492112636566162, "eval_runtime": 133.2152, "eval_samples_per_second": 47.097, "eval_steps_per_second": 5.893, "step": 2700 }, { "epoch": 2.04, "learning_rate": 0.0002, "loss": 1.4506, "step": 2800 }, { "epoch": 2.04, "eval_loss": 1.4801561832427979, "eval_runtime": 133.2908, "eval_samples_per_second": 47.07, "eval_steps_per_second": 5.889, "step": 2800 }, { "epoch": 2.11, "learning_rate": 0.0002, "loss": 1.4311, "step": 2900 }, { "epoch": 2.11, "eval_loss": 1.4649614095687866, "eval_runtime": 133.232, "eval_samples_per_second": 47.091, "eval_steps_per_second": 5.892, "step": 2900 }, { "epoch": 2.19, "learning_rate": 0.0002, "loss": 1.4268, "step": 3000 }, { "epoch": 2.19, "eval_loss": 1.4508098363876343, "eval_runtime": 133.3471, "eval_samples_per_second": 47.05, "eval_steps_per_second": 5.887, "step": 3000 }, { "epoch": 2.26, "learning_rate": 0.0002, "loss": 1.4142, "step": 3100 }, { "epoch": 2.26, "eval_loss": 1.4384832382202148, "eval_runtime": 133.1612, "eval_samples_per_second": 47.116, "eval_steps_per_second": 5.895, "step": 3100 }, { "epoch": 2.33, "learning_rate": 0.0002, "loss": 1.4131, "step": 3200 }, { "epoch": 2.33, "eval_loss": 1.4258862733840942, "eval_runtime": 133.2271, "eval_samples_per_second": 47.093, "eval_steps_per_second": 5.892, "step": 3200 }, { "epoch": 2.4, "learning_rate": 0.0002, "loss": 1.3948, "step": 3300 }, { "epoch": 2.4, "eval_loss": 1.4105219841003418, "eval_runtime": 133.2146, "eval_samples_per_second": 47.097, "eval_steps_per_second": 5.893, "step": 3300 }, { "epoch": 2.48, "learning_rate": 0.0002, "loss": 1.3738, "step": 3400 }, { "epoch": 2.48, "eval_loss": 1.3999289274215698, "eval_runtime": 133.2454, "eval_samples_per_second": 47.086, "eval_steps_per_second": 5.891, "step": 3400 }, { "epoch": 2.55, "learning_rate": 0.0002, "loss": 1.373, "step": 3500 }, { "epoch": 2.55, "eval_loss": 1.388107180595398, "eval_runtime": 133.2079, "eval_samples_per_second": 47.099, "eval_steps_per_second": 5.893, "step": 3500 } ], "logging_steps": 100, "max_steps": 4116, "num_train_epochs": 3, "save_steps": 100, "total_flos": 2.0839850145502003e+17, "trial_name": null, "trial_params": null }