{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.3929971796106486, "global_step": 40500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 0.0019942675013184747, "loss": 0.2155, "step": 500 }, { "epoch": 0.03, "learning_rate": 0.0019885350026369497, "loss": 0.2135, "step": 1000 }, { "epoch": 0.05, "learning_rate": 0.001982802503955424, "loss": 0.2123, "step": 1500 }, { "epoch": 0.07, "learning_rate": 0.001977070005273899, "loss": 0.2196, "step": 2000 }, { "epoch": 0.09, "learning_rate": 0.0019713375065923736, "loss": 0.2223, "step": 2500 }, { "epoch": 0.1, "learning_rate": 0.0019656050079108482, "loss": 0.2209, "step": 3000 }, { "epoch": 0.12, "learning_rate": 0.001959872509229323, "loss": 0.2181, "step": 3500 }, { "epoch": 0.14, "learning_rate": 0.0019541400105477975, "loss": 0.2142, "step": 4000 }, { "epoch": 0.15, "learning_rate": 0.0019484075118662723, "loss": 0.2145, "step": 4500 }, { "epoch": 0.17, "learning_rate": 0.001942675013184747, "loss": 0.2261, "step": 5000 }, { "epoch": 0.19, "learning_rate": 0.0019369425145032218, "loss": 0.2171, "step": 5500 }, { "epoch": 0.21, "learning_rate": 0.0019312100158216962, "loss": 0.2158, "step": 6000 }, { "epoch": 0.22, "learning_rate": 0.001925477517140171, "loss": 0.2179, "step": 6500 }, { "epoch": 0.24, "learning_rate": 0.001919745018458646, "loss": 0.2137, "step": 7000 }, { "epoch": 0.26, "learning_rate": 0.0019140125197771205, "loss": 0.2148, "step": 7500 }, { "epoch": 0.28, "learning_rate": 0.0019082800210955954, "loss": 0.2114, "step": 8000 }, { "epoch": 0.29, "learning_rate": 0.0019025475224140698, "loss": 0.2138, "step": 8500 }, { "epoch": 0.31, "learning_rate": 0.0018968150237325446, "loss": 0.2159, "step": 9000 }, { "epoch": 0.33, "learning_rate": 0.0018910825250510192, "loss": 0.2185, "step": 9500 }, { "epoch": 0.34, "learning_rate": 0.001885350026369494, "loss": 0.2123, "step": 10000 }, { "epoch": 0.36, "learning_rate": 0.0018796175276879685, "loss": 0.2172, "step": 10500 }, { "epoch": 0.38, "learning_rate": 0.0018738850290064433, "loss": 0.2162, "step": 11000 }, { "epoch": 0.4, "learning_rate": 0.001868152530324918, "loss": 0.2103, "step": 11500 }, { "epoch": 0.41, "learning_rate": 0.0018624200316433928, "loss": 0.221, "step": 12000 }, { "epoch": 0.43, "learning_rate": 0.0018566875329618674, "loss": 0.2108, "step": 12500 }, { "epoch": 0.45, "learning_rate": 0.001850955034280342, "loss": 0.2217, "step": 13000 }, { "epoch": 0.46, "learning_rate": 0.001845222535598817, "loss": 0.2157, "step": 13500 }, { "epoch": 0.48, "learning_rate": 0.0018394900369172915, "loss": 0.2076, "step": 14000 }, { "epoch": 0.5, "learning_rate": 0.0018337575382357664, "loss": 0.2162, "step": 14500 }, { "epoch": 0.52, "learning_rate": 0.001828025039554241, "loss": 0.207, "step": 15000 }, { "epoch": 0.53, "learning_rate": 0.0018222925408727156, "loss": 0.2131, "step": 15500 }, { "epoch": 0.55, "learning_rate": 0.0018165600421911903, "loss": 0.2119, "step": 16000 }, { "epoch": 0.57, "learning_rate": 0.001810827543509665, "loss": 0.2205, "step": 16500 }, { "epoch": 0.58, "learning_rate": 0.0018050950448281397, "loss": 0.2134, "step": 17000 }, { "epoch": 0.6, "learning_rate": 0.0017993625461466144, "loss": 0.216, "step": 17500 }, { "epoch": 0.62, "learning_rate": 0.001793630047465089, "loss": 0.2132, "step": 18000 }, { "epoch": 0.64, "learning_rate": 0.0017878975487835638, "loss": 0.2125, "step": 18500 }, { "epoch": 0.65, "learning_rate": 
0.0017821650501020385, "loss": 0.2138, "step": 19000 }, { "epoch": 0.67, "learning_rate": 0.0017764325514205133, "loss": 0.2111, "step": 19500 }, { "epoch": 0.69, "learning_rate": 0.001770700052738988, "loss": 0.209, "step": 20000 }, { "epoch": 0.71, "learning_rate": 0.0017649675540574626, "loss": 0.2086, "step": 20500 }, { "epoch": 0.72, "learning_rate": 0.0017592350553759374, "loss": 0.211, "step": 21000 }, { "epoch": 0.74, "learning_rate": 0.001753502556694412, "loss": 0.206, "step": 21500 }, { "epoch": 0.76, "learning_rate": 0.0017477700580128869, "loss": 0.2149, "step": 22000 }, { "epoch": 0.77, "learning_rate": 0.0017420375593313613, "loss": 0.2093, "step": 22500 }, { "epoch": 0.79, "learning_rate": 0.0017363050606498361, "loss": 0.2089, "step": 23000 }, { "epoch": 0.81, "learning_rate": 0.0017305725619683107, "loss": 0.2093, "step": 23500 }, { "epoch": 0.83, "learning_rate": 0.0017248400632867856, "loss": 0.2009, "step": 24000 }, { "epoch": 0.84, "learning_rate": 0.00171910756460526, "loss": 0.2058, "step": 24500 }, { "epoch": 0.86, "learning_rate": 0.0017133750659237348, "loss": 0.2143, "step": 25000 }, { "epoch": 0.88, "learning_rate": 0.0017076425672422095, "loss": 0.2071, "step": 25500 }, { "epoch": 0.89, "learning_rate": 0.0017019100685606843, "loss": 0.2098, "step": 26000 }, { "epoch": 0.91, "learning_rate": 0.0016961775698791592, "loss": 0.2064, "step": 26500 }, { "epoch": 0.93, "learning_rate": 0.0016904450711976336, "loss": 0.207, "step": 27000 }, { "epoch": 0.95, "learning_rate": 0.0016847125725161084, "loss": 0.2063, "step": 27500 }, { "epoch": 0.96, "learning_rate": 0.001678980073834583, "loss": 0.2109, "step": 28000 }, { "epoch": 0.98, "learning_rate": 0.0016732475751530579, "loss": 0.2038, "step": 28500 }, { "epoch": 1.0, "learning_rate": 0.0016675150764715325, "loss": 0.2082, "step": 29000 }, { "epoch": 1.0, "eval_loss": 0.1924375593662262, "eval_runtime": 1182.2501, "eval_samples_per_second": 147.549, "eval_steps_per_second": 6.148, "step": 29074 }, { "epoch": 1.01, "learning_rate": 0.0016617825777900071, "loss": 0.2098, "step": 29500 }, { "epoch": 1.03, "learning_rate": 0.0016560500791084818, "loss": 0.2011, "step": 30000 }, { "epoch": 1.05, "learning_rate": 0.0016503175804269566, "loss": 0.2008, "step": 30500 }, { "epoch": 1.07, "learning_rate": 0.0016445850817454312, "loss": 0.1995, "step": 31000 }, { "epoch": 1.08, "learning_rate": 0.0016388525830639059, "loss": 0.2022, "step": 31500 }, { "epoch": 1.1, "learning_rate": 0.0016331200843823805, "loss": 0.2006, "step": 32000 }, { "epoch": 1.12, "learning_rate": 0.0016273875857008553, "loss": 0.2007, "step": 32500 }, { "epoch": 1.14, "learning_rate": 0.0016216550870193302, "loss": 0.2041, "step": 33000 }, { "epoch": 1.15, "learning_rate": 0.0016159225883378048, "loss": 0.2069, "step": 33500 }, { "epoch": 1.17, "learning_rate": 0.0016101900896562794, "loss": 0.2029, "step": 34000 }, { "epoch": 1.19, "learning_rate": 0.001604457590974754, "loss": 0.2002, "step": 34500 }, { "epoch": 1.2, "learning_rate": 0.0015987250922932289, "loss": 0.2026, "step": 35000 }, { "epoch": 1.22, "learning_rate": 0.0015929925936117035, "loss": 0.1978, "step": 35500 }, { "epoch": 1.24, "learning_rate": 0.0015872600949301781, "loss": 0.2063, "step": 36000 }, { "epoch": 1.26, "learning_rate": 0.0015815275962486528, "loss": 0.1958, "step": 36500 }, { "epoch": 1.27, "learning_rate": 0.0015757950975671276, "loss": 0.2023, "step": 37000 }, { "epoch": 1.29, "learning_rate": 0.0015700625988856022, "loss": 0.2014, "step": 37500 }, { "epoch": 1.31, 
"learning_rate": 0.001564330100204077, "loss": 0.2116, "step": 38000 }, { "epoch": 1.32, "learning_rate": 0.0015585976015225515, "loss": 0.2047, "step": 38500 }, { "epoch": 1.34, "learning_rate": 0.0015528651028410263, "loss": 0.2048, "step": 39000 }, { "epoch": 1.36, "learning_rate": 0.0015471326041595012, "loss": 0.2003, "step": 39500 }, { "epoch": 1.38, "learning_rate": 0.0015414001054779758, "loss": 0.2046, "step": 40000 }, { "epoch": 1.39, "learning_rate": 0.0015356676067964507, "loss": 0.2001, "step": 40500 } ], "max_steps": 174444, "num_train_epochs": 6, "total_flos": 1.3154952419475456e+17, "trial_name": null, "trial_params": null }