|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.3929971796106486, |
|
"global_step": 40500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0019942675013184747, |
|
"loss": 0.2155, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0019885350026369497, |
|
"loss": 0.2135, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.001982802503955424, |
|
"loss": 0.2123, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.001977070005273899, |
|
"loss": 0.2196, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0019713375065923736, |
|
"loss": 0.2223, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0019656050079108482, |
|
"loss": 0.2209, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.001959872509229323, |
|
"loss": 0.2181, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0019541400105477975, |
|
"loss": 0.2142, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0019484075118662723, |
|
"loss": 0.2145, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.001942675013184747, |
|
"loss": 0.2261, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0019369425145032218, |
|
"loss": 0.2171, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0019312100158216962, |
|
"loss": 0.2158, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.001925477517140171, |
|
"loss": 0.2179, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.001919745018458646, |
|
"loss": 0.2137, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0019140125197771205, |
|
"loss": 0.2148, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0019082800210955954, |
|
"loss": 0.2114, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0019025475224140698, |
|
"loss": 0.2138, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0018968150237325446, |
|
"loss": 0.2159, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0018910825250510192, |
|
"loss": 0.2185, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.001885350026369494, |
|
"loss": 0.2123, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0018796175276879685, |
|
"loss": 0.2172, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0018738850290064433, |
|
"loss": 0.2162, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.001868152530324918, |
|
"loss": 0.2103, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0018624200316433928, |
|
"loss": 0.221, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0018566875329618674, |
|
"loss": 0.2108, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.001850955034280342, |
|
"loss": 0.2217, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.001845222535598817, |
|
"loss": 0.2157, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0018394900369172915, |
|
"loss": 0.2076, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0018337575382357664, |
|
"loss": 0.2162, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.001828025039554241, |
|
"loss": 0.207, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0018222925408727156, |
|
"loss": 0.2131, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0018165600421911903, |
|
"loss": 0.2119, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.001810827543509665, |
|
"loss": 0.2205, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0018050950448281397, |
|
"loss": 0.2134, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0017993625461466144, |
|
"loss": 0.216, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.001793630047465089, |
|
"loss": 0.2132, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0017878975487835638, |
|
"loss": 0.2125, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0017821650501020385, |
|
"loss": 0.2138, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0017764325514205133, |
|
"loss": 0.2111, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.001770700052738988, |
|
"loss": 0.209, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0017649675540574626, |
|
"loss": 0.2086, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0017592350553759374, |
|
"loss": 0.211, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.001753502556694412, |
|
"loss": 0.206, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0017477700580128869, |
|
"loss": 0.2149, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0017420375593313613, |
|
"loss": 0.2093, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.0017363050606498361, |
|
"loss": 0.2089, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0017305725619683107, |
|
"loss": 0.2093, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.0017248400632867856, |
|
"loss": 0.2009, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00171910756460526, |
|
"loss": 0.2058, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0017133750659237348, |
|
"loss": 0.2143, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0017076425672422095, |
|
"loss": 0.2071, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0017019100685606843, |
|
"loss": 0.2098, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0016961775698791592, |
|
"loss": 0.2064, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0016904450711976336, |
|
"loss": 0.207, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0016847125725161084, |
|
"loss": 0.2063, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.001678980073834583, |
|
"loss": 0.2109, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0016732475751530579, |
|
"loss": 0.2038, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0016675150764715325, |
|
"loss": 0.2082, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.1924375593662262, |
|
"eval_runtime": 1182.2501, |
|
"eval_samples_per_second": 147.549, |
|
"eval_steps_per_second": 6.148, |
|
"step": 29074 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0016617825777900071, |
|
"loss": 0.2098, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.0016560500791084818, |
|
"loss": 0.2011, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.0016503175804269566, |
|
"loss": 0.2008, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.0016445850817454312, |
|
"loss": 0.1995, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0016388525830639059, |
|
"loss": 0.2022, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.0016331200843823805, |
|
"loss": 0.2006, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.0016273875857008553, |
|
"loss": 0.2007, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.0016216550870193302, |
|
"loss": 0.2041, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.0016159225883378048, |
|
"loss": 0.2069, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0016101900896562794, |
|
"loss": 0.2029, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.001604457590974754, |
|
"loss": 0.2002, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0015987250922932289, |
|
"loss": 0.2026, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.0015929925936117035, |
|
"loss": 0.1978, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.0015872600949301781, |
|
"loss": 0.2063, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.0015815275962486528, |
|
"loss": 0.1958, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.0015757950975671276, |
|
"loss": 0.2023, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.0015700625988856022, |
|
"loss": 0.2014, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.001564330100204077, |
|
"loss": 0.2116, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.0015585976015225515, |
|
"loss": 0.2047, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.0015528651028410263, |
|
"loss": 0.2048, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.0015471326041595012, |
|
"loss": 0.2003, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.0015414001054779758, |
|
"loss": 0.2046, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.0015356676067964507, |
|
"loss": 0.2001, |
|
"step": 40500 |
|
} |
|
], |
|
"max_steps": 174444, |
|
"num_train_epochs": 6, |
|
"total_flos": 1.3154952419475456e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|