{"train/loss": 0.0337, "train/grad_norm": 0.019543960690498352, "train/learning_rate": 8.061420345489445e-07, "train/epoch": 5.0, "train/global_step": 7815, "_timestamp": 1727123397.4907782, "_runtime": 60485.37278318405, "_step": 20, "eval/loss": 0.35836175084114075, "eval/accuracy": 0.9328, "eval/runtime": 868.737, "eval/samples_per_second": 28.777, "eval/steps_per_second": 1.799, "train_runtime": 60453.6889, "train_samples_per_second": 2.068, "train_steps_per_second": 0.129, "total_flos": 1.6382133492223008e+16, "train_loss": 0.11774859144683077} |