{ "best_metric": 0.4678440617843855, "best_model_checkpoint": "/fsx/anton/cosmopedia/edu_score/bert_snowflake_regression_4/checkpoint-7000", "epoch": 4.247572815533981, "eval_steps": 1000, "global_step": 7000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06067961165048544, "grad_norm": 0.5638211965560913, "learning_rate": 0.0002990898058252427, "loss": 0.4753, "step": 100 }, { "epoch": 0.12135922330097088, "grad_norm": 0.47830212116241455, "learning_rate": 0.0002981796116504854, "loss": 0.357, "step": 200 }, { "epoch": 0.1820388349514563, "grad_norm": 0.6941384077072144, "learning_rate": 0.0002972694174757281, "loss": 0.3542, "step": 300 }, { "epoch": 0.24271844660194175, "grad_norm": 0.459163635969162, "learning_rate": 0.00029635922330097087, "loss": 0.3508, "step": 400 }, { "epoch": 0.30339805825242716, "grad_norm": 1.1585971117019653, "learning_rate": 0.0002954490291262136, "loss": 0.3407, "step": 500 }, { "epoch": 0.3640776699029126, "grad_norm": 0.6505594849586487, "learning_rate": 0.0002945388349514563, "loss": 0.3394, "step": 600 }, { "epoch": 0.42475728155339804, "grad_norm": 0.9804072976112366, "learning_rate": 0.000293628640776699, "loss": 0.3435, "step": 700 }, { "epoch": 0.4854368932038835, "grad_norm": 0.5816351175308228, "learning_rate": 0.0002927184466019417, "loss": 0.3323, "step": 800 }, { "epoch": 0.5461165048543689, "grad_norm": 0.6582027673721313, "learning_rate": 0.00029180825242718447, "loss": 0.3293, "step": 900 }, { "epoch": 0.6067961165048543, "grad_norm": 0.8432559370994568, "learning_rate": 0.0002908980582524271, "loss": 0.3337, "step": 1000 }, { "epoch": 0.6067961165048543, "eval_accuracy": 0.6516312117268014, "eval_f1_macro": 0.388617225018415, "eval_loss": 0.32428401708602905, "eval_precision": 0.5273989146868351, "eval_recall": 0.3731209274235363, "eval_runtime": 63.5791, "eval_samples_per_second": 737.144, "eval_steps_per_second": 5.772, "step": 1000 }, { "epoch": 0.6674757281553398, "grad_norm": 0.6387248039245605, "learning_rate": 0.0002899878640776699, "loss": 0.3292, "step": 1100 }, { "epoch": 0.7281553398058253, "grad_norm": 2.0111730098724365, "learning_rate": 0.0002890776699029126, "loss": 0.3283, "step": 1200 }, { "epoch": 0.7888349514563107, "grad_norm": 1.0319699048995972, "learning_rate": 0.0002881674757281553, "loss": 0.3236, "step": 1300 }, { "epoch": 0.8495145631067961, "grad_norm": 1.194286584854126, "learning_rate": 0.000287257281553398, "loss": 0.3178, "step": 1400 }, { "epoch": 0.9101941747572816, "grad_norm": 0.4329046308994293, "learning_rate": 0.00028634708737864073, "loss": 0.3234, "step": 1500 }, { "epoch": 0.970873786407767, "grad_norm": 0.4490291476249695, "learning_rate": 0.0002854368932038835, "loss": 0.3148, "step": 1600 }, { "epoch": 1.0315533980582525, "grad_norm": 1.9341398477554321, "learning_rate": 0.0002845266990291262, "loss": 0.3157, "step": 1700 }, { "epoch": 1.0922330097087378, "grad_norm": 0.6705629825592041, "learning_rate": 0.0002836165048543689, "loss": 0.3144, "step": 1800 }, { "epoch": 1.1529126213592233, "grad_norm": 0.3708420395851135, "learning_rate": 0.0002827063106796116, "loss": 0.3106, "step": 1900 }, { "epoch": 1.2135922330097086, "grad_norm": 0.4166070222854614, "learning_rate": 0.00028179611650485433, "loss": 0.3065, "step": 2000 }, { "epoch": 1.2135922330097086, "eval_accuracy": 0.6756993193505025, "eval_f1_macro": 0.4177625223934091, "eval_loss": 0.3020932972431183, "eval_precision": 0.5208365532058327, "eval_recall": 0.3908095186846785, "eval_runtime": 63.8686, "eval_samples_per_second": 733.803, "eval_steps_per_second": 5.746, "step": 2000 }, { "epoch": 1.2742718446601942, "grad_norm": 0.949810266494751, "learning_rate": 0.00028088592233009704, "loss": 0.309, "step": 2100 }, { "epoch": 1.3349514563106797, "grad_norm": 0.6933236718177795, "learning_rate": 0.00027997572815533975, "loss": 0.3039, "step": 2200 }, { "epoch": 1.395631067961165, "grad_norm": 0.4874693751335144, "learning_rate": 0.0002790655339805825, "loss": 0.3016, "step": 2300 }, { "epoch": 1.4563106796116505, "grad_norm": 0.5307803750038147, "learning_rate": 0.0002781553398058252, "loss": 0.295, "step": 2400 }, { "epoch": 1.516990291262136, "grad_norm": 0.7260825634002686, "learning_rate": 0.00027724514563106793, "loss": 0.298, "step": 2500 }, { "epoch": 1.5776699029126213, "grad_norm": 1.3546072244644165, "learning_rate": 0.00027633495145631064, "loss": 0.2937, "step": 2600 }, { "epoch": 1.6383495145631068, "grad_norm": 0.7695233821868896, "learning_rate": 0.00027542475728155335, "loss": 0.2939, "step": 2700 }, { "epoch": 1.6990291262135924, "grad_norm": 0.46857160329818726, "learning_rate": 0.0002745145631067961, "loss": 0.2911, "step": 2800 }, { "epoch": 1.7597087378640777, "grad_norm": 0.521542489528656, "learning_rate": 0.0002736043689320388, "loss": 0.2936, "step": 2900 }, { "epoch": 1.820388349514563, "grad_norm": 1.1797749996185303, "learning_rate": 0.00027269417475728154, "loss": 0.291, "step": 3000 }, { "epoch": 1.820388349514563, "eval_accuracy": 0.6800947361683061, "eval_f1_macro": 0.43005056999542096, "eval_loss": 0.2904761731624603, "eval_precision": 0.5283372179121356, "eval_recall": 0.40012210038254903, "eval_runtime": 63.5697, "eval_samples_per_second": 737.253, "eval_steps_per_second": 5.773, "step": 3000 }, { "epoch": 1.8810679611650487, "grad_norm": 0.9966709613800049, "learning_rate": 0.00027178398058252425, "loss": 0.2965, "step": 3100 }, { "epoch": 1.941747572815534, "grad_norm": 0.40996024012565613, "learning_rate": 0.00027087378640776696, "loss": 0.2941, "step": 3200 }, { "epoch": 2.0024271844660193, "grad_norm": 0.5450060367584229, "learning_rate": 0.00026996359223300967, "loss": 0.2912, "step": 3300 }, { "epoch": 2.063106796116505, "grad_norm": 0.5307539701461792, "learning_rate": 0.0002690533980582524, "loss": 0.2872, "step": 3400 }, { "epoch": 2.1237864077669903, "grad_norm": 0.5863193273544312, "learning_rate": 0.00026814320388349514, "loss": 0.2929, "step": 3500 }, { "epoch": 2.1844660194174756, "grad_norm": 0.584078311920166, "learning_rate": 0.00026723300970873785, "loss": 0.2879, "step": 3600 }, { "epoch": 2.2451456310679614, "grad_norm": 0.6381602883338928, "learning_rate": 0.00026632281553398056, "loss": 0.2892, "step": 3700 }, { "epoch": 2.3058252427184467, "grad_norm": 0.4760149121284485, "learning_rate": 0.00026541262135922327, "loss": 0.2863, "step": 3800 }, { "epoch": 2.366504854368932, "grad_norm": 0.4088296890258789, "learning_rate": 0.000264502427184466, "loss": 0.2913, "step": 3900 }, { "epoch": 2.4271844660194173, "grad_norm": 1.3476176261901855, "learning_rate": 0.00026359223300970874, "loss": 0.2845, "step": 4000 }, { "epoch": 2.4271844660194173, "eval_accuracy": 0.6929182580493738, "eval_f1_macro": 0.4442664055274559, "eval_loss": 0.280377060174942, "eval_precision": 0.5328225535144124, "eval_recall": 0.41160029395774395, "eval_runtime": 64.199, "eval_samples_per_second": 730.027, "eval_steps_per_second": 5.717, "step": 4000 }, { "epoch": 2.487864077669903, "grad_norm": 0.6318752765655518, "learning_rate": 0.0002626820388349514, "loss": 0.2812, "step": 4100 }, { "epoch": 2.5485436893203883, "grad_norm": 0.49435973167419434, "learning_rate": 0.00026177184466019416, "loss": 0.2803, "step": 4200 }, { "epoch": 2.6092233009708736, "grad_norm": 0.4300900101661682, "learning_rate": 0.00026086165048543687, "loss": 0.2853, "step": 4300 }, { "epoch": 2.6699029126213594, "grad_norm": 0.9545436501502991, "learning_rate": 0.0002599514563106796, "loss": 0.2813, "step": 4400 }, { "epoch": 2.7305825242718447, "grad_norm": 0.5803716778755188, "learning_rate": 0.0002590412621359223, "loss": 0.2838, "step": 4500 }, { "epoch": 2.79126213592233, "grad_norm": 1.4714713096618652, "learning_rate": 0.000258131067961165, "loss": 0.2814, "step": 4600 }, { "epoch": 2.8519417475728153, "grad_norm": 0.6767821311950684, "learning_rate": 0.00025722087378640777, "loss": 0.2741, "step": 4700 }, { "epoch": 2.912621359223301, "grad_norm": 0.4653462767601013, "learning_rate": 0.0002563106796116505, "loss": 0.2783, "step": 4800 }, { "epoch": 2.9733009708737863, "grad_norm": 1.3012775182724, "learning_rate": 0.0002554004854368932, "loss": 0.283, "step": 4900 }, { "epoch": 3.033980582524272, "grad_norm": 0.4733451306819916, "learning_rate": 0.0002544902912621359, "loss": 0.2767, "step": 5000 }, { "epoch": 3.033980582524272, "eval_accuracy": 0.6949666076343696, "eval_f1_macro": 0.4524166681181929, "eval_loss": 0.2772601842880249, "eval_precision": 0.52914110464024, "eval_recall": 0.42261704559523156, "eval_runtime": 63.4257, "eval_samples_per_second": 738.927, "eval_steps_per_second": 5.786, "step": 5000 }, { "epoch": 3.0946601941747574, "grad_norm": 0.4103662371635437, "learning_rate": 0.0002535800970873786, "loss": 0.2796, "step": 5100 }, { "epoch": 3.1553398058252426, "grad_norm": 0.4195462763309479, "learning_rate": 0.0002526699029126213, "loss": 0.2757, "step": 5200 }, { "epoch": 3.216019417475728, "grad_norm": 1.2391552925109863, "learning_rate": 0.0002517597087378641, "loss": 0.2783, "step": 5300 }, { "epoch": 3.2766990291262137, "grad_norm": 1.2029412984848022, "learning_rate": 0.0002508495145631068, "loss": 0.2772, "step": 5400 }, { "epoch": 3.337378640776699, "grad_norm": 0.5050978660583496, "learning_rate": 0.0002499393203883495, "loss": 0.2776, "step": 5500 }, { "epoch": 3.3980582524271843, "grad_norm": 1.0107412338256836, "learning_rate": 0.0002490291262135922, "loss": 0.2766, "step": 5600 }, { "epoch": 3.45873786407767, "grad_norm": 0.4374917149543762, "learning_rate": 0.0002481189320388349, "loss": 0.2719, "step": 5700 }, { "epoch": 3.5194174757281553, "grad_norm": 1.6768765449523926, "learning_rate": 0.0002472087378640777, "loss": 0.2803, "step": 5800 }, { "epoch": 3.5800970873786406, "grad_norm": 0.8120823502540588, "learning_rate": 0.0002462985436893204, "loss": 0.2723, "step": 5900 }, { "epoch": 3.6407766990291264, "grad_norm": 1.3967177867889404, "learning_rate": 0.0002453883495145631, "loss": 0.2796, "step": 6000 }, { "epoch": 3.6407766990291264, "eval_accuracy": 0.7001301555465466, "eval_f1_macro": 0.46180498265852127, "eval_loss": 0.272257536649704, "eval_precision": 0.5281578618931982, "eval_recall": 0.4315295129889904, "eval_runtime": 63.8213, "eval_samples_per_second": 734.347, "eval_steps_per_second": 5.75, "step": 6000 }, { "epoch": 3.7014563106796117, "grad_norm": 0.6093985438346863, "learning_rate": 0.0002444781553398058, "loss": 0.2744, "step": 6100 }, { "epoch": 3.762135922330097, "grad_norm": 0.7282202243804932, "learning_rate": 0.00024356796116504852, "loss": 0.2715, "step": 6200 }, { "epoch": 3.8228155339805827, "grad_norm": 1.1341967582702637, "learning_rate": 0.00024265776699029123, "loss": 0.2709, "step": 6300 }, { "epoch": 3.883495145631068, "grad_norm": 0.8576841354370117, "learning_rate": 0.00024174757281553394, "loss": 0.275, "step": 6400 }, { "epoch": 3.9441747572815533, "grad_norm": 0.5656840205192566, "learning_rate": 0.00024083737864077668, "loss": 0.2676, "step": 6500 }, { "epoch": 4.004854368932039, "grad_norm": 0.6544743180274963, "learning_rate": 0.0002399271844660194, "loss": 0.2734, "step": 6600 }, { "epoch": 4.065533980582524, "grad_norm": 1.5159205198287964, "learning_rate": 0.0002390169902912621, "loss": 0.2666, "step": 6700 }, { "epoch": 4.12621359223301, "grad_norm": 0.9112799763679504, "learning_rate": 0.00023810679611650483, "loss": 0.2646, "step": 6800 }, { "epoch": 4.186893203883495, "grad_norm": 0.6971092224121094, "learning_rate": 0.00023719660194174754, "loss": 0.2681, "step": 6900 }, { "epoch": 4.247572815533981, "grad_norm": 0.5126680731773376, "learning_rate": 0.00023628640776699028, "loss": 0.2669, "step": 7000 }, { "epoch": 4.247572815533981, "eval_accuracy": 0.7024985597541981, "eval_f1_macro": 0.4678440617843855, "eval_loss": 0.26851192116737366, "eval_precision": 0.5330566032345198, "eval_recall": 0.43620949446199525, "eval_runtime": 63.9009, "eval_samples_per_second": 733.432, "eval_steps_per_second": 5.743, "step": 7000 } ], "logging_steps": 100, "max_steps": 32960, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 1000, "total_flos": 4.713992160165028e+17, "train_batch_size": 256, "trial_name": null, "trial_params": null }