diff --git "a/last-checkpoint/trainer_state.json" "b/last-checkpoint/trainer_state.json" --- "a/last-checkpoint/trainer_state.json" +++ "b/last-checkpoint/trainer_state.json" @@ -1,9 +1,9 @@ { "best_metric": 0.2962397336959839, "best_model_checkpoint": "chickens-composite-201616161616-150-epochs-wo-transform/checkpoint-45000", - "epoch": 112.0, + "epoch": 150.0, "eval_steps": 500, - "global_step": 56000, + "global_step": 75000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -15981,6 +15981,5432 @@ "eval_samples_per_second": 8.843, "eval_steps_per_second": 1.15, "step": 56000 + }, + { + "epoch": 112.02, + "grad_norm": 31.33954620361328, + "learning_rate": 1.5001872163025954e-06, + "loss": 0.199, + "step": 56010 + }, + { + "epoch": 112.08, + "grad_norm": 77.1370620727539, + "learning_rate": 1.4957026584154926e-06, + "loss": 0.2169, + "step": 56040 + }, + { + "epoch": 112.14, + "grad_norm": 57.998966217041016, + "learning_rate": 1.4912236342922143e-06, + "loss": 0.2132, + "step": 56070 + }, + { + "epoch": 112.2, + "grad_norm": 82.21308898925781, + "learning_rate": 1.4867501510057548e-06, + "loss": 0.2074, + "step": 56100 + }, + { + "epoch": 112.26, + "grad_norm": 52.17494201660156, + "learning_rate": 1.482282215620352e-06, + "loss": 0.201, + "step": 56130 + }, + { + "epoch": 112.32, + "grad_norm": 65.00828552246094, + "learning_rate": 1.4778198351914853e-06, + "loss": 0.2029, + "step": 56160 + }, + { + "epoch": 112.38, + "grad_norm": 208.12747192382812, + "learning_rate": 1.4733630167658652e-06, + "loss": 0.2259, + "step": 56190 + }, + { + "epoch": 112.44, + "grad_norm": 85.68213653564453, + "learning_rate": 1.4689117673814135e-06, + "loss": 0.2179, + "step": 56220 + }, + { + "epoch": 112.5, + "grad_norm": 45.39154052734375, + "learning_rate": 1.4644660940672628e-06, + "loss": 0.2035, + "step": 56250 + }, + { + "epoch": 112.56, + "grad_norm": 85.99728393554688, + "learning_rate": 1.4600260038437376e-06, + "loss": 0.2346, + "step": 56280 + }, + { + "epoch": 112.62, + "grad_norm": 56.224971771240234, + "learning_rate": 1.4555915037223438e-06, + "loss": 0.253, + "step": 56310 + }, + { + "epoch": 112.68, + "grad_norm": 40.9771728515625, + "learning_rate": 1.4511626007057667e-06, + "loss": 0.2058, + "step": 56340 + }, + { + "epoch": 112.74, + "grad_norm": 50.493865966796875, + "learning_rate": 1.4467393017878444e-06, + "loss": 0.2431, + "step": 56370 + }, + { + "epoch": 112.8, + "grad_norm": 63.593685150146484, + "learning_rate": 1.4423216139535735e-06, + "loss": 0.2349, + "step": 56400 + }, + { + "epoch": 112.86, + "grad_norm": 57.44195556640625, + "learning_rate": 1.4379095441790847e-06, + "loss": 0.2371, + "step": 56430 + }, + { + "epoch": 112.92, + "grad_norm": 40.976165771484375, + "learning_rate": 1.4335030994316357e-06, + "loss": 0.2195, + "step": 56460 + }, + { + "epoch": 112.98, + "grad_norm": 40.92074966430664, + "learning_rate": 1.4291022866696086e-06, + "loss": 0.2108, + "step": 56490 + }, + { + "epoch": 113.0, + "eval_loss": 0.31088173389434814, + "eval_map": 0.8054, + "eval_map_50": 0.9499, + "eval_map_75": 0.9015, + "eval_map_chicken": 0.8076, + "eval_map_duck": 0.7398, + "eval_map_large": 0.8011, + "eval_map_medium": 0.8084, + "eval_map_plant": 0.8688, + "eval_map_small": 0.3634, + "eval_mar_1": 0.3177, + "eval_mar_10": 0.8407, + "eval_mar_100": 0.8441, + "eval_mar_100_chicken": 0.8468, + "eval_mar_100_duck": 0.7887, + "eval_mar_100_plant": 0.8967, + "eval_mar_large": 0.8296, + "eval_mar_medium": 0.8485, + "eval_mar_small": 0.4129, + "eval_runtime": 12.34, + "eval_samples_per_second": 8.104, + "eval_steps_per_second": 1.053, + "step": 56500 + }, + { + "epoch": 113.04, + "grad_norm": 55.92573165893555, + "learning_rate": 1.4247071128424838e-06, + "loss": 0.2265, + "step": 56520 + }, + { + "epoch": 113.1, + "grad_norm": 60.161190032958984, + "learning_rate": 1.420317584890844e-06, + "loss": 0.2015, + "step": 56550 + }, + { + "epoch": 113.16, + "grad_norm": 49.9560661315918, + "learning_rate": 1.4159337097463515e-06, + "loss": 0.2438, + "step": 56580 + }, + { + "epoch": 113.22, + "grad_norm": 83.88067626953125, + "learning_rate": 1.4115554943317416e-06, + "loss": 0.2644, + "step": 56610 + }, + { + "epoch": 113.28, + "grad_norm": 42.70439529418945, + "learning_rate": 1.407182945560817e-06, + "loss": 0.2174, + "step": 56640 + }, + { + "epoch": 113.34, + "grad_norm": 77.9223861694336, + "learning_rate": 1.402816070338427e-06, + "loss": 0.2076, + "step": 56670 + }, + { + "epoch": 113.4, + "grad_norm": 45.262393951416016, + "learning_rate": 1.3984548755604655e-06, + "loss": 0.2369, + "step": 56700 + }, + { + "epoch": 113.46, + "grad_norm": 61.057743072509766, + "learning_rate": 1.3940993681138533e-06, + "loss": 0.1969, + "step": 56730 + }, + { + "epoch": 113.52, + "grad_norm": 54.26447677612305, + "learning_rate": 1.38974955487653e-06, + "loss": 0.2151, + "step": 56760 + }, + { + "epoch": 113.58, + "grad_norm": 77.25838470458984, + "learning_rate": 1.3854054427174468e-06, + "loss": 0.2445, + "step": 56790 + }, + { + "epoch": 113.64, + "grad_norm": 27.18859100341797, + "learning_rate": 1.3810670384965469e-06, + "loss": 0.2098, + "step": 56820 + }, + { + "epoch": 113.7, + "grad_norm": 66.12752532958984, + "learning_rate": 1.3767343490647668e-06, + "loss": 0.2193, + "step": 56850 + }, + { + "epoch": 113.76, + "grad_norm": 31.070798873901367, + "learning_rate": 1.372407381264011e-06, + "loss": 0.2348, + "step": 56880 + }, + { + "epoch": 113.82, + "grad_norm": 86.24375915527344, + "learning_rate": 1.368086141927154e-06, + "loss": 0.2188, + "step": 56910 + }, + { + "epoch": 113.88, + "grad_norm": 142.45108032226562, + "learning_rate": 1.3637706378780209e-06, + "loss": 0.2058, + "step": 56940 + }, + { + "epoch": 113.94, + "grad_norm": 113.668212890625, + "learning_rate": 1.3594608759313832e-06, + "loss": 0.2353, + "step": 56970 + }, + { + "epoch": 114.0, + "grad_norm": 81.07337951660156, + "learning_rate": 1.3551568628929434e-06, + "loss": 0.2148, + "step": 57000 + }, + { + "epoch": 114.0, + "eval_loss": 0.3065720796585083, + "eval_map": 0.803, + "eval_map_50": 0.9505, + "eval_map_75": 0.9011, + "eval_map_chicken": 0.8097, + "eval_map_duck": 0.7335, + "eval_map_large": 0.8033, + "eval_map_medium": 0.8068, + "eval_map_plant": 0.8657, + "eval_map_small": 0.3337, + "eval_mar_1": 0.3192, + "eval_mar_10": 0.8388, + "eval_mar_100": 0.8421, + "eval_mar_100_chicken": 0.8484, + "eval_mar_100_duck": 0.7845, + "eval_mar_100_plant": 0.8933, + "eval_mar_large": 0.8336, + "eval_mar_medium": 0.8466, + "eval_mar_small": 0.3914, + "eval_runtime": 13.3798, + "eval_samples_per_second": 7.474, + "eval_steps_per_second": 0.972, + "step": 57000 + }, + { + "epoch": 114.06, + "grad_norm": 59.886474609375, + "learning_rate": 1.350858605559323e-06, + "loss": 0.2309, + "step": 57030 + }, + { + "epoch": 114.12, + "grad_norm": 57.11491775512695, + "learning_rate": 1.346566110718061e-06, + "loss": 0.2288, + "step": 57060 + }, + { + "epoch": 114.18, + "grad_norm": 43.07657241821289, + "learning_rate": 1.3422793851475907e-06, + "loss": 0.2108, + "step": 57090 + }, + { + "epoch": 114.24, + "grad_norm": 52.78602981567383, + "learning_rate": 1.337998435617235e-06, + "loss": 0.2891, + "step": 57120 + }, + { + "epoch": 114.3, + "grad_norm": 49.36983108520508, + "learning_rate": 1.333723268887201e-06, + "loss": 0.2174, + "step": 57150 + }, + { + "epoch": 114.36, + "grad_norm": 55.36530685424805, + "learning_rate": 1.3294538917085586e-06, + "loss": 0.2449, + "step": 57180 + }, + { + "epoch": 114.42, + "grad_norm": 73.97029876708984, + "learning_rate": 1.3251903108232362e-06, + "loss": 0.1986, + "step": 57210 + }, + { + "epoch": 114.48, + "grad_norm": 65.40650939941406, + "learning_rate": 1.3209325329640126e-06, + "loss": 0.2228, + "step": 57240 + }, + { + "epoch": 114.54, + "grad_norm": 57.859867095947266, + "learning_rate": 1.316680564854499e-06, + "loss": 0.2143, + "step": 57270 + }, + { + "epoch": 114.6, + "grad_norm": 80.3431625366211, + "learning_rate": 1.312434413209131e-06, + "loss": 0.2277, + "step": 57300 + }, + { + "epoch": 114.66, + "grad_norm": 124.59980010986328, + "learning_rate": 1.3081940847331658e-06, + "loss": 0.1979, + "step": 57330 + }, + { + "epoch": 114.72, + "grad_norm": 42.293277740478516, + "learning_rate": 1.3039595861226579e-06, + "loss": 0.2265, + "step": 57360 + }, + { + "epoch": 114.78, + "grad_norm": 55.87450408935547, + "learning_rate": 1.2997309240644607e-06, + "loss": 0.1922, + "step": 57390 + }, + { + "epoch": 114.84, + "grad_norm": 40.74118423461914, + "learning_rate": 1.2955081052362072e-06, + "loss": 0.2295, + "step": 57420 + }, + { + "epoch": 114.9, + "grad_norm": 80.4583740234375, + "learning_rate": 1.2912911363063048e-06, + "loss": 0.1982, + "step": 57450 + }, + { + "epoch": 114.96, + "grad_norm": 70.01721954345703, + "learning_rate": 1.2870800239339237e-06, + "loss": 0.2107, + "step": 57480 + }, + { + "epoch": 115.0, + "eval_loss": 0.30899959802627563, + "eval_map": 0.8052, + "eval_map_50": 0.9505, + "eval_map_75": 0.9017, + "eval_map_chicken": 0.809, + "eval_map_duck": 0.7345, + "eval_map_large": 0.802, + "eval_map_medium": 0.8074, + "eval_map_plant": 0.8722, + "eval_map_small": 0.359, + "eval_mar_1": 0.3198, + "eval_mar_10": 0.8409, + "eval_mar_100": 0.8438, + "eval_mar_100_chicken": 0.8456, + "eval_mar_100_duck": 0.7866, + "eval_mar_100_plant": 0.8991, + "eval_mar_large": 0.8363, + "eval_mar_medium": 0.8487, + "eval_mar_small": 0.4076, + "eval_runtime": 11.3887, + "eval_samples_per_second": 8.781, + "eval_steps_per_second": 1.141, + "step": 57500 + }, + { + "epoch": 115.02, + "grad_norm": 75.13191986083984, + "learning_rate": 1.2828747747689846e-06, + "loss": 0.2267, + "step": 57510 + }, + { + "epoch": 115.08, + "grad_norm": 38.007808685302734, + "learning_rate": 1.2786753954521508e-06, + "loss": 0.23, + "step": 57540 + }, + { + "epoch": 115.14, + "grad_norm": 51.685386657714844, + "learning_rate": 1.2744818926148157e-06, + "loss": 0.2341, + "step": 57570 + }, + { + "epoch": 115.2, + "grad_norm": 49.958229064941406, + "learning_rate": 1.2702942728790897e-06, + "loss": 0.2442, + "step": 57600 + }, + { + "epoch": 115.26, + "grad_norm": 24.888076782226562, + "learning_rate": 1.2661125428577998e-06, + "loss": 0.2252, + "step": 57630 + }, + { + "epoch": 115.32, + "grad_norm": 51.06270217895508, + "learning_rate": 1.2619367091544654e-06, + "loss": 0.2564, + "step": 57660 + }, + { + "epoch": 115.38, + "grad_norm": 51.42506408691406, + "learning_rate": 1.2577667783633007e-06, + "loss": 0.2233, + "step": 57690 + }, + { + "epoch": 115.44, + "grad_norm": 33.248722076416016, + "learning_rate": 1.2536027570691938e-06, + "loss": 0.2404, + "step": 57720 + }, + { + "epoch": 115.5, + "grad_norm": 51.07853698730469, + "learning_rate": 1.2494446518477022e-06, + "loss": 0.2312, + "step": 57750 + }, + { + "epoch": 115.56, + "grad_norm": 73.65450286865234, + "learning_rate": 1.2452924692650443e-06, + "loss": 0.2013, + "step": 57780 + }, + { + "epoch": 115.62, + "grad_norm": 27.405363082885742, + "learning_rate": 1.2411462158780791e-06, + "loss": 0.216, + "step": 57810 + }, + { + "epoch": 115.68, + "grad_norm": 62.627403259277344, + "learning_rate": 1.2370058982343109e-06, + "loss": 0.23, + "step": 57840 + }, + { + "epoch": 115.74, + "grad_norm": 39.418758392333984, + "learning_rate": 1.232871522871864e-06, + "loss": 0.2142, + "step": 57870 + }, + { + "epoch": 115.8, + "grad_norm": 44.806175231933594, + "learning_rate": 1.2287430963194807e-06, + "loss": 0.2075, + "step": 57900 + }, + { + "epoch": 115.86, + "grad_norm": 56.002132415771484, + "learning_rate": 1.2246206250965127e-06, + "loss": 0.2078, + "step": 57930 + }, + { + "epoch": 115.92, + "grad_norm": 137.88645935058594, + "learning_rate": 1.2205041157129017e-06, + "loss": 0.2048, + "step": 57960 + }, + { + "epoch": 115.98, + "grad_norm": 15.981607437133789, + "learning_rate": 1.2163935746691807e-06, + "loss": 0.2204, + "step": 57990 + }, + { + "epoch": 116.0, + "eval_loss": 0.31037890911102295, + "eval_map": 0.8033, + "eval_map_50": 0.9505, + "eval_map_75": 0.9073, + "eval_map_chicken": 0.8066, + "eval_map_duck": 0.7341, + "eval_map_large": 0.8085, + "eval_map_medium": 0.8036, + "eval_map_plant": 0.8693, + "eval_map_small": 0.3617, + "eval_mar_1": 0.3187, + "eval_mar_10": 0.8387, + "eval_mar_100": 0.8419, + "eval_mar_100_chicken": 0.8448, + "eval_mar_100_duck": 0.7845, + "eval_mar_100_plant": 0.8964, + "eval_mar_large": 0.8359, + "eval_mar_medium": 0.8446, + "eval_mar_small": 0.4143, + "eval_runtime": 12.9378, + "eval_samples_per_second": 7.729, + "eval_steps_per_second": 1.005, + "step": 58000 + }, + { + "epoch": 116.04, + "grad_norm": 202.0614471435547, + "learning_rate": 1.2122890084564542e-06, + "loss": 0.2063, + "step": 58020 + }, + { + "epoch": 116.1, + "grad_norm": 47.288394927978516, + "learning_rate": 1.2081904235563908e-06, + "loss": 0.24, + "step": 58050 + }, + { + "epoch": 116.16, + "grad_norm": 133.25660705566406, + "learning_rate": 1.204097826441218e-06, + "loss": 0.2194, + "step": 58080 + }, + { + "epoch": 116.22, + "grad_norm": 38.492435455322266, + "learning_rate": 1.200011223573702e-06, + "loss": 0.2196, + "step": 58110 + }, + { + "epoch": 116.28, + "grad_norm": 55.27732467651367, + "learning_rate": 1.1959306214071508e-06, + "loss": 0.2093, + "step": 58140 + }, + { + "epoch": 116.34, + "grad_norm": 22.916257858276367, + "learning_rate": 1.1918560263853902e-06, + "loss": 0.2325, + "step": 58170 + }, + { + "epoch": 116.4, + "grad_norm": 48.5063362121582, + "learning_rate": 1.18778744494276e-06, + "loss": 0.2011, + "step": 58200 + }, + { + "epoch": 116.46, + "grad_norm": 41.77861022949219, + "learning_rate": 1.1837248835041093e-06, + "loss": 0.2232, + "step": 58230 + }, + { + "epoch": 116.52, + "grad_norm": 63.43424987792969, + "learning_rate": 1.1796683484847731e-06, + "loss": 0.242, + "step": 58260 + }, + { + "epoch": 116.58, + "grad_norm": 28.93366813659668, + "learning_rate": 1.1756178462905782e-06, + "loss": 0.2271, + "step": 58290 + }, + { + "epoch": 116.64, + "grad_norm": 41.52293395996094, + "learning_rate": 1.1715733833178178e-06, + "loss": 0.2268, + "step": 58320 + }, + { + "epoch": 116.7, + "grad_norm": 51.30208969116211, + "learning_rate": 1.1675349659532514e-06, + "loss": 0.2221, + "step": 58350 + }, + { + "epoch": 116.76, + "grad_norm": 126.49063110351562, + "learning_rate": 1.1635026005740902e-06, + "loss": 0.2103, + "step": 58380 + }, + { + "epoch": 116.82, + "grad_norm": 24.768518447875977, + "learning_rate": 1.159476293547992e-06, + "loss": 0.2149, + "step": 58410 + }, + { + "epoch": 116.88, + "grad_norm": 36.70859909057617, + "learning_rate": 1.1554560512330437e-06, + "loss": 0.2211, + "step": 58440 + }, + { + "epoch": 116.94, + "grad_norm": 65.63562774658203, + "learning_rate": 1.1514418799777554e-06, + "loss": 0.1944, + "step": 58470 + }, + { + "epoch": 117.0, + "grad_norm": 48.93583297729492, + "learning_rate": 1.1474337861210543e-06, + "loss": 0.2192, + "step": 58500 + }, + { + "epoch": 117.0, + "eval_loss": 0.30984461307525635, + "eval_map": 0.8062, + "eval_map_50": 0.9504, + "eval_map_75": 0.8968, + "eval_map_chicken": 0.807, + "eval_map_duck": 0.7388, + "eval_map_large": 0.8052, + "eval_map_medium": 0.8092, + "eval_map_plant": 0.8726, + "eval_map_small": 0.3382, + "eval_mar_1": 0.321, + "eval_mar_10": 0.8416, + "eval_mar_100": 0.8454, + "eval_mar_100_chicken": 0.848, + "eval_mar_100_duck": 0.7897, + "eval_mar_100_plant": 0.8985, + "eval_mar_large": 0.8363, + "eval_mar_medium": 0.8511, + "eval_mar_small": 0.3948, + "eval_runtime": 12.8976, + "eval_samples_per_second": 7.753, + "eval_steps_per_second": 1.008, + "step": 58500 + }, + { + "epoch": 117.06, + "grad_norm": 64.96910858154297, + "learning_rate": 1.1434317759922664e-06, + "loss": 0.23, + "step": 58530 + }, + { + "epoch": 117.12, + "grad_norm": 57.87126922607422, + "learning_rate": 1.1394358559111101e-06, + "loss": 0.2053, + "step": 58560 + }, + { + "epoch": 117.18, + "grad_norm": 171.56883239746094, + "learning_rate": 1.135446032187692e-06, + "loss": 0.2277, + "step": 58590 + }, + { + "epoch": 117.24, + "grad_norm": 76.24279022216797, + "learning_rate": 1.1314623111224865e-06, + "loss": 0.2346, + "step": 58620 + }, + { + "epoch": 117.3, + "grad_norm": 43.50970458984375, + "learning_rate": 1.1274846990063314e-06, + "loss": 0.2009, + "step": 58650 + }, + { + "epoch": 117.36, + "grad_norm": 48.1057014465332, + "learning_rate": 1.1235132021204226e-06, + "loss": 0.2197, + "step": 58680 + }, + { + "epoch": 117.42, + "grad_norm": 34.25832748413086, + "learning_rate": 1.1195478267362924e-06, + "loss": 0.2065, + "step": 58710 + }, + { + "epoch": 117.48, + "grad_norm": 122.34854888916016, + "learning_rate": 1.1155885791158128e-06, + "loss": 0.2114, + "step": 58740 + }, + { + "epoch": 117.54, + "grad_norm": 57.05229187011719, + "learning_rate": 1.111635465511175e-06, + "loss": 0.218, + "step": 58770 + }, + { + "epoch": 117.6, + "grad_norm": 25.191370010375977, + "learning_rate": 1.1076884921648834e-06, + "loss": 0.2492, + "step": 58800 + }, + { + "epoch": 117.66, + "grad_norm": 87.51880645751953, + "learning_rate": 1.1037476653097501e-06, + "loss": 0.1996, + "step": 58830 + }, + { + "epoch": 117.72, + "grad_norm": 45.07328796386719, + "learning_rate": 1.0998129911688766e-06, + "loss": 0.2378, + "step": 58860 + }, + { + "epoch": 117.78, + "grad_norm": 29.409072875976562, + "learning_rate": 1.0958844759556525e-06, + "loss": 0.2098, + "step": 58890 + }, + { + "epoch": 117.84, + "grad_norm": 18.03131675720215, + "learning_rate": 1.0919621258737384e-06, + "loss": 0.2214, + "step": 58920 + }, + { + "epoch": 117.9, + "grad_norm": 47.66557693481445, + "learning_rate": 1.0880459471170597e-06, + "loss": 0.2387, + "step": 58950 + }, + { + "epoch": 117.96, + "grad_norm": 30.374011993408203, + "learning_rate": 1.0841359458697986e-06, + "loss": 0.2384, + "step": 58980 + }, + { + "epoch": 118.0, + "eval_loss": 0.3125670850276947, + "eval_map": 0.8051, + "eval_map_50": 0.9507, + "eval_map_75": 0.9077, + "eval_map_chicken": 0.8043, + "eval_map_duck": 0.7379, + "eval_map_large": 0.8069, + "eval_map_medium": 0.8085, + "eval_map_plant": 0.8732, + "eval_map_small": 0.3702, + "eval_mar_1": 0.3187, + "eval_mar_10": 0.8387, + "eval_mar_100": 0.8421, + "eval_mar_100_chicken": 0.8421, + "eval_mar_100_duck": 0.7845, + "eval_mar_100_plant": 0.8997, + "eval_mar_large": 0.8358, + "eval_mar_medium": 0.8476, + "eval_mar_small": 0.4219, + "eval_runtime": 13.1737, + "eval_samples_per_second": 7.591, + "eval_steps_per_second": 0.987, + "step": 59000 + }, + { + "epoch": 118.02, + "grad_norm": 57.89711380004883, + "learning_rate": 1.0802321283063794e-06, + "loss": 0.211, + "step": 59010 + }, + { + "epoch": 118.08, + "grad_norm": 89.98204803466797, + "learning_rate": 1.0763345005914649e-06, + "loss": 0.2281, + "step": 59040 + }, + { + "epoch": 118.14, + "grad_norm": 37.554039001464844, + "learning_rate": 1.0724430688799402e-06, + "loss": 0.199, + "step": 59070 + }, + { + "epoch": 118.2, + "grad_norm": 43.45093536376953, + "learning_rate": 1.0685578393169054e-06, + "loss": 0.2155, + "step": 59100 + }, + { + "epoch": 118.26, + "grad_norm": 43.8154411315918, + "learning_rate": 1.0646788180376716e-06, + "loss": 0.2171, + "step": 59130 + }, + { + "epoch": 118.32, + "grad_norm": 65.75592803955078, + "learning_rate": 1.0608060111677409e-06, + "loss": 0.2068, + "step": 59160 + }, + { + "epoch": 118.38, + "grad_norm": 30.5941104888916, + "learning_rate": 1.0569394248228026e-06, + "loss": 0.2257, + "step": 59190 + }, + { + "epoch": 118.44, + "grad_norm": 107.75626373291016, + "learning_rate": 1.053079065108728e-06, + "loss": 0.2102, + "step": 59220 + }, + { + "epoch": 118.5, + "grad_norm": 43.346866607666016, + "learning_rate": 1.049224938121548e-06, + "loss": 0.2367, + "step": 59250 + }, + { + "epoch": 118.56, + "grad_norm": 62.630615234375, + "learning_rate": 1.0453770499474585e-06, + "loss": 0.2341, + "step": 59280 + }, + { + "epoch": 118.62, + "grad_norm": 64.70538330078125, + "learning_rate": 1.0415354066627993e-06, + "loss": 0.2207, + "step": 59310 + }, + { + "epoch": 118.68, + "grad_norm": 35.14129638671875, + "learning_rate": 1.037700014334047e-06, + "loss": 0.2262, + "step": 59340 + }, + { + "epoch": 118.74, + "grad_norm": 77.49797821044922, + "learning_rate": 1.0338708790178136e-06, + "loss": 0.2323, + "step": 59370 + }, + { + "epoch": 118.8, + "grad_norm": 33.764774322509766, + "learning_rate": 1.0300480067608232e-06, + "loss": 0.2213, + "step": 59400 + }, + { + "epoch": 118.86, + "grad_norm": 67.69646453857422, + "learning_rate": 1.026231403599915e-06, + "loss": 0.206, + "step": 59430 + }, + { + "epoch": 118.92, + "grad_norm": 26.622072219848633, + "learning_rate": 1.0224210755620257e-06, + "loss": 0.2049, + "step": 59460 + }, + { + "epoch": 118.98, + "grad_norm": 28.23842430114746, + "learning_rate": 1.0186170286641816e-06, + "loss": 0.2001, + "step": 59490 + }, + { + "epoch": 119.0, + "eval_loss": 0.3057597875595093, + "eval_map": 0.8046, + "eval_map_50": 0.9506, + "eval_map_75": 0.9047, + "eval_map_chicken": 0.804, + "eval_map_duck": 0.7374, + "eval_map_large": 0.8057, + "eval_map_medium": 0.8089, + "eval_map_plant": 0.8725, + "eval_map_small": 0.3726, + "eval_mar_1": 0.3203, + "eval_mar_10": 0.8405, + "eval_mar_100": 0.8437, + "eval_mar_100_chicken": 0.8437, + "eval_mar_100_duck": 0.7887, + "eval_mar_100_plant": 0.8988, + "eval_mar_large": 0.8301, + "eval_mar_medium": 0.8507, + "eval_mar_small": 0.4205, + "eval_runtime": 12.7193, + "eval_samples_per_second": 7.862, + "eval_steps_per_second": 1.022, + "step": 59500 + }, + { + "epoch": 119.04, + "grad_norm": 43.927146911621094, + "learning_rate": 1.014819268913495e-06, + "loss": 0.2077, + "step": 59520 + }, + { + "epoch": 119.1, + "grad_norm": 49.40350341796875, + "learning_rate": 1.0110278023071445e-06, + "loss": 0.2033, + "step": 59550 + }, + { + "epoch": 119.16, + "grad_norm": 237.99090576171875, + "learning_rate": 1.0072426348323754e-06, + "loss": 0.246, + "step": 59580 + }, + { + "epoch": 119.22, + "grad_norm": 38.9443244934082, + "learning_rate": 1.0034637724664832e-06, + "loss": 0.2126, + "step": 59610 + }, + { + "epoch": 119.28, + "grad_norm": 67.23509979248047, + "learning_rate": 9.99691221176805e-07, + "loss": 0.1921, + "step": 59640 + }, + { + "epoch": 119.34, + "grad_norm": 47.9791259765625, + "learning_rate": 9.959249869207177e-07, + "loss": 0.2247, + "step": 59670 + }, + { + "epoch": 119.4, + "grad_norm": 98.65447235107422, + "learning_rate": 9.921650756456164e-07, + "loss": 0.2286, + "step": 59700 + }, + { + "epoch": 119.46, + "grad_norm": 50.2000846862793, + "learning_rate": 9.884114932889172e-07, + "loss": 0.2398, + "step": 59730 + }, + { + "epoch": 119.52, + "grad_norm": 32.53291702270508, + "learning_rate": 9.84664245778037e-07, + "loss": 0.2037, + "step": 59760 + }, + { + "epoch": 119.58, + "grad_norm": 50.67923355102539, + "learning_rate": 9.809233390303901e-07, + "loss": 0.2632, + "step": 59790 + }, + { + "epoch": 119.64, + "grad_norm": 63.43164825439453, + "learning_rate": 9.771887789533818e-07, + "loss": 0.2612, + "step": 59820 + }, + { + "epoch": 119.7, + "grad_norm": 34.49391174316406, + "learning_rate": 9.734605714443906e-07, + "loss": 0.2278, + "step": 59850 + }, + { + "epoch": 119.76, + "grad_norm": 69.05148315429688, + "learning_rate": 9.69738722390765e-07, + "loss": 0.1917, + "step": 59880 + }, + { + "epoch": 119.82, + "grad_norm": 138.90768432617188, + "learning_rate": 9.66023237669812e-07, + "loss": 0.2051, + "step": 59910 + }, + { + "epoch": 119.88, + "grad_norm": 67.18104553222656, + "learning_rate": 9.623141231487904e-07, + "loss": 0.187, + "step": 59940 + }, + { + "epoch": 119.94, + "grad_norm": 34.69009017944336, + "learning_rate": 9.586113846848982e-07, + "loss": 0.2267, + "step": 59970 + }, + { + "epoch": 120.0, + "grad_norm": 55.136592864990234, + "learning_rate": 9.549150281252633e-07, + "loss": 0.205, + "step": 60000 + }, + { + "epoch": 120.0, + "eval_loss": 0.3106890022754669, + "eval_map": 0.803, + "eval_map_50": 0.9504, + "eval_map_75": 0.9039, + "eval_map_chicken": 0.8033, + "eval_map_duck": 0.732, + "eval_map_large": 0.8106, + "eval_map_medium": 0.8072, + "eval_map_plant": 0.8737, + "eval_map_small": 0.3506, + "eval_mar_1": 0.3193, + "eval_mar_10": 0.8387, + "eval_mar_100": 0.8423, + "eval_mar_100_chicken": 0.8437, + "eval_mar_100_duck": 0.7845, + "eval_mar_100_plant": 0.8988, + "eval_mar_large": 0.8393, + "eval_mar_medium": 0.8474, + "eval_mar_small": 0.4043, + "eval_runtime": 13.2445, + "eval_samples_per_second": 7.55, + "eval_steps_per_second": 0.982, + "step": 60000 + }, + { + "epoch": 120.06, + "grad_norm": 64.85227966308594, + "learning_rate": 9.512250593069394e-07, + "loss": 0.2098, + "step": 60030 + }, + { + "epoch": 120.12, + "grad_norm": 90.1717529296875, + "learning_rate": 9.475414840568903e-07, + "loss": 0.2312, + "step": 60060 + }, + { + "epoch": 120.18, + "grad_norm": 56.66279602050781, + "learning_rate": 9.438643081919818e-07, + "loss": 0.2083, + "step": 60090 + }, + { + "epoch": 120.24, + "grad_norm": 77.5468521118164, + "learning_rate": 9.401935375189802e-07, + "loss": 0.208, + "step": 60120 + }, + { + "epoch": 120.3, + "grad_norm": 0.0028794463723897934, + "learning_rate": 9.365291778345303e-07, + "loss": 0.1874, + "step": 60150 + }, + { + "epoch": 120.36, + "grad_norm": 75.45403289794922, + "learning_rate": 9.32871234925159e-07, + "loss": 0.2362, + "step": 60180 + }, + { + "epoch": 120.42, + "grad_norm": 52.26958084106445, + "learning_rate": 9.29219714567256e-07, + "loss": 0.2354, + "step": 60210 + }, + { + "epoch": 120.48, + "grad_norm": 49.53770446777344, + "learning_rate": 9.255746225270689e-07, + "loss": 0.2693, + "step": 60240 + }, + { + "epoch": 120.54, + "grad_norm": 45.97019577026367, + "learning_rate": 9.21935964560699e-07, + "loss": 0.2156, + "step": 60270 + }, + { + "epoch": 120.6, + "grad_norm": 38.9113883972168, + "learning_rate": 9.183037464140804e-07, + "loss": 0.2108, + "step": 60300 + }, + { + "epoch": 120.66, + "grad_norm": 48.45543670654297, + "learning_rate": 9.146779738229838e-07, + "loss": 0.202, + "step": 60330 + }, + { + "epoch": 120.72, + "grad_norm": 109.9886703491211, + "learning_rate": 9.110586525129988e-07, + "loss": 0.2332, + "step": 60360 + }, + { + "epoch": 120.78, + "grad_norm": 36.915462493896484, + "learning_rate": 9.074457881995252e-07, + "loss": 0.2075, + "step": 60390 + }, + { + "epoch": 120.84, + "grad_norm": 52.093040466308594, + "learning_rate": 9.038393865877725e-07, + "loss": 0.2154, + "step": 60420 + }, + { + "epoch": 120.9, + "grad_norm": 119.32299041748047, + "learning_rate": 9.002394533727382e-07, + "loss": 0.2397, + "step": 60450 + }, + { + "epoch": 120.96, + "grad_norm": 52.55674743652344, + "learning_rate": 8.966459942392108e-07, + "loss": 0.2247, + "step": 60480 + }, + { + "epoch": 121.0, + "eval_loss": 0.3103560507297516, + "eval_map": 0.8078, + "eval_map_50": 0.9503, + "eval_map_75": 0.9034, + "eval_map_chicken": 0.8084, + "eval_map_duck": 0.7372, + "eval_map_large": 0.806, + "eval_map_medium": 0.8085, + "eval_map_plant": 0.8779, + "eval_map_small": 0.4008, + "eval_mar_1": 0.321, + "eval_mar_10": 0.8437, + "eval_mar_100": 0.847, + "eval_mar_100_chicken": 0.8464, + "eval_mar_100_duck": 0.7928, + "eval_mar_100_plant": 0.9018, + "eval_mar_large": 0.8377, + "eval_mar_medium": 0.851, + "eval_mar_small": 0.4462, + "eval_runtime": 11.5428, + "eval_samples_per_second": 8.663, + "eval_steps_per_second": 1.126, + "step": 60500 + }, + { + "epoch": 121.02, + "grad_norm": 42.727779388427734, + "learning_rate": 8.930590148617513e-07, + "loss": 0.1956, + "step": 60510 + }, + { + "epoch": 121.08, + "grad_norm": 57.22825622558594, + "learning_rate": 8.894785209046886e-07, + "loss": 0.2028, + "step": 60540 + }, + { + "epoch": 121.14, + "grad_norm": 90.83641052246094, + "learning_rate": 8.859045180221137e-07, + "loss": 0.2213, + "step": 60570 + }, + { + "epoch": 121.2, + "grad_norm": 31.503341674804688, + "learning_rate": 8.823370118578628e-07, + "loss": 0.2004, + "step": 60600 + }, + { + "epoch": 121.26, + "grad_norm": 70.24781799316406, + "learning_rate": 8.787760080455171e-07, + "loss": 0.2337, + "step": 60630 + }, + { + "epoch": 121.32, + "grad_norm": 39.60617446899414, + "learning_rate": 8.752215122083874e-07, + "loss": 0.2097, + "step": 60660 + }, + { + "epoch": 121.38, + "grad_norm": 53.7212028503418, + "learning_rate": 8.716735299595059e-07, + "loss": 0.2256, + "step": 60690 + }, + { + "epoch": 121.44, + "grad_norm": 45.7958984375, + "learning_rate": 8.68132066901623e-07, + "loss": 0.2443, + "step": 60720 + }, + { + "epoch": 121.5, + "grad_norm": 79.56392669677734, + "learning_rate": 8.645971286271903e-07, + "loss": 0.2151, + "step": 60750 + }, + { + "epoch": 121.56, + "grad_norm": 32.804012298583984, + "learning_rate": 8.610687207183604e-07, + "loss": 0.1883, + "step": 60780 + }, + { + "epoch": 121.62, + "grad_norm": 30.23906707763672, + "learning_rate": 8.575468487469696e-07, + "loss": 0.2344, + "step": 60810 + }, + { + "epoch": 121.68, + "grad_norm": 62.960548400878906, + "learning_rate": 8.540315182745329e-07, + "loss": 0.2359, + "step": 60840 + }, + { + "epoch": 121.74, + "grad_norm": 61.267372131347656, + "learning_rate": 8.505227348522404e-07, + "loss": 0.2399, + "step": 60870 + }, + { + "epoch": 121.8, + "grad_norm": 87.58113098144531, + "learning_rate": 8.470205040209362e-07, + "loss": 0.1891, + "step": 60900 + }, + { + "epoch": 121.86, + "grad_norm": 52.148704528808594, + "learning_rate": 8.435248313111244e-07, + "loss": 0.2217, + "step": 60930 + }, + { + "epoch": 121.92, + "grad_norm": 40.73577880859375, + "learning_rate": 8.400357222429473e-07, + "loss": 0.2098, + "step": 60960 + }, + { + "epoch": 121.98, + "grad_norm": 74.6595687866211, + "learning_rate": 8.365531823261841e-07, + "loss": 0.2042, + "step": 60990 + }, + { + "epoch": 122.0, + "eval_loss": 0.31579113006591797, + "eval_map": 0.8016, + "eval_map_50": 0.9466, + "eval_map_75": 0.9042, + "eval_map_chicken": 0.8045, + "eval_map_duck": 0.7261, + "eval_map_large": 0.7924, + "eval_map_medium": 0.8066, + "eval_map_plant": 0.8743, + "eval_map_small": 0.3568, + "eval_mar_1": 0.3195, + "eval_mar_10": 0.8391, + "eval_mar_100": 0.8429, + "eval_mar_100_chicken": 0.846, + "eval_mar_100_duck": 0.7825, + "eval_mar_100_plant": 0.9003, + "eval_mar_large": 0.8266, + "eval_mar_medium": 0.8485, + "eval_mar_small": 0.4157, + "eval_runtime": 12.4336, + "eval_samples_per_second": 8.043, + "eval_steps_per_second": 1.046, + "step": 61000 + }, + { + "epoch": 122.04, + "grad_norm": 38.343040466308594, + "learning_rate": 8.330772170602424e-07, + "loss": 0.2252, + "step": 61020 + }, + { + "epoch": 122.1, + "grad_norm": 74.18119812011719, + "learning_rate": 8.296078319341444e-07, + "loss": 0.2137, + "step": 61050 + }, + { + "epoch": 122.16, + "grad_norm": 65.24456787109375, + "learning_rate": 8.261450324265225e-07, + "loss": 0.2098, + "step": 61080 + }, + { + "epoch": 122.22, + "grad_norm": 45.015743255615234, + "learning_rate": 8.226888240056114e-07, + "loss": 0.1982, + "step": 61110 + }, + { + "epoch": 122.28, + "grad_norm": 67.18856811523438, + "learning_rate": 8.192392121292336e-07, + "loss": 0.1895, + "step": 61140 + }, + { + "epoch": 122.34, + "grad_norm": 70.43605041503906, + "learning_rate": 8.157962022448001e-07, + "loss": 0.2065, + "step": 61170 + }, + { + "epoch": 122.4, + "grad_norm": 62.40924072265625, + "learning_rate": 8.123597997892918e-07, + "loss": 0.2141, + "step": 61200 + }, + { + "epoch": 122.46, + "grad_norm": 38.64645767211914, + "learning_rate": 8.089300101892561e-07, + "loss": 0.2097, + "step": 61230 + }, + { + "epoch": 122.52, + "grad_norm": 59.256107330322266, + "learning_rate": 8.055068388608011e-07, + "loss": 0.2103, + "step": 61260 + }, + { + "epoch": 122.58, + "grad_norm": 28.657575607299805, + "learning_rate": 8.020902912095807e-07, + "loss": 0.2334, + "step": 61290 + }, + { + "epoch": 122.64, + "grad_norm": 28.822608947753906, + "learning_rate": 7.986803726307901e-07, + "loss": 0.2003, + "step": 61320 + }, + { + "epoch": 122.7, + "grad_norm": 64.58875274658203, + "learning_rate": 7.952770885091548e-07, + "loss": 0.2058, + "step": 61350 + }, + { + "epoch": 122.76, + "grad_norm": 56.4006233215332, + "learning_rate": 7.918804442189271e-07, + "loss": 0.2472, + "step": 61380 + }, + { + "epoch": 122.82, + "grad_norm": 35.49840545654297, + "learning_rate": 7.884904451238712e-07, + "loss": 0.2192, + "step": 61410 + }, + { + "epoch": 122.88, + "grad_norm": 45.37355422973633, + "learning_rate": 7.851070965772572e-07, + "loss": 0.2269, + "step": 61440 + }, + { + "epoch": 122.94, + "grad_norm": 60.03096389770508, + "learning_rate": 7.81730403921856e-07, + "loss": 0.2392, + "step": 61470 + }, + { + "epoch": 123.0, + "grad_norm": 34.28696823120117, + "learning_rate": 7.783603724899258e-07, + "loss": 0.2029, + "step": 61500 + }, + { + "epoch": 123.0, + "eval_loss": 0.3060416281223297, + "eval_map": 0.8059, + "eval_map_50": 0.9503, + "eval_map_75": 0.8976, + "eval_map_chicken": 0.802, + "eval_map_duck": 0.7403, + "eval_map_large": 0.8096, + "eval_map_medium": 0.808, + "eval_map_plant": 0.8753, + "eval_map_small": 0.336, + "eval_mar_1": 0.3202, + "eval_mar_10": 0.8431, + "eval_mar_100": 0.8466, + "eval_mar_100_chicken": 0.846, + "eval_mar_100_duck": 0.7938, + "eval_mar_100_plant": 0.9, + "eval_mar_large": 0.8403, + "eval_mar_medium": 0.8509, + "eval_mar_small": 0.39, + "eval_runtime": 12.9518, + "eval_samples_per_second": 7.721, + "eval_steps_per_second": 1.004, + "step": 61500 + }, + { + "epoch": 123.06, + "grad_norm": 72.92243194580078, + "learning_rate": 7.749970076032048e-07, + "loss": 0.2276, + "step": 61530 + }, + { + "epoch": 123.12, + "grad_norm": 64.05213928222656, + "learning_rate": 7.716403145729073e-07, + "loss": 0.2084, + "step": 61560 + }, + { + "epoch": 123.18, + "grad_norm": 90.82645416259766, + "learning_rate": 7.682902986997076e-07, + "loss": 0.2284, + "step": 61590 + }, + { + "epoch": 123.24, + "grad_norm": 70.66219329833984, + "learning_rate": 7.649469652737407e-07, + "loss": 0.2002, + "step": 61620 + }, + { + "epoch": 123.3, + "grad_norm": 46.48184585571289, + "learning_rate": 7.61610319574585e-07, + "loss": 0.2054, + "step": 61650 + }, + { + "epoch": 123.36, + "grad_norm": 49.623992919921875, + "learning_rate": 7.582803668712579e-07, + "loss": 0.2303, + "step": 61680 + }, + { + "epoch": 123.42, + "grad_norm": 54.12312316894531, + "learning_rate": 7.549571124222127e-07, + "loss": 0.2509, + "step": 61710 + }, + { + "epoch": 123.48, + "grad_norm": 39.39469528198242, + "learning_rate": 7.51640561475318e-07, + "loss": 0.2076, + "step": 61740 + }, + { + "epoch": 123.54, + "grad_norm": 27.189287185668945, + "learning_rate": 7.48330719267864e-07, + "loss": 0.2316, + "step": 61770 + }, + { + "epoch": 123.6, + "grad_norm": 88.34009552001953, + "learning_rate": 7.450275910265415e-07, + "loss": 0.2575, + "step": 61800 + }, + { + "epoch": 123.66, + "grad_norm": 53.184043884277344, + "learning_rate": 7.4173118196744e-07, + "loss": 0.1986, + "step": 61830 + }, + { + "epoch": 123.72, + "grad_norm": 48.865074157714844, + "learning_rate": 7.384414972960419e-07, + "loss": 0.1832, + "step": 61860 + }, + { + "epoch": 123.78, + "grad_norm": 44.44317626953125, + "learning_rate": 7.351585422072049e-07, + "loss": 0.247, + "step": 61890 + }, + { + "epoch": 123.84, + "grad_norm": 73.54085540771484, + "learning_rate": 7.318823218851668e-07, + "loss": 0.1979, + "step": 61920 + }, + { + "epoch": 123.9, + "grad_norm": 90.45979309082031, + "learning_rate": 7.286128415035249e-07, + "loss": 0.2072, + "step": 61950 + }, + { + "epoch": 123.96, + "grad_norm": 74.4012680053711, + "learning_rate": 7.253501062252338e-07, + "loss": 0.2077, + "step": 61980 + }, + { + "epoch": 124.0, + "eval_loss": 0.31227219104766846, + "eval_map": 0.8028, + "eval_map_50": 0.9499, + "eval_map_75": 0.8959, + "eval_map_chicken": 0.8023, + "eval_map_duck": 0.7324, + "eval_map_large": 0.802, + "eval_map_medium": 0.8068, + "eval_map_plant": 0.8737, + "eval_map_small": 0.3735, + "eval_mar_1": 0.3185, + "eval_mar_10": 0.8399, + "eval_mar_100": 0.8435, + "eval_mar_100_chicken": 0.8437, + "eval_mar_100_duck": 0.7866, + "eval_mar_100_plant": 0.9003, + "eval_mar_large": 0.8301, + "eval_mar_medium": 0.85, + "eval_mar_small": 0.4319, + "eval_runtime": 13.0439, + "eval_samples_per_second": 7.666, + "eval_steps_per_second": 0.997, + "step": 62000 + }, + { + "epoch": 124.02, + "grad_norm": 56.22016525268555, + "learning_rate": 7.220941212026005e-07, + "loss": 0.1965, + "step": 62010 + }, + { + "epoch": 124.08, + "grad_norm": 50.742515563964844, + "learning_rate": 7.188448915772673e-07, + "loss": 0.2208, + "step": 62040 + }, + { + "epoch": 124.14, + "grad_norm": 27.431562423706055, + "learning_rate": 7.156024224802139e-07, + "loss": 0.1939, + "step": 62070 + }, + { + "epoch": 124.2, + "grad_norm": 33.407440185546875, + "learning_rate": 7.123667190317396e-07, + "loss": 0.2363, + "step": 62100 + }, + { + "epoch": 124.26, + "grad_norm": 60.38604736328125, + "learning_rate": 7.091377863414611e-07, + "loss": 0.2159, + "step": 62130 + }, + { + "epoch": 124.32, + "grad_norm": 28.370864868164062, + "learning_rate": 7.059156295083064e-07, + "loss": 0.2346, + "step": 62160 + }, + { + "epoch": 124.38, + "grad_norm": 32.50497817993164, + "learning_rate": 7.027002536204986e-07, + "loss": 0.1986, + "step": 62190 + }, + { + "epoch": 124.44, + "grad_norm": 66.82977294921875, + "learning_rate": 6.994916637555571e-07, + "loss": 0.2031, + "step": 62220 + }, + { + "epoch": 124.5, + "grad_norm": 85.54017639160156, + "learning_rate": 6.962898649802824e-07, + "loss": 0.1977, + "step": 62250 + }, + { + "epoch": 124.56, + "grad_norm": 66.37556457519531, + "learning_rate": 6.930948623507505e-07, + "loss": 0.2266, + "step": 62280 + }, + { + "epoch": 124.62, + "grad_norm": 55.63164138793945, + "learning_rate": 6.89906660912309e-07, + "loss": 0.2168, + "step": 62310 + }, + { + "epoch": 124.68, + "grad_norm": 42.36758804321289, + "learning_rate": 6.86725265699561e-07, + "loss": 0.2227, + "step": 62340 + }, + { + "epoch": 124.74, + "grad_norm": 220.3474578857422, + "learning_rate": 6.835506817363657e-07, + "loss": 0.1851, + "step": 62370 + }, + { + "epoch": 124.8, + "grad_norm": 40.383907318115234, + "learning_rate": 6.803829140358237e-07, + "loss": 0.2162, + "step": 62400 + }, + { + "epoch": 124.86, + "grad_norm": 64.54944610595703, + "learning_rate": 6.772219676002717e-07, + "loss": 0.2291, + "step": 62430 + }, + { + "epoch": 124.92, + "grad_norm": 80.85274505615234, + "learning_rate": 6.74067847421277e-07, + "loss": 0.2275, + "step": 62460 + }, + { + "epoch": 124.98, + "grad_norm": 103.29847717285156, + "learning_rate": 6.709205584796241e-07, + "loss": 0.2262, + "step": 62490 + }, + { + "epoch": 125.0, + "eval_loss": 0.3111739158630371, + "eval_map": 0.8043, + "eval_map_50": 0.9499, + "eval_map_75": 0.9056, + "eval_map_chicken": 0.8, + "eval_map_duck": 0.739, + "eval_map_large": 0.8116, + "eval_map_medium": 0.8063, + "eval_map_plant": 0.8741, + "eval_map_small": 0.3527, + "eval_mar_1": 0.3196, + "eval_mar_10": 0.8401, + "eval_mar_100": 0.8434, + "eval_mar_100_chicken": 0.8401, + "eval_mar_100_duck": 0.7907, + "eval_mar_100_plant": 0.8994, + "eval_mar_large": 0.8383, + "eval_mar_medium": 0.8486, + "eval_mar_small": 0.3957, + "eval_runtime": 12.7565, + "eval_samples_per_second": 7.839, + "eval_steps_per_second": 1.019, + "step": 62500 + }, + { + "epoch": 125.04, + "grad_norm": 106.2325210571289, + "learning_rate": 6.677801057453143e-07, + "loss": 0.2064, + "step": 62520 + }, + { + "epoch": 125.1, + "grad_norm": 64.54126739501953, + "learning_rate": 6.646464941775499e-07, + "loss": 0.2138, + "step": 62550 + }, + { + "epoch": 125.16, + "grad_norm": 62.765010833740234, + "learning_rate": 6.615197287247299e-07, + "loss": 0.1982, + "step": 62580 + }, + { + "epoch": 125.22, + "grad_norm": 80.47004699707031, + "learning_rate": 6.583998143244463e-07, + "loss": 0.2141, + "step": 62610 + }, + { + "epoch": 125.28, + "grad_norm": 74.80974578857422, + "learning_rate": 6.552867559034687e-07, + "loss": 0.2398, + "step": 62640 + }, + { + "epoch": 125.34, + "grad_norm": 34.195377349853516, + "learning_rate": 6.521805583777396e-07, + "loss": 0.2437, + "step": 62670 + }, + { + "epoch": 125.4, + "grad_norm": 27.298751831054688, + "learning_rate": 6.490812266523716e-07, + "loss": 0.2042, + "step": 62700 + }, + { + "epoch": 125.46, + "grad_norm": 35.28337097167969, + "learning_rate": 6.459887656216318e-07, + "loss": 0.2129, + "step": 62730 + }, + { + "epoch": 125.52, + "grad_norm": 52.30793380737305, + "learning_rate": 6.429031801689362e-07, + "loss": 0.2216, + "step": 62760 + }, + { + "epoch": 125.58, + "grad_norm": 51.34511184692383, + "learning_rate": 6.398244751668481e-07, + "loss": 0.1996, + "step": 62790 + }, + { + "epoch": 125.64, + "grad_norm": 43.16114807128906, + "learning_rate": 6.36752655477062e-07, + "loss": 0.2037, + "step": 62820 + }, + { + "epoch": 125.7, + "grad_norm": 51.460853576660156, + "learning_rate": 6.336877259504004e-07, + "loss": 0.2291, + "step": 62850 + }, + { + "epoch": 125.76, + "grad_norm": 65.08202362060547, + "learning_rate": 6.30629691426804e-07, + "loss": 0.2223, + "step": 62880 + }, + { + "epoch": 125.82, + "grad_norm": 73.04798889160156, + "learning_rate": 6.275785567353293e-07, + "loss": 0.2204, + "step": 62910 + }, + { + "epoch": 125.88, + "grad_norm": 55.04792404174805, + "learning_rate": 6.245343266941328e-07, + "loss": 0.2026, + "step": 62940 + }, + { + "epoch": 125.94, + "grad_norm": 70.68724060058594, + "learning_rate": 6.214970061104686e-07, + "loss": 0.224, + "step": 62970 + }, + { + "epoch": 126.0, + "grad_norm": 51.036346435546875, + "learning_rate": 6.184665997806832e-07, + "loss": 0.2136, + "step": 63000 + }, + { + "epoch": 126.0, + "eval_loss": 0.3078014850616455, + "eval_map": 0.8045, + "eval_map_50": 0.9492, + "eval_map_75": 0.9035, + "eval_map_chicken": 0.8033, + "eval_map_duck": 0.7368, + "eval_map_large": 0.8059, + "eval_map_medium": 0.8086, + "eval_map_plant": 0.8735, + "eval_map_small": 0.3766, + "eval_mar_1": 0.3189, + "eval_mar_10": 0.8406, + "eval_mar_100": 0.8439, + "eval_mar_100_chicken": 0.844, + "eval_mar_100_duck": 0.7887, + "eval_mar_100_plant": 0.8991, + "eval_mar_large": 0.8334, + "eval_mar_medium": 0.8497, + "eval_mar_small": 0.4271, + "eval_runtime": 11.2037, + "eval_samples_per_second": 8.926, + "eval_steps_per_second": 1.16, + "step": 63000 + }, + { + "epoch": 126.06, + "grad_norm": 64.0252685546875, + "learning_rate": 6.154431124901983e-07, + "loss": 0.2113, + "step": 63030 + }, + { + "epoch": 126.12, + "grad_norm": 84.33251190185547, + "learning_rate": 6.124265490135161e-07, + "loss": 0.2276, + "step": 63060 + }, + { + "epoch": 126.18, + "grad_norm": 55.78408432006836, + "learning_rate": 6.094169141142014e-07, + "loss": 0.19, + "step": 63090 + }, + { + "epoch": 126.24, + "grad_norm": 39.02850341796875, + "learning_rate": 6.064142125448763e-07, + "loss": 0.2128, + "step": 63120 + }, + { + "epoch": 126.3, + "grad_norm": 38.10506057739258, + "learning_rate": 6.034184490472195e-07, + "loss": 0.203, + "step": 63150 + }, + { + "epoch": 126.36, + "grad_norm": 75.8965835571289, + "learning_rate": 6.004296283519478e-07, + "loss": 0.2176, + "step": 63180 + }, + { + "epoch": 126.42, + "grad_norm": 49.18169403076172, + "learning_rate": 5.974477551788194e-07, + "loss": 0.2212, + "step": 63210 + }, + { + "epoch": 126.48, + "grad_norm": 192.56015014648438, + "learning_rate": 5.944728342366179e-07, + "loss": 0.2368, + "step": 63240 + }, + { + "epoch": 126.54, + "grad_norm": 100.00894165039062, + "learning_rate": 5.915048702231491e-07, + "loss": 0.2315, + "step": 63270 + }, + { + "epoch": 126.6, + "grad_norm": 64.26509094238281, + "learning_rate": 5.885438678252342e-07, + "loss": 0.1936, + "step": 63300 + }, + { + "epoch": 126.66, + "grad_norm": 47.66757583618164, + "learning_rate": 5.855898317186992e-07, + "loss": 0.2149, + "step": 63330 + }, + { + "epoch": 126.72, + "grad_norm": 23.463951110839844, + "learning_rate": 5.826427665683715e-07, + "loss": 0.1777, + "step": 63360 + }, + { + "epoch": 126.78, + "grad_norm": 36.043025970458984, + "learning_rate": 5.797026770280683e-07, + "loss": 0.2227, + "step": 63390 + }, + { + "epoch": 126.84, + "grad_norm": 72.77320861816406, + "learning_rate": 5.767695677405921e-07, + "loss": 0.2175, + "step": 63420 + }, + { + "epoch": 126.9, + "grad_norm": 75.97449493408203, + "learning_rate": 5.738434433377244e-07, + "loss": 0.1979, + "step": 63450 + }, + { + "epoch": 126.96, + "grad_norm": 45.13546371459961, + "learning_rate": 5.709243084402128e-07, + "loss": 0.2446, + "step": 63480 + }, + { + "epoch": 127.0, + "eval_loss": 0.3091703951358795, + "eval_map": 0.8063, + "eval_map_50": 0.9502, + "eval_map_75": 0.9039, + "eval_map_chicken": 0.8081, + "eval_map_duck": 0.7388, + "eval_map_large": 0.7998, + "eval_map_medium": 0.8106, + "eval_map_plant": 0.872, + "eval_map_small": 0.3843, + "eval_mar_1": 0.3193, + "eval_mar_10": 0.8417, + "eval_mar_100": 0.8447, + "eval_mar_100_chicken": 0.8464, + "eval_mar_100_duck": 0.7897, + "eval_mar_100_plant": 0.8979, + "eval_mar_large": 0.8321, + "eval_mar_medium": 0.85, + "eval_mar_small": 0.4267, + "eval_runtime": 13.5522, + "eval_samples_per_second": 7.379, + "eval_steps_per_second": 0.959, + "step": 63500 + }, + { + "epoch": 127.02, + "grad_norm": 22.453636169433594, + "learning_rate": 5.680121676577721e-07, + "loss": 0.2081, + "step": 63510 + }, + { + "epoch": 127.08, + "grad_norm": 66.31897735595703, + "learning_rate": 5.651070255890689e-07, + "loss": 0.2167, + "step": 63540 + }, + { + "epoch": 127.14, + "grad_norm": 107.52672576904297, + "learning_rate": 5.622088868217179e-07, + "loss": 0.2255, + "step": 63570 + }, + { + "epoch": 127.2, + "grad_norm": 49.077579498291016, + "learning_rate": 5.593177559322776e-07, + "loss": 0.2447, + "step": 63600 + }, + { + "epoch": 127.26, + "grad_norm": 560.881591796875, + "learning_rate": 5.564336374862373e-07, + "loss": 0.193, + "step": 63630 + }, + { + "epoch": 127.32, + "grad_norm": 79.59185791015625, + "learning_rate": 5.535565360380146e-07, + "loss": 0.2024, + "step": 63660 + }, + { + "epoch": 127.38, + "grad_norm": 86.64059448242188, + "learning_rate": 5.506864561309455e-07, + "loss": 0.2036, + "step": 63690 + }, + { + "epoch": 127.44, + "grad_norm": 47.96025848388672, + "learning_rate": 5.478234022972756e-07, + "loss": 0.2284, + "step": 63720 + }, + { + "epoch": 127.5, + "grad_norm": 43.72831344604492, + "learning_rate": 5.449673790581611e-07, + "loss": 0.2129, + "step": 63750 + }, + { + "epoch": 127.56, + "grad_norm": 38.098045349121094, + "learning_rate": 5.421183909236494e-07, + "loss": 0.2136, + "step": 63780 + }, + { + "epoch": 127.62, + "grad_norm": 44.35323715209961, + "learning_rate": 5.392764423926844e-07, + "loss": 0.2376, + "step": 63810 + }, + { + "epoch": 127.68, + "grad_norm": 48.356201171875, + "learning_rate": 5.364415379530891e-07, + "loss": 0.2212, + "step": 63840 + }, + { + "epoch": 127.74, + "grad_norm": 33.646507263183594, + "learning_rate": 5.33613682081564e-07, + "loss": 0.185, + "step": 63870 + }, + { + "epoch": 127.8, + "grad_norm": 52.02384567260742, + "learning_rate": 5.307928792436812e-07, + "loss": 0.2146, + "step": 63900 + }, + { + "epoch": 127.86, + "grad_norm": 38.83525085449219, + "learning_rate": 5.279791338938717e-07, + "loss": 0.2158, + "step": 63930 + }, + { + "epoch": 127.92, + "grad_norm": 32.53917694091797, + "learning_rate": 5.251724504754258e-07, + "loss": 0.202, + "step": 63960 + }, + { + "epoch": 127.98, + "grad_norm": 40.70000457763672, + "learning_rate": 5.22372833420478e-07, + "loss": 0.2116, + "step": 63990 + }, + { + "epoch": 128.0, + "eval_loss": 0.30406132340431213, + "eval_map": 0.809, + "eval_map_50": 0.9479, + "eval_map_75": 0.9041, + "eval_map_chicken": 0.807, + "eval_map_duck": 0.7429, + "eval_map_large": 0.8049, + "eval_map_medium": 0.8146, + "eval_map_plant": 0.8771, + "eval_map_small": 0.3855, + "eval_mar_1": 0.3241, + "eval_mar_10": 0.844, + "eval_mar_100": 0.8471, + "eval_mar_100_chicken": 0.8476, + "eval_mar_100_duck": 0.7918, + "eval_mar_100_plant": 0.9018, + "eval_mar_large": 0.832, + "eval_mar_medium": 0.8535, + "eval_mar_small": 0.4233, + "eval_runtime": 14.4971, + "eval_samples_per_second": 6.898, + "eval_steps_per_second": 0.897, + "step": 64000 + }, + { + "epoch": 128.04, + "grad_norm": 49.22426223754883, + "learning_rate": 5.19580287150005e-07, + "loss": 0.2364, + "step": 64020 + }, + { + "epoch": 128.1, + "grad_norm": 30.956523895263672, + "learning_rate": 5.167948160738206e-07, + "loss": 0.2098, + "step": 64050 + }, + { + "epoch": 128.16, + "grad_norm": 46.6041145324707, + "learning_rate": 5.140164245905633e-07, + "loss": 0.1986, + "step": 64080 + }, + { + "epoch": 128.22, + "grad_norm": 50.73973846435547, + "learning_rate": 5.112451170876903e-07, + "loss": 0.2013, + "step": 64110 + }, + { + "epoch": 128.28, + "grad_norm": 53.47914505004883, + "learning_rate": 5.084808979414779e-07, + "loss": 0.2164, + "step": 64140 + }, + { + "epoch": 128.34, + "grad_norm": 53.10034942626953, + "learning_rate": 5.057237715170032e-07, + "loss": 0.2212, + "step": 64170 + }, + { + "epoch": 128.4, + "grad_norm": 39.50797653198242, + "learning_rate": 5.029737421681446e-07, + "loss": 0.2196, + "step": 64200 + }, + { + "epoch": 128.46, + "grad_norm": 125.84817504882812, + "learning_rate": 5.002308142375762e-07, + "loss": 0.2078, + "step": 64230 + }, + { + "epoch": 128.52, + "grad_norm": 66.1211929321289, + "learning_rate": 4.97494992056754e-07, + "loss": 0.2078, + "step": 64260 + }, + { + "epoch": 128.58, + "grad_norm": 77.76156616210938, + "learning_rate": 4.947662799459152e-07, + "loss": 0.2247, + "step": 64290 + }, + { + "epoch": 128.64, + "grad_norm": 41.21811294555664, + "learning_rate": 4.920446822140673e-07, + "loss": 0.2013, + "step": 64320 + }, + { + "epoch": 128.7, + "grad_norm": 48.723506927490234, + "learning_rate": 4.893302031589864e-07, + "loss": 0.2264, + "step": 64350 + }, + { + "epoch": 128.76, + "grad_norm": 48.01104736328125, + "learning_rate": 4.866228470672041e-07, + "loss": 0.2086, + "step": 64380 + }, + { + "epoch": 128.82, + "grad_norm": 52.20193862915039, + "learning_rate": 4.839226182140072e-07, + "loss": 0.2079, + "step": 64410 + }, + { + "epoch": 128.88, + "grad_norm": 30.023849487304688, + "learning_rate": 4.812295208634238e-07, + "loss": 0.2095, + "step": 64440 + }, + { + "epoch": 128.94, + "grad_norm": 44.33883285522461, + "learning_rate": 4.78543559268222e-07, + "loss": 0.227, + "step": 64470 + }, + { + "epoch": 129.0, + "grad_norm": 45.76134490966797, + "learning_rate": 4.758647376699033e-07, + "loss": 0.2194, + "step": 64500 + }, + { + "epoch": 129.0, + "eval_loss": 0.3098631203174591, + "eval_map": 0.809, + "eval_map_50": 0.9511, + "eval_map_75": 0.9049, + "eval_map_chicken": 0.8081, + "eval_map_duck": 0.7421, + "eval_map_large": 0.8153, + "eval_map_medium": 0.8112, + "eval_map_plant": 0.8767, + "eval_map_small": 0.3936, + "eval_mar_1": 0.3202, + "eval_mar_10": 0.8442, + "eval_mar_100": 0.8471, + "eval_mar_100_chicken": 0.8468, + "eval_mar_100_duck": 0.7928, + "eval_mar_100_plant": 0.9018, + "eval_mar_large": 0.8389, + "eval_mar_medium": 0.8508, + "eval_mar_small": 0.4448, + "eval_runtime": 13.9885, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.929, + "step": 64500 + }, + { + "epoch": 129.06, + "grad_norm": 78.0044174194336, + "learning_rate": 4.731930602986906e-07, + "loss": 0.2242, + "step": 64530 + }, + { + "epoch": 129.12, + "grad_norm": 99.87544250488281, + "learning_rate": 4.705285313735297e-07, + "loss": 0.2007, + "step": 64560 + }, + { + "epoch": 129.18, + "grad_norm": 43.78367233276367, + "learning_rate": 4.678711551020743e-07, + "loss": 0.1949, + "step": 64590 + }, + { + "epoch": 129.24, + "grad_norm": 41.33878707885742, + "learning_rate": 4.6522093568068307e-07, + "loss": 0.1895, + "step": 64620 + }, + { + "epoch": 129.3, + "grad_norm": 112.94983673095703, + "learning_rate": 4.625778772944156e-07, + "loss": 0.1941, + "step": 64650 + }, + { + "epoch": 129.36, + "grad_norm": 39.820884704589844, + "learning_rate": 4.599419841170216e-07, + "loss": 0.1935, + "step": 64680 + }, + { + "epoch": 129.42, + "grad_norm": 52.91804885864258, + "learning_rate": 4.5731326031093645e-07, + "loss": 0.2079, + "step": 64710 + }, + { + "epoch": 129.48, + "grad_norm": 65.22171020507812, + "learning_rate": 4.546917100272735e-07, + "loss": 0.2195, + "step": 64740 + }, + { + "epoch": 129.54, + "grad_norm": 39.4925651550293, + "learning_rate": 4.520773374058179e-07, + "loss": 0.2156, + "step": 64770 + }, + { + "epoch": 129.6, + "grad_norm": 55.07014846801758, + "learning_rate": 4.494701465750217e-07, + "loss": 0.2638, + "step": 64800 + }, + { + "epoch": 129.66, + "grad_norm": 45.676753997802734, + "learning_rate": 4.4687014165199547e-07, + "loss": 0.2218, + "step": 64830 + }, + { + "epoch": 129.72, + "grad_norm": 90.54066467285156, + "learning_rate": 4.4427732674250045e-07, + "loss": 0.1989, + "step": 64860 + }, + { + "epoch": 129.78, + "grad_norm": 101.72560119628906, + "learning_rate": 4.416917059409465e-07, + "loss": 0.2263, + "step": 64890 + }, + { + "epoch": 129.84, + "grad_norm": 36.34654998779297, + "learning_rate": 4.391132833303807e-07, + "loss": 0.2257, + "step": 64920 + }, + { + "epoch": 129.9, + "grad_norm": 62.203880310058594, + "learning_rate": 4.3654206298248625e-07, + "loss": 0.2127, + "step": 64950 + }, + { + "epoch": 129.96, + "grad_norm": 44.24311065673828, + "learning_rate": 4.3397804895756957e-07, + "loss": 0.2259, + "step": 64980 + }, + { + "epoch": 130.0, + "eval_loss": 0.30975058674812317, + "eval_map": 0.8074, + "eval_map_50": 0.951, + "eval_map_75": 0.9035, + "eval_map_chicken": 0.8069, + "eval_map_duck": 0.7433, + "eval_map_large": 0.8103, + "eval_map_medium": 0.808, + "eval_map_plant": 0.872, + "eval_map_small": 0.385, + "eval_mar_1": 0.323, + "eval_mar_10": 0.8424, + "eval_mar_100": 0.8457, + "eval_mar_100_chicken": 0.846, + "eval_mar_100_duck": 0.7928, + "eval_mar_100_plant": 0.8982, + "eval_mar_large": 0.8372, + "eval_mar_medium": 0.8483, + "eval_mar_small": 0.4367, + "eval_runtime": 12.5307, + "eval_samples_per_second": 7.98, + "eval_steps_per_second": 1.037, + "step": 65000 + }, + { + "epoch": 130.02, + "grad_norm": 26.39763069152832, + "learning_rate": 4.31421245304558e-07, + "loss": 0.2253, + "step": 65010 + }, + { + "epoch": 130.08, + "grad_norm": 103.76341247558594, + "learning_rate": 4.2887165606099513e-07, + "loss": 0.1987, + "step": 65040 + }, + { + "epoch": 130.14, + "grad_norm": 63.88904571533203, + "learning_rate": 4.263292852530293e-07, + "loss": 0.1979, + "step": 65070 + }, + { + "epoch": 130.2, + "grad_norm": 83.46735382080078, + "learning_rate": 4.237941368954124e-07, + "loss": 0.2374, + "step": 65100 + }, + { + "epoch": 130.26, + "grad_norm": 55.29493713378906, + "learning_rate": 4.212662149914887e-07, + "loss": 0.2058, + "step": 65130 + }, + { + "epoch": 130.32, + "grad_norm": 43.588871002197266, + "learning_rate": 4.1874552353319107e-07, + "loss": 0.2035, + "step": 65160 + }, + { + "epoch": 130.38, + "grad_norm": 36.1904296875, + "learning_rate": 4.162320665010372e-07, + "loss": 0.1872, + "step": 65190 + }, + { + "epoch": 130.44, + "grad_norm": 39.65531921386719, + "learning_rate": 4.137258478641176e-07, + "loss": 0.1862, + "step": 65220 + }, + { + "epoch": 130.5, + "grad_norm": 88.49795532226562, + "learning_rate": 4.112268715800943e-07, + "loss": 0.2218, + "step": 65250 + }, + { + "epoch": 130.56, + "grad_norm": 31.362567901611328, + "learning_rate": 4.087351415951918e-07, + "loss": 0.2277, + "step": 65280 + }, + { + "epoch": 130.62, + "grad_norm": 25.125764846801758, + "learning_rate": 4.062506618441908e-07, + "loss": 0.1873, + "step": 65310 + }, + { + "epoch": 130.68, + "grad_norm": 32.94488525390625, + "learning_rate": 4.0377343625042587e-07, + "loss": 0.2049, + "step": 65340 + }, + { + "epoch": 130.74, + "grad_norm": 79.44256591796875, + "learning_rate": 4.013034687257727e-07, + "loss": 0.221, + "step": 65370 + }, + { + "epoch": 130.8, + "grad_norm": 57.485897064208984, + "learning_rate": 3.9884076317064813e-07, + "loss": 0.2389, + "step": 65400 + }, + { + "epoch": 130.86, + "grad_norm": 22.714303970336914, + "learning_rate": 3.963853234740006e-07, + "loss": 0.2628, + "step": 65430 + }, + { + "epoch": 130.92, + "grad_norm": 83.66651916503906, + "learning_rate": 3.9393715351330243e-07, + "loss": 0.2366, + "step": 65460 + }, + { + "epoch": 130.98, + "grad_norm": 34.74546813964844, + "learning_rate": 3.9149625715455107e-07, + "loss": 0.1932, + "step": 65490 + }, + { + "epoch": 131.0, + "eval_loss": 0.30847281217575073, + "eval_map": 0.8098, + "eval_map_50": 0.9506, + "eval_map_75": 0.9102, + "eval_map_chicken": 0.8093, + "eval_map_duck": 0.7464, + "eval_map_large": 0.8153, + "eval_map_medium": 0.8121, + "eval_map_plant": 0.8738, + "eval_map_small": 0.373, + "eval_mar_1": 0.3205, + "eval_mar_10": 0.8444, + "eval_mar_100": 0.8473, + "eval_mar_100_chicken": 0.8476, + "eval_mar_100_duck": 0.7948, + "eval_mar_100_plant": 0.8994, + "eval_mar_large": 0.8406, + "eval_mar_medium": 0.8522, + "eval_mar_small": 0.4119, + "eval_runtime": 11.4515, + "eval_samples_per_second": 8.733, + "eval_steps_per_second": 1.135, + "step": 65500 + }, + { + "epoch": 131.04, + "grad_norm": 79.6670913696289, + "learning_rate": 3.890626382522539e-07, + "loss": 0.1955, + "step": 65520 + }, + { + "epoch": 131.1, + "grad_norm": 31.21834945678711, + "learning_rate": 3.866363006494256e-07, + "loss": 0.2079, + "step": 65550 + }, + { + "epoch": 131.16, + "grad_norm": 108.16787719726562, + "learning_rate": 3.8421724817758745e-07, + "loss": 0.1955, + "step": 65580 + }, + { + "epoch": 131.22, + "grad_norm": 51.9688606262207, + "learning_rate": 3.818054846567515e-07, + "loss": 0.2034, + "step": 65610 + }, + { + "epoch": 131.28, + "grad_norm": 32.75484848022461, + "learning_rate": 3.794010138954213e-07, + "loss": 0.231, + "step": 65640 + }, + { + "epoch": 131.34, + "grad_norm": 41.51599884033203, + "learning_rate": 3.770038396905862e-07, + "loss": 0.2235, + "step": 65670 + }, + { + "epoch": 131.4, + "grad_norm": 59.717350006103516, + "learning_rate": 3.7461396582771035e-07, + "loss": 0.2181, + "step": 65700 + }, + { + "epoch": 131.46, + "grad_norm": 99.7775650024414, + "learning_rate": 3.7223139608073e-07, + "loss": 0.2233, + "step": 65730 + }, + { + "epoch": 131.52, + "grad_norm": 50.78632354736328, + "learning_rate": 3.698561342120499e-07, + "loss": 0.2195, + "step": 65760 + }, + { + "epoch": 131.58, + "grad_norm": 73.87471771240234, + "learning_rate": 3.674881839725314e-07, + "loss": 0.2186, + "step": 65790 + }, + { + "epoch": 131.64, + "grad_norm": 63.35624313354492, + "learning_rate": 3.651275491014905e-07, + "loss": 0.2371, + "step": 65820 + }, + { + "epoch": 131.7, + "grad_norm": 111.2394027709961, + "learning_rate": 3.627742333266937e-07, + "loss": 0.2038, + "step": 65850 + }, + { + "epoch": 131.76, + "grad_norm": 46.248443603515625, + "learning_rate": 3.604282403643472e-07, + "loss": 0.1969, + "step": 65880 + }, + { + "epoch": 131.82, + "grad_norm": 109.54549407958984, + "learning_rate": 3.5808957391909315e-07, + "loss": 0.2108, + "step": 65910 + }, + { + "epoch": 131.88, + "grad_norm": 77.7681655883789, + "learning_rate": 3.557582376840063e-07, + "loss": 0.2123, + "step": 65940 + }, + { + "epoch": 131.94, + "grad_norm": 37.92878723144531, + "learning_rate": 3.534342353405834e-07, + "loss": 0.2221, + "step": 65970 + }, + { + "epoch": 132.0, + "grad_norm": 75.26317596435547, + "learning_rate": 3.511175705587433e-07, + "loss": 0.1961, + "step": 66000 + }, + { + "epoch": 132.0, + "eval_loss": 0.30847275257110596, + "eval_map": 0.8095, + "eval_map_50": 0.9505, + "eval_map_75": 0.9027, + "eval_map_chicken": 0.8096, + "eval_map_duck": 0.7432, + "eval_map_large": 0.8094, + "eval_map_medium": 0.8132, + "eval_map_plant": 0.8755, + "eval_map_small": 0.3761, + "eval_mar_1": 0.3203, + "eval_mar_10": 0.8439, + "eval_mar_100": 0.8473, + "eval_mar_100_chicken": 0.8484, + "eval_mar_100_duck": 0.7928, + "eval_mar_100_plant": 0.9006, + "eval_mar_large": 0.8352, + "eval_mar_medium": 0.8526, + "eval_mar_small": 0.4219, + "eval_runtime": 12.145, + "eval_samples_per_second": 8.234, + "eval_steps_per_second": 1.07, + "step": 66000 + }, + { + "epoch": 132.06, + "grad_norm": 29.321550369262695, + "learning_rate": 3.488082469968146e-07, + "loss": 0.2137, + "step": 66030 + }, + { + "epoch": 132.12, + "grad_norm": 54.83131408691406, + "learning_rate": 3.465062683015341e-07, + "loss": 0.2166, + "step": 66060 + }, + { + "epoch": 132.18, + "grad_norm": 50.95697784423828, + "learning_rate": 3.442116381080418e-07, + "loss": 0.2172, + "step": 66090 + }, + { + "epoch": 132.24, + "grad_norm": 44.23823928833008, + "learning_rate": 3.419243600398703e-07, + "loss": 0.2118, + "step": 66120 + }, + { + "epoch": 132.3, + "grad_norm": 40.69063186645508, + "learning_rate": 3.396444377089453e-07, + "loss": 0.2206, + "step": 66150 + }, + { + "epoch": 132.36, + "grad_norm": 49.35795974731445, + "learning_rate": 3.373718747155752e-07, + "loss": 0.2042, + "step": 66180 + }, + { + "epoch": 132.42, + "grad_norm": 21.74803924560547, + "learning_rate": 3.351066746484455e-07, + "loss": 0.2228, + "step": 66210 + }, + { + "epoch": 132.48, + "grad_norm": 58.39532470703125, + "learning_rate": 3.328488410846187e-07, + "loss": 0.2136, + "step": 66240 + }, + { + "epoch": 132.54, + "grad_norm": 41.151676177978516, + "learning_rate": 3.3059837758951995e-07, + "loss": 0.1966, + "step": 66270 + }, + { + "epoch": 132.6, + "grad_norm": 40.82617950439453, + "learning_rate": 3.283552877169399e-07, + "loss": 0.1999, + "step": 66300 + }, + { + "epoch": 132.66, + "grad_norm": 35.09123229980469, + "learning_rate": 3.2611957500902345e-07, + "loss": 0.2105, + "step": 66330 + }, + { + "epoch": 132.72, + "grad_norm": 45.79312515258789, + "learning_rate": 3.2389124299626483e-07, + "loss": 0.2106, + "step": 66360 + }, + { + "epoch": 132.78, + "grad_norm": 71.86212921142578, + "learning_rate": 3.216702951975059e-07, + "loss": 0.2331, + "step": 66390 + }, + { + "epoch": 132.84, + "grad_norm": 65.11103057861328, + "learning_rate": 3.194567351199257e-07, + "loss": 0.2018, + "step": 66420 + }, + { + "epoch": 132.9, + "grad_norm": 55.746700286865234, + "learning_rate": 3.172505662590386e-07, + "loss": 0.2141, + "step": 66450 + }, + { + "epoch": 132.96, + "grad_norm": 50.38323211669922, + "learning_rate": 3.150517920986851e-07, + "loss": 0.2426, + "step": 66480 + }, + { + "epoch": 133.0, + "eval_loss": 0.3091796040534973, + "eval_map": 0.8092, + "eval_map_50": 0.9509, + "eval_map_75": 0.9023, + "eval_map_chicken": 0.811, + "eval_map_duck": 0.7433, + "eval_map_large": 0.8172, + "eval_map_medium": 0.8094, + "eval_map_plant": 0.8734, + "eval_map_small": 0.3815, + "eval_mar_1": 0.3209, + "eval_mar_10": 0.8446, + "eval_mar_100": 0.8473, + "eval_mar_100_chicken": 0.8504, + "eval_mar_100_duck": 0.7928, + "eval_mar_100_plant": 0.8988, + "eval_mar_large": 0.8398, + "eval_mar_medium": 0.8504, + "eval_mar_small": 0.4271, + "eval_runtime": 14.3657, + "eval_samples_per_second": 6.961, + "eval_steps_per_second": 0.905, + "step": 66500 + }, + { + "epoch": 133.02, + "grad_norm": 41.360443115234375, + "learning_rate": 3.128604161110299e-07, + "loss": 0.2051, + "step": 66510 + }, + { + "epoch": 133.08, + "grad_norm": 207.290283203125, + "learning_rate": 3.106764417565561e-07, + "loss": 0.1918, + "step": 66540 + }, + { + "epoch": 133.14, + "grad_norm": 49.47697448730469, + "learning_rate": 3.08499872484056e-07, + "loss": 0.1988, + "step": 66570 + }, + { + "epoch": 133.2, + "grad_norm": 101.23628234863281, + "learning_rate": 3.0633071173062966e-07, + "loss": 0.2284, + "step": 66600 + }, + { + "epoch": 133.26, + "grad_norm": 62.6041374206543, + "learning_rate": 3.0416896292167873e-07, + "loss": 0.2599, + "step": 66630 + }, + { + "epoch": 133.32, + "grad_norm": 59.834163665771484, + "learning_rate": 3.0201462947089865e-07, + "loss": 0.2035, + "step": 66660 + }, + { + "epoch": 133.38, + "grad_norm": 60.794857025146484, + "learning_rate": 2.99867714780277e-07, + "loss": 0.2439, + "step": 66690 + }, + { + "epoch": 133.44, + "grad_norm": 48.02446746826172, + "learning_rate": 2.9772822224008515e-07, + "loss": 0.1922, + "step": 66720 + }, + { + "epoch": 133.5, + "grad_norm": 29.021291732788086, + "learning_rate": 2.9559615522887275e-07, + "loss": 0.1886, + "step": 66750 + }, + { + "epoch": 133.56, + "grad_norm": 84.00584411621094, + "learning_rate": 2.9347151711346556e-07, + "loss": 0.2242, + "step": 66780 + }, + { + "epoch": 133.62, + "grad_norm": 35.25551223754883, + "learning_rate": 2.913543112489564e-07, + "loss": 0.2064, + "step": 66810 + }, + { + "epoch": 133.68, + "grad_norm": 47.507972717285156, + "learning_rate": 2.892445409787037e-07, + "loss": 0.1964, + "step": 66840 + }, + { + "epoch": 133.74, + "grad_norm": 44.02022171020508, + "learning_rate": 2.8714220963432125e-07, + "loss": 0.2238, + "step": 66870 + }, + { + "epoch": 133.8, + "grad_norm": 34.1710319519043, + "learning_rate": 2.850473205356774e-07, + "loss": 0.1996, + "step": 66900 + }, + { + "epoch": 133.86, + "grad_norm": 117.07316589355469, + "learning_rate": 2.829598769908892e-07, + "loss": 0.1996, + "step": 66930 + }, + { + "epoch": 133.92, + "grad_norm": 47.652687072753906, + "learning_rate": 2.8087988229631325e-07, + "loss": 0.2035, + "step": 66960 + }, + { + "epoch": 133.98, + "grad_norm": 82.08881378173828, + "learning_rate": 2.788073397365465e-07, + "loss": 0.2011, + "step": 66990 + }, + { + "epoch": 134.0, + "eval_loss": 0.3087918162345886, + "eval_map": 0.8087, + "eval_map_50": 0.9501, + "eval_map_75": 0.9024, + "eval_map_chicken": 0.8105, + "eval_map_duck": 0.7434, + "eval_map_large": 0.8148, + "eval_map_medium": 0.8093, + "eval_map_plant": 0.8724, + "eval_map_small": 0.3701, + "eval_mar_1": 0.3204, + "eval_mar_10": 0.8441, + "eval_mar_100": 0.847, + "eval_mar_100_chicken": 0.8496, + "eval_mar_100_duck": 0.7928, + "eval_mar_100_plant": 0.8985, + "eval_mar_large": 0.8407, + "eval_mar_medium": 0.8503, + "eval_mar_small": 0.4205, + "eval_runtime": 12.2877, + "eval_samples_per_second": 8.138, + "eval_steps_per_second": 1.058, + "step": 67000 + }, + { + "epoch": 134.04, + "grad_norm": 50.52214050292969, + "learning_rate": 2.76742252584416e-07, + "loss": 0.2204, + "step": 67020 + }, + { + "epoch": 134.1, + "grad_norm": 78.1152114868164, + "learning_rate": 2.746846241009765e-07, + "loss": 0.216, + "step": 67050 + }, + { + "epoch": 134.16, + "grad_norm": 77.4903793334961, + "learning_rate": 2.7263445753550275e-07, + "loss": 0.2467, + "step": 67080 + }, + { + "epoch": 134.22, + "grad_norm": 73.16485595703125, + "learning_rate": 2.7059175612548947e-07, + "loss": 0.216, + "step": 67110 + }, + { + "epoch": 134.28, + "grad_norm": 68.31117248535156, + "learning_rate": 2.685565230966408e-07, + "loss": 0.2242, + "step": 67140 + }, + { + "epoch": 134.34, + "grad_norm": 36.76439666748047, + "learning_rate": 2.665287616628659e-07, + "loss": 0.198, + "step": 67170 + }, + { + "epoch": 134.4, + "grad_norm": 59.230377197265625, + "learning_rate": 2.6450847502627883e-07, + "loss": 0.1889, + "step": 67200 + }, + { + "epoch": 134.46, + "grad_norm": 44.514556884765625, + "learning_rate": 2.6249566637718714e-07, + "loss": 0.2342, + "step": 67230 + }, + { + "epoch": 134.52, + "grad_norm": 50.98478317260742, + "learning_rate": 2.604903388940899e-07, + "loss": 0.2152, + "step": 67260 + }, + { + "epoch": 134.58, + "grad_norm": 51.002811431884766, + "learning_rate": 2.584924957436735e-07, + "loss": 0.2133, + "step": 67290 + }, + { + "epoch": 134.64, + "grad_norm": 54.643314361572266, + "learning_rate": 2.5650214008080544e-07, + "loss": 0.2243, + "step": 67320 + }, + { + "epoch": 134.7, + "grad_norm": 87.97196960449219, + "learning_rate": 2.5451927504852757e-07, + "loss": 0.2116, + "step": 67350 + }, + { + "epoch": 134.76, + "grad_norm": 36.109962463378906, + "learning_rate": 2.525439037780558e-07, + "loss": 0.1959, + "step": 67380 + }, + { + "epoch": 134.82, + "grad_norm": 55.99542236328125, + "learning_rate": 2.505760293887699e-07, + "loss": 0.2036, + "step": 67410 + }, + { + "epoch": 134.88, + "grad_norm": 61.448246002197266, + "learning_rate": 2.486156549882135e-07, + "loss": 0.2016, + "step": 67440 + }, + { + "epoch": 134.94, + "grad_norm": 64.0799789428711, + "learning_rate": 2.4666278367208417e-07, + "loss": 0.2162, + "step": 67470 + }, + { + "epoch": 135.0, + "grad_norm": 79.16766357421875, + "learning_rate": 2.447174185242324e-07, + "loss": 0.2408, + "step": 67500 + }, + { + "epoch": 135.0, + "eval_loss": 0.3119555413722992, + "eval_map": 0.8034, + "eval_map_50": 0.9497, + "eval_map_75": 0.9032, + "eval_map_chicken": 0.8028, + "eval_map_duck": 0.7355, + "eval_map_large": 0.8049, + "eval_map_medium": 0.8042, + "eval_map_plant": 0.8718, + "eval_map_small": 0.3635, + "eval_mar_1": 0.3183, + "eval_mar_10": 0.8406, + "eval_mar_100": 0.8434, + "eval_mar_100_chicken": 0.8437, + "eval_mar_100_duck": 0.7876, + "eval_mar_100_plant": 0.8988, + "eval_mar_large": 0.832, + "eval_mar_medium": 0.848, + "eval_mar_small": 0.409, + "eval_runtime": 13.7574, + "eval_samples_per_second": 7.269, + "eval_steps_per_second": 0.945, + "step": 67500 + }, + { + "epoch": 135.06, + "grad_norm": 65.69921875, + "learning_rate": 2.4277956261665624e-07, + "loss": 0.2107, + "step": 67530 + }, + { + "epoch": 135.12, + "grad_norm": 64.71975708007812, + "learning_rate": 2.40849219009493e-07, + "loss": 0.228, + "step": 67560 + }, + { + "epoch": 135.18, + "grad_norm": 59.862911224365234, + "learning_rate": 2.389263907510209e-07, + "loss": 0.2089, + "step": 67590 + }, + { + "epoch": 135.24, + "grad_norm": 32.341583251953125, + "learning_rate": 2.3701108087764657e-07, + "loss": 0.1987, + "step": 67620 + }, + { + "epoch": 135.3, + "grad_norm": 109.37493133544922, + "learning_rate": 2.351032924139063e-07, + "loss": 0.2295, + "step": 67650 + }, + { + "epoch": 135.36, + "grad_norm": 45.07475662231445, + "learning_rate": 2.3320302837245846e-07, + "loss": 0.1956, + "step": 67680 + }, + { + "epoch": 135.42, + "grad_norm": 54.63572692871094, + "learning_rate": 2.3131029175407883e-07, + "loss": 0.1937, + "step": 67710 + }, + { + "epoch": 135.48, + "grad_norm": 79.57504272460938, + "learning_rate": 2.2942508554765764e-07, + "loss": 0.1992, + "step": 67740 + }, + { + "epoch": 135.54, + "grad_norm": 74.65667724609375, + "learning_rate": 2.27547412730193e-07, + "loss": 0.2102, + "step": 67770 + }, + { + "epoch": 135.6, + "grad_norm": 22.859420776367188, + "learning_rate": 2.2567727626678527e-07, + "loss": 0.2013, + "step": 67800 + }, + { + "epoch": 135.66, + "grad_norm": 52.90283966064453, + "learning_rate": 2.2381467911063658e-07, + "loss": 0.2175, + "step": 67830 + }, + { + "epoch": 135.72, + "grad_norm": 93.8233871459961, + "learning_rate": 2.2195962420304083e-07, + "loss": 0.2423, + "step": 67860 + }, + { + "epoch": 135.78, + "grad_norm": 54.931026458740234, + "learning_rate": 2.2011211447338477e-07, + "loss": 0.1994, + "step": 67890 + }, + { + "epoch": 135.84, + "grad_norm": 80.65608978271484, + "learning_rate": 2.1827215283913683e-07, + "loss": 0.2231, + "step": 67920 + }, + { + "epoch": 135.9, + "grad_norm": 249.62669372558594, + "learning_rate": 2.1643974220584729e-07, + "loss": 0.1899, + "step": 67950 + }, + { + "epoch": 135.96, + "grad_norm": 103.24887084960938, + "learning_rate": 2.1461488546714425e-07, + "loss": 0.2169, + "step": 67980 + }, + { + "epoch": 136.0, + "eval_loss": 0.3096560537815094, + "eval_map": 0.8066, + "eval_map_50": 0.9498, + "eval_map_75": 0.9037, + "eval_map_chicken": 0.8083, + "eval_map_duck": 0.7384, + "eval_map_large": 0.8147, + "eval_map_medium": 0.8068, + "eval_map_plant": 0.8732, + "eval_map_small": 0.3689, + "eval_mar_1": 0.3194, + "eval_mar_10": 0.8436, + "eval_mar_100": 0.8464, + "eval_mar_100_chicken": 0.8476, + "eval_mar_100_duck": 0.7918, + "eval_mar_100_plant": 0.8997, + "eval_mar_large": 0.8411, + "eval_mar_medium": 0.8503, + "eval_mar_small": 0.4219, + "eval_runtime": 11.2635, + "eval_samples_per_second": 8.878, + "eval_steps_per_second": 1.154, + "step": 68000 + }, + { + "epoch": 136.02, + "grad_norm": 66.1946792602539, + "learning_rate": 2.127975855047243e-07, + "loss": 0.2674, + "step": 68010 + }, + { + "epoch": 136.08, + "grad_norm": 45.892452239990234, + "learning_rate": 2.1098784518835292e-07, + "loss": 0.2066, + "step": 68040 + }, + { + "epoch": 136.14, + "grad_norm": 48.29930114746094, + "learning_rate": 2.0918566737585688e-07, + "loss": 0.2193, + "step": 68070 + }, + { + "epoch": 136.2, + "grad_norm": 48.946964263916016, + "learning_rate": 2.0739105491312028e-07, + "loss": 0.2229, + "step": 68100 + }, + { + "epoch": 136.26, + "grad_norm": 52.66734313964844, + "learning_rate": 2.056040106340823e-07, + "loss": 0.2114, + "step": 68130 + }, + { + "epoch": 136.32, + "grad_norm": 61.51396942138672, + "learning_rate": 2.0382453736072838e-07, + "loss": 0.2044, + "step": 68160 + }, + { + "epoch": 136.38, + "grad_norm": 64.57730102539062, + "learning_rate": 2.0205263790309125e-07, + "loss": 0.195, + "step": 68190 + }, + { + "epoch": 136.44, + "grad_norm": 45.15483856201172, + "learning_rate": 2.0028831505924162e-07, + "loss": 0.2264, + "step": 68220 + }, + { + "epoch": 136.5, + "grad_norm": 42.67230987548828, + "learning_rate": 1.9853157161528468e-07, + "loss": 0.2093, + "step": 68250 + }, + { + "epoch": 136.56, + "grad_norm": 17.29395294189453, + "learning_rate": 1.967824103453597e-07, + "loss": 0.2212, + "step": 68280 + }, + { + "epoch": 136.62, + "grad_norm": 55.87034225463867, + "learning_rate": 1.9504083401162999e-07, + "loss": 0.2144, + "step": 68310 + }, + { + "epoch": 136.68, + "grad_norm": 57.51459884643555, + "learning_rate": 1.9330684536428335e-07, + "loss": 0.1862, + "step": 68340 + }, + { + "epoch": 136.74, + "grad_norm": 70.5501937866211, + "learning_rate": 1.9158044714152447e-07, + "loss": 0.2088, + "step": 68370 + }, + { + "epoch": 136.8, + "grad_norm": 26.095773696899414, + "learning_rate": 1.8986164206957037e-07, + "loss": 0.2091, + "step": 68400 + }, + { + "epoch": 136.86, + "grad_norm": 28.720632553100586, + "learning_rate": 1.8815043286265044e-07, + "loss": 0.194, + "step": 68430 + }, + { + "epoch": 136.92, + "grad_norm": 46.130802154541016, + "learning_rate": 1.8644682222299703e-07, + "loss": 0.191, + "step": 68460 + }, + { + "epoch": 136.98, + "grad_norm": 32.829002380371094, + "learning_rate": 1.8475081284084428e-07, + "loss": 0.2292, + "step": 68490 + }, + { + "epoch": 137.0, + "eval_loss": 0.30863094329833984, + "eval_map": 0.8046, + "eval_map_50": 0.9498, + "eval_map_75": 0.8996, + "eval_map_chicken": 0.8058, + "eval_map_duck": 0.7339, + "eval_map_large": 0.8031, + "eval_map_medium": 0.8068, + "eval_map_plant": 0.874, + "eval_map_small": 0.3796, + "eval_mar_1": 0.3184, + "eval_mar_10": 0.8407, + "eval_mar_100": 0.844, + "eval_mar_100_chicken": 0.8456, + "eval_mar_100_duck": 0.7866, + "eval_mar_100_plant": 0.8997, + "eval_mar_large": 0.8316, + "eval_mar_medium": 0.849, + "eval_mar_small": 0.4314, + "eval_runtime": 12.1831, + "eval_samples_per_second": 8.208, + "eval_steps_per_second": 1.067, + "step": 68500 + }, + { + "epoch": 137.04, + "grad_norm": 31.091623306274414, + "learning_rate": 1.8306240739442094e-07, + "loss": 0.2179, + "step": 68520 + }, + { + "epoch": 137.1, + "grad_norm": 40.29054641723633, + "learning_rate": 1.8138160854995145e-07, + "loss": 0.2034, + "step": 68550 + }, + { + "epoch": 137.16, + "grad_norm": 35.621116638183594, + "learning_rate": 1.7970841896164658e-07, + "loss": 0.2096, + "step": 68580 + }, + { + "epoch": 137.22, + "grad_norm": 106.7773666381836, + "learning_rate": 1.7804284127169946e-07, + "loss": 0.1982, + "step": 68610 + }, + { + "epoch": 137.28, + "grad_norm": 39.857696533203125, + "learning_rate": 1.7638487811028616e-07, + "loss": 0.1941, + "step": 68640 + }, + { + "epoch": 137.34, + "grad_norm": 57.87089157104492, + "learning_rate": 1.7473453209555625e-07, + "loss": 0.2281, + "step": 68670 + }, + { + "epoch": 137.4, + "grad_norm": 76.89950561523438, + "learning_rate": 1.7309180583363062e-07, + "loss": 0.2403, + "step": 68700 + }, + { + "epoch": 137.46, + "grad_norm": 58.04665756225586, + "learning_rate": 1.7145670191859977e-07, + "loss": 0.2296, + "step": 68730 + }, + { + "epoch": 137.52, + "grad_norm": 28.78618049621582, + "learning_rate": 1.6982922293251548e-07, + "loss": 0.2145, + "step": 68760 + }, + { + "epoch": 137.58, + "grad_norm": 45.55547332763672, + "learning_rate": 1.6820937144538807e-07, + "loss": 0.1947, + "step": 68790 + }, + { + "epoch": 137.64, + "grad_norm": 42.586551666259766, + "learning_rate": 1.6659715001518583e-07, + "loss": 0.2322, + "step": 68820 + }, + { + "epoch": 137.7, + "grad_norm": 25.68801498413086, + "learning_rate": 1.6499256118782503e-07, + "loss": 0.1923, + "step": 68850 + }, + { + "epoch": 137.76, + "grad_norm": 54.242618560791016, + "learning_rate": 1.6339560749717154e-07, + "loss": 0.2217, + "step": 68880 + }, + { + "epoch": 137.82, + "grad_norm": 59.135337829589844, + "learning_rate": 1.6180629146503256e-07, + "loss": 0.2069, + "step": 68910 + }, + { + "epoch": 137.88, + "grad_norm": 37.869014739990234, + "learning_rate": 1.6022461560115498e-07, + "loss": 0.207, + "step": 68940 + }, + { + "epoch": 137.94, + "grad_norm": 48.881080627441406, + "learning_rate": 1.586505824032214e-07, + "loss": 0.1919, + "step": 68970 + }, + { + "epoch": 138.0, + "grad_norm": 206.4425048828125, + "learning_rate": 1.5708419435684463e-07, + "loss": 0.2231, + "step": 69000 + }, + { + "epoch": 138.0, + "eval_loss": 0.30831366777420044, + "eval_map": 0.8073, + "eval_map_50": 0.9477, + "eval_map_75": 0.905, + "eval_map_chicken": 0.8075, + "eval_map_duck": 0.74, + "eval_map_large": 0.8085, + "eval_map_medium": 0.8098, + "eval_map_plant": 0.8742, + "eval_map_small": 0.3733, + "eval_mar_1": 0.3192, + "eval_mar_10": 0.8433, + "eval_mar_100": 0.8461, + "eval_mar_100_chicken": 0.848, + "eval_mar_100_duck": 0.7897, + "eval_mar_100_plant": 0.9006, + "eval_mar_large": 0.8356, + "eval_mar_medium": 0.8516, + "eval_mar_small": 0.4119, + "eval_runtime": 11.0571, + "eval_samples_per_second": 9.044, + "eval_steps_per_second": 1.176, + "step": 69000 + }, + { + "epoch": 138.06, + "grad_norm": 47.209869384765625, + "learning_rate": 1.555254539355655e-07, + "loss": 0.2146, + "step": 69030 + }, + { + "epoch": 138.12, + "grad_norm": 53.7562255859375, + "learning_rate": 1.5397436360084784e-07, + "loss": 0.2006, + "step": 69060 + }, + { + "epoch": 138.18, + "grad_norm": 82.0704345703125, + "learning_rate": 1.5243092580207507e-07, + "loss": 0.2285, + "step": 69090 + }, + { + "epoch": 138.24, + "grad_norm": 31.887880325317383, + "learning_rate": 1.5089514297654594e-07, + "loss": 0.2017, + "step": 69120 + }, + { + "epoch": 138.3, + "grad_norm": 94.9820327758789, + "learning_rate": 1.4936701754947104e-07, + "loss": 0.2195, + "step": 69150 + }, + { + "epoch": 138.36, + "grad_norm": 75.08333587646484, + "learning_rate": 1.4784655193396947e-07, + "loss": 0.2127, + "step": 69180 + }, + { + "epoch": 138.42, + "grad_norm": 40.21970748901367, + "learning_rate": 1.463337485310634e-07, + "loss": 0.1941, + "step": 69210 + }, + { + "epoch": 138.48, + "grad_norm": 50.572811126708984, + "learning_rate": 1.448286097296764e-07, + "loss": 0.2082, + "step": 69240 + }, + { + "epoch": 138.54, + "grad_norm": 503.557861328125, + "learning_rate": 1.4333113790662822e-07, + "loss": 0.2164, + "step": 69270 + }, + { + "epoch": 138.6, + "grad_norm": 46.357208251953125, + "learning_rate": 1.4184133542663014e-07, + "loss": 0.1926, + "step": 69300 + }, + { + "epoch": 138.66, + "grad_norm": 77.93207550048828, + "learning_rate": 1.4035920464228525e-07, + "loss": 0.2165, + "step": 69330 + }, + { + "epoch": 138.72, + "grad_norm": 32.061344146728516, + "learning_rate": 1.388847478940797e-07, + "loss": 0.1961, + "step": 69360 + }, + { + "epoch": 138.78, + "grad_norm": 45.14935302734375, + "learning_rate": 1.3741796751038095e-07, + "loss": 0.2195, + "step": 69390 + }, + { + "epoch": 138.84, + "grad_norm": 18.540773391723633, + "learning_rate": 1.3595886580743677e-07, + "loss": 0.2463, + "step": 69420 + }, + { + "epoch": 138.9, + "grad_norm": 61.1831169128418, + "learning_rate": 1.3450744508936687e-07, + "loss": 0.2171, + "step": 69450 + }, + { + "epoch": 138.96, + "grad_norm": 48.19976806640625, + "learning_rate": 1.330637076481639e-07, + "loss": 0.1987, + "step": 69480 + }, + { + "epoch": 139.0, + "eval_loss": 0.3095991015434265, + "eval_map": 0.8068, + "eval_map_50": 0.9492, + "eval_map_75": 0.8997, + "eval_map_chicken": 0.8089, + "eval_map_duck": 0.7366, + "eval_map_large": 0.8041, + "eval_map_medium": 0.8094, + "eval_map_plant": 0.8747, + "eval_map_small": 0.3756, + "eval_mar_1": 0.319, + "eval_mar_10": 0.8427, + "eval_mar_100": 0.8454, + "eval_mar_100_chicken": 0.8488, + "eval_mar_100_duck": 0.7876, + "eval_mar_100_plant": 0.8997, + "eval_mar_large": 0.8318, + "eval_mar_medium": 0.851, + "eval_mar_small": 0.4252, + "eval_runtime": 12.2386, + "eval_samples_per_second": 8.171, + "eval_steps_per_second": 1.062, + "step": 69500 + }, + { + "epoch": 139.02, + "grad_norm": 33.00763702392578, + "learning_rate": 1.3162765576368587e-07, + "loss": 0.2106, + "step": 69510 + }, + { + "epoch": 139.08, + "grad_norm": 71.31500244140625, + "learning_rate": 1.3019929170365376e-07, + "loss": 0.209, + "step": 69540 + }, + { + "epoch": 139.14, + "grad_norm": 61.765052795410156, + "learning_rate": 1.287786177236511e-07, + "loss": 0.2267, + "step": 69570 + }, + { + "epoch": 139.2, + "grad_norm": 25.743423461914062, + "learning_rate": 1.2736563606711384e-07, + "loss": 0.2014, + "step": 69600 + }, + { + "epoch": 139.26, + "grad_norm": 48.05965805053711, + "learning_rate": 1.259603489653355e-07, + "loss": 0.255, + "step": 69630 + }, + { + "epoch": 139.32, + "grad_norm": 52.13318634033203, + "learning_rate": 1.2456275863745426e-07, + "loss": 0.1985, + "step": 69660 + }, + { + "epoch": 139.38, + "grad_norm": 67.66744995117188, + "learning_rate": 1.2317286729045586e-07, + "loss": 0.2397, + "step": 69690 + }, + { + "epoch": 139.44, + "grad_norm": 135.82752990722656, + "learning_rate": 1.2179067711917015e-07, + "loss": 0.1931, + "step": 69720 + }, + { + "epoch": 139.5, + "grad_norm": 28.21087074279785, + "learning_rate": 1.2041619030626283e-07, + "loss": 0.1983, + "step": 69750 + }, + { + "epoch": 139.56, + "grad_norm": 49.653682708740234, + "learning_rate": 1.1904940902223661e-07, + "loss": 0.2175, + "step": 69780 + }, + { + "epoch": 139.62, + "grad_norm": 158.8368682861328, + "learning_rate": 1.1769033542542552e-07, + "loss": 0.2223, + "step": 69810 + }, + { + "epoch": 139.68, + "grad_norm": 31.01651382446289, + "learning_rate": 1.1633897166199227e-07, + "loss": 0.192, + "step": 69840 + }, + { + "epoch": 139.74, + "grad_norm": 54.803897857666016, + "learning_rate": 1.1499531986592482e-07, + "loss": 0.1917, + "step": 69870 + }, + { + "epoch": 139.8, + "grad_norm": 41.02241897583008, + "learning_rate": 1.136593821590326e-07, + "loss": 0.2329, + "step": 69900 + }, + { + "epoch": 139.86, + "grad_norm": 39.60552978515625, + "learning_rate": 1.1233116065094363e-07, + "loss": 0.2045, + "step": 69930 + }, + { + "epoch": 139.92, + "grad_norm": 24.14561653137207, + "learning_rate": 1.1101065743910122e-07, + "loss": 0.2088, + "step": 69960 + }, + { + "epoch": 139.98, + "grad_norm": 36.25834655761719, + "learning_rate": 1.0969787460876013e-07, + "loss": 0.1917, + "step": 69990 + }, + { + "epoch": 140.0, + "eval_loss": 0.3100622296333313, + "eval_map": 0.8051, + "eval_map_50": 0.9476, + "eval_map_75": 0.9028, + "eval_map_chicken": 0.8057, + "eval_map_duck": 0.735, + "eval_map_large": 0.812, + "eval_map_medium": 0.8076, + "eval_map_plant": 0.8746, + "eval_map_small": 0.3669, + "eval_mar_1": 0.3184, + "eval_mar_10": 0.8411, + "eval_mar_100": 0.8441, + "eval_mar_100_chicken": 0.8456, + "eval_mar_100_duck": 0.7866, + "eval_mar_100_plant": 0.9, + "eval_mar_large": 0.8377, + "eval_mar_medium": 0.8494, + "eval_mar_small": 0.4038, + "eval_runtime": 13.1165, + "eval_samples_per_second": 7.624, + "eval_steps_per_second": 0.991, + "step": 70000 + }, + { + "epoch": 140.04, + "grad_norm": 77.3727035522461, + "learning_rate": 1.0839281423298375e-07, + "loss": 0.2235, + "step": 70020 + }, + { + "epoch": 140.1, + "grad_norm": 148.96051025390625, + "learning_rate": 1.0709547837263967e-07, + "loss": 0.1946, + "step": 70050 + }, + { + "epoch": 140.16, + "grad_norm": 37.42975997924805, + "learning_rate": 1.0580586907639912e-07, + "loss": 0.206, + "step": 70080 + }, + { + "epoch": 140.22, + "grad_norm": 39.80850601196289, + "learning_rate": 1.0452398838073141e-07, + "loss": 0.2201, + "step": 70110 + }, + { + "epoch": 140.28, + "grad_norm": 44.86832809448242, + "learning_rate": 1.032498383099001e-07, + "loss": 0.2143, + "step": 70140 + }, + { + "epoch": 140.34, + "grad_norm": 88.26593017578125, + "learning_rate": 1.0198342087596292e-07, + "loss": 0.2273, + "step": 70170 + }, + { + "epoch": 140.4, + "grad_norm": 36.841163635253906, + "learning_rate": 1.007247380787657e-07, + "loss": 0.221, + "step": 70200 + }, + { + "epoch": 140.46, + "grad_norm": 76.04727172851562, + "learning_rate": 9.947379190594076e-08, + "loss": 0.2166, + "step": 70230 + }, + { + "epoch": 140.52, + "grad_norm": 111.13554382324219, + "learning_rate": 9.823058433290178e-08, + "loss": 0.2146, + "step": 70260 + }, + { + "epoch": 140.58, + "grad_norm": 75.74388885498047, + "learning_rate": 9.699511732284395e-08, + "loss": 0.2137, + "step": 70290 + }, + { + "epoch": 140.64, + "grad_norm": 57.81622314453125, + "learning_rate": 9.576739282673886e-08, + "loss": 0.1948, + "step": 70320 + }, + { + "epoch": 140.7, + "grad_norm": 125.7251205444336, + "learning_rate": 9.454741278333013e-08, + "loss": 0.2356, + "step": 70350 + }, + { + "epoch": 140.76, + "grad_norm": 178.1024932861328, + "learning_rate": 9.333517911913281e-08, + "loss": 0.2086, + "step": 70380 + }, + { + "epoch": 140.82, + "grad_norm": 51.754905700683594, + "learning_rate": 9.213069374842953e-08, + "loss": 0.199, + "step": 70410 + }, + { + "epoch": 140.88, + "grad_norm": 65.02310180664062, + "learning_rate": 9.093395857326714e-08, + "loss": 0.213, + "step": 70440 + }, + { + "epoch": 140.94, + "grad_norm": 59.75958251953125, + "learning_rate": 8.974497548345396e-08, + "loss": 0.2072, + "step": 70470 + }, + { + "epoch": 141.0, + "grad_norm": 60.88467788696289, + "learning_rate": 8.856374635655696e-08, + "loss": 0.2281, + "step": 70500 + }, + { + "epoch": 141.0, + "eval_loss": 0.3109195828437805, + "eval_map": 0.8049, + "eval_map_50": 0.9476, + "eval_map_75": 0.9091, + "eval_map_chicken": 0.8048, + "eval_map_duck": 0.7368, + "eval_map_large": 0.8106, + "eval_map_medium": 0.8076, + "eval_map_plant": 0.8731, + "eval_map_small": 0.3608, + "eval_mar_1": 0.3194, + "eval_mar_10": 0.8415, + "eval_mar_100": 0.8444, + "eval_mar_100_chicken": 0.846, + "eval_mar_100_duck": 0.7887, + "eval_mar_100_plant": 0.8985, + "eval_mar_large": 0.8359, + "eval_mar_medium": 0.8499, + "eval_mar_small": 0.4038, + "eval_runtime": 12.3332, + "eval_samples_per_second": 8.108, + "eval_steps_per_second": 1.054, + "step": 70500 + }, + { + "epoch": 141.06, + "grad_norm": 60.512489318847656, + "learning_rate": 8.739027305789682e-08, + "loss": 0.2013, + "step": 70530 + }, + { + "epoch": 141.12, + "grad_norm": 54.249263763427734, + "learning_rate": 8.622455744054958e-08, + "loss": 0.2301, + "step": 70560 + }, + { + "epoch": 141.18, + "grad_norm": 494.5781555175781, + "learning_rate": 8.506660134533828e-08, + "loss": 0.1916, + "step": 70590 + }, + { + "epoch": 141.24, + "grad_norm": 42.008140563964844, + "learning_rate": 8.391640660083411e-08, + "loss": 0.2372, + "step": 70620 + }, + { + "epoch": 141.3, + "grad_norm": 107.05094146728516, + "learning_rate": 8.277397502335194e-08, + "loss": 0.2048, + "step": 70650 + }, + { + "epoch": 141.36, + "grad_norm": 33.05570602416992, + "learning_rate": 8.163930841694589e-08, + "loss": 0.2199, + "step": 70680 + }, + { + "epoch": 141.42, + "grad_norm": 60.28254699707031, + "learning_rate": 8.051240857341102e-08, + "loss": 0.2432, + "step": 70710 + }, + { + "epoch": 141.48, + "grad_norm": 70.86982727050781, + "learning_rate": 7.939327727227441e-08, + "loss": 0.22, + "step": 70740 + }, + { + "epoch": 141.54, + "grad_norm": 86.37015533447266, + "learning_rate": 7.828191628079851e-08, + "loss": 0.1896, + "step": 70770 + }, + { + "epoch": 141.6, + "grad_norm": 35.2357177734375, + "learning_rate": 7.717832735397335e-08, + "loss": 0.2088, + "step": 70800 + }, + { + "epoch": 141.66, + "grad_norm": 1039.4613037109375, + "learning_rate": 7.608251223451601e-08, + "loss": 0.2039, + "step": 70830 + }, + { + "epoch": 141.72, + "grad_norm": 24.711950302124023, + "learning_rate": 7.499447265286952e-08, + "loss": 0.1896, + "step": 70860 + }, + { + "epoch": 141.78, + "grad_norm": 30.11922264099121, + "learning_rate": 7.39142103271956e-08, + "loss": 0.2105, + "step": 70890 + }, + { + "epoch": 141.84, + "grad_norm": 37.19144058227539, + "learning_rate": 7.284172696337688e-08, + "loss": 0.2011, + "step": 70920 + }, + { + "epoch": 141.9, + "grad_norm": 99.82898712158203, + "learning_rate": 7.177702425500977e-08, + "loss": 0.2473, + "step": 70950 + }, + { + "epoch": 141.96, + "grad_norm": 33.77702713012695, + "learning_rate": 7.072010388340656e-08, + "loss": 0.2052, + "step": 70980 + }, + { + "epoch": 142.0, + "eval_loss": 0.308868944644928, + "eval_map": 0.8064, + "eval_map_50": 0.9478, + "eval_map_75": 0.9063, + "eval_map_chicken": 0.8072, + "eval_map_duck": 0.7376, + "eval_map_large": 0.8141, + "eval_map_medium": 0.8096, + "eval_map_plant": 0.8744, + "eval_map_small": 0.3715, + "eval_mar_1": 0.3192, + "eval_mar_10": 0.8427, + "eval_mar_100": 0.8454, + "eval_mar_100_chicken": 0.8468, + "eval_mar_100_duck": 0.7897, + "eval_mar_100_plant": 0.8997, + "eval_mar_large": 0.8407, + "eval_mar_medium": 0.8512, + "eval_mar_small": 0.4052, + "eval_runtime": 12.1023, + "eval_samples_per_second": 8.263, + "eval_steps_per_second": 1.074, + "step": 71000 + }, + { + "epoch": 142.02, + "grad_norm": 31.064472198486328, + "learning_rate": 6.967096751758773e-08, + "loss": 0.2284, + "step": 71010 + }, + { + "epoch": 142.08, + "grad_norm": 49.0639533996582, + "learning_rate": 6.862961681428304e-08, + "loss": 0.2014, + "step": 71040 + }, + { + "epoch": 142.14, + "grad_norm": 48.217796325683594, + "learning_rate": 6.759605341792819e-08, + "loss": 0.2459, + "step": 71070 + }, + { + "epoch": 142.2, + "grad_norm": 74.9462890625, + "learning_rate": 6.657027896065982e-08, + "loss": 0.2196, + "step": 71100 + }, + { + "epoch": 142.26, + "grad_norm": 31.848787307739258, + "learning_rate": 6.555229506231608e-08, + "loss": 0.2387, + "step": 71130 + }, + { + "epoch": 142.32, + "grad_norm": 45.96524429321289, + "learning_rate": 6.454210333043275e-08, + "loss": 0.1939, + "step": 71160 + }, + { + "epoch": 142.38, + "grad_norm": 51.15266036987305, + "learning_rate": 6.353970536024045e-08, + "loss": 0.1843, + "step": 71190 + }, + { + "epoch": 142.44, + "grad_norm": 57.012203216552734, + "learning_rate": 6.254510273466186e-08, + "loss": 0.2211, + "step": 71220 + }, + { + "epoch": 142.5, + "grad_norm": 40.6700439453125, + "learning_rate": 6.15582970243117e-08, + "loss": 0.1903, + "step": 71250 + }, + { + "epoch": 142.56, + "grad_norm": 36.734195709228516, + "learning_rate": 6.057928978748906e-08, + "loss": 0.2031, + "step": 71280 + }, + { + "epoch": 142.62, + "grad_norm": 122.18500518798828, + "learning_rate": 5.960808257018113e-08, + "loss": 0.1927, + "step": 71310 + }, + { + "epoch": 142.68, + "grad_norm": 37.88673400878906, + "learning_rate": 5.864467690605613e-08, + "loss": 0.2047, + "step": 71340 + }, + { + "epoch": 142.74, + "grad_norm": 69.3516616821289, + "learning_rate": 5.76890743164632e-08, + "loss": 0.2085, + "step": 71370 + }, + { + "epoch": 142.8, + "grad_norm": 98.10469818115234, + "learning_rate": 5.674127631043025e-08, + "loss": 0.2504, + "step": 71400 + }, + { + "epoch": 142.86, + "grad_norm": 47.47639083862305, + "learning_rate": 5.580128438465837e-08, + "loss": 0.198, + "step": 71430 + }, + { + "epoch": 142.92, + "grad_norm": 44.47444152832031, + "learning_rate": 5.4869100023523526e-08, + "loss": 0.2059, + "step": 71460 + }, + { + "epoch": 142.98, + "grad_norm": 75.74762725830078, + "learning_rate": 5.394472469907208e-08, + "loss": 0.215, + "step": 71490 + }, + { + "epoch": 143.0, + "eval_loss": 0.3087313175201416, + "eval_map": 0.8073, + "eval_map_50": 0.9476, + "eval_map_75": 0.9062, + "eval_map_chicken": 0.8082, + "eval_map_duck": 0.7384, + "eval_map_large": 0.815, + "eval_map_medium": 0.8087, + "eval_map_plant": 0.8752, + "eval_map_small": 0.3781, + "eval_mar_1": 0.3193, + "eval_mar_10": 0.8434, + "eval_mar_100": 0.8463, + "eval_mar_100_chicken": 0.8484, + "eval_mar_100_duck": 0.7907, + "eval_mar_100_plant": 0.8997, + "eval_mar_large": 0.8418, + "eval_mar_medium": 0.8508, + "eval_mar_small": 0.42, + "eval_runtime": 11.6975, + "eval_samples_per_second": 8.549, + "eval_steps_per_second": 1.111, + "step": 71500 + }, + { + "epoch": 143.04, + "grad_norm": 46.21229553222656, + "learning_rate": 5.302815987101917e-08, + "loss": 0.2404, + "step": 71520 + }, + { + "epoch": 143.1, + "grad_norm": 61.36137008666992, + "learning_rate": 5.2119406986745336e-08, + "loss": 0.2064, + "step": 71550 + }, + { + "epoch": 143.16, + "grad_norm": 51.88548278808594, + "learning_rate": 5.121846748129544e-08, + "loss": 0.2488, + "step": 71580 + }, + { + "epoch": 143.22, + "grad_norm": 72.71974182128906, + "learning_rate": 5.032534277737644e-08, + "loss": 0.2177, + "step": 71610 + }, + { + "epoch": 143.28, + "grad_norm": 66.9245834350586, + "learning_rate": 4.944003428535349e-08, + "loss": 0.252, + "step": 71640 + }, + { + "epoch": 143.34, + "grad_norm": 150.7808074951172, + "learning_rate": 4.856254340325051e-08, + "loss": 0.2234, + "step": 71670 + }, + { + "epoch": 143.4, + "grad_norm": 41.08302688598633, + "learning_rate": 4.769287151674407e-08, + "loss": 0.2017, + "step": 71700 + }, + { + "epoch": 143.46, + "grad_norm": 60.249473571777344, + "learning_rate": 4.683101999916562e-08, + "loss": 0.1932, + "step": 71730 + }, + { + "epoch": 143.52, + "grad_norm": 40.887020111083984, + "learning_rate": 4.597699021149649e-08, + "loss": 0.2198, + "step": 71760 + }, + { + "epoch": 143.58, + "grad_norm": 86.38909149169922, + "learning_rate": 4.5130783502365106e-08, + "loss": 0.2149, + "step": 71790 + }, + { + "epoch": 143.64, + "grad_norm": 70.79454040527344, + "learning_rate": 4.429240120804923e-08, + "loss": 0.1897, + "step": 71820 + }, + { + "epoch": 143.7, + "grad_norm": 58.97731018066406, + "learning_rate": 4.346184465246761e-08, + "loss": 0.2016, + "step": 71850 + }, + { + "epoch": 143.76, + "grad_norm": 30.449399948120117, + "learning_rate": 4.263911514718222e-08, + "loss": 0.1932, + "step": 71880 + }, + { + "epoch": 143.82, + "grad_norm": 59.97642135620117, + "learning_rate": 4.1824213991396024e-08, + "loss": 0.2003, + "step": 71910 + }, + { + "epoch": 143.88, + "grad_norm": 50.71462631225586, + "learning_rate": 4.10171424719491e-08, + "loss": 0.2051, + "step": 71940 + }, + { + "epoch": 143.94, + "grad_norm": 117.72753143310547, + "learning_rate": 4.0217901863317534e-08, + "loss": 0.1994, + "step": 71970 + }, + { + "epoch": 144.0, + "grad_norm": 62.03313064575195, + "learning_rate": 3.9426493427611177e-08, + "loss": 0.2031, + "step": 72000 + }, + { + "epoch": 144.0, + "eval_loss": 0.30865278840065, + "eval_map": 0.8065, + "eval_map_50": 0.9475, + "eval_map_75": 0.9062, + "eval_map_chicken": 0.8062, + "eval_map_duck": 0.7383, + "eval_map_large": 0.8107, + "eval_map_medium": 0.8091, + "eval_map_plant": 0.8752, + "eval_map_small": 0.3766, + "eval_mar_1": 0.3191, + "eval_mar_10": 0.843, + "eval_mar_100": 0.8458, + "eval_mar_100_chicken": 0.8476, + "eval_mar_100_duck": 0.7897, + "eval_mar_100_plant": 0.9, + "eval_mar_large": 0.8384, + "eval_mar_medium": 0.8511, + "eval_mar_small": 0.42, + "eval_runtime": 13.6489, + "eval_samples_per_second": 7.327, + "eval_steps_per_second": 0.952, + "step": 72000 + }, + { + "epoch": 144.06, + "grad_norm": 35.4929313659668, + "learning_rate": 3.864291841457146e-08, + "loss": 0.1833, + "step": 72030 + }, + { + "epoch": 144.12, + "grad_norm": 87.02384185791016, + "learning_rate": 3.786717806157136e-08, + "loss": 0.2104, + "step": 72060 + }, + { + "epoch": 144.18, + "grad_norm": 178.31637573242188, + "learning_rate": 3.7099273593609316e-08, + "loss": 0.2086, + "step": 72090 + }, + { + "epoch": 144.24, + "grad_norm": 53.136329650878906, + "learning_rate": 3.633920622331311e-08, + "loss": 0.2387, + "step": 72120 + }, + { + "epoch": 144.3, + "grad_norm": 78.11143493652344, + "learning_rate": 3.558697715093207e-08, + "loss": 0.2213, + "step": 72150 + }, + { + "epoch": 144.36, + "grad_norm": 46.717445373535156, + "learning_rate": 3.4842587564337674e-08, + "loss": 0.2149, + "step": 72180 + }, + { + "epoch": 144.42, + "grad_norm": 30.410228729248047, + "learning_rate": 3.410603863902406e-08, + "loss": 0.2304, + "step": 72210 + }, + { + "epoch": 144.48, + "grad_norm": 99.21456909179688, + "learning_rate": 3.337733153810141e-08, + "loss": 0.2044, + "step": 72240 + }, + { + "epoch": 144.54, + "grad_norm": 67.04376983642578, + "learning_rate": 3.2656467412298665e-08, + "loss": 0.2052, + "step": 72270 + }, + { + "epoch": 144.6, + "grad_norm": 64.57819366455078, + "learning_rate": 3.194344739995803e-08, + "loss": 0.2304, + "step": 72300 + }, + { + "epoch": 144.66, + "grad_norm": 93.20011901855469, + "learning_rate": 3.1238272627035494e-08, + "loss": 0.1735, + "step": 72330 + }, + { + "epoch": 144.72, + "grad_norm": 44.79425048828125, + "learning_rate": 3.054094420709863e-08, + "loss": 0.2073, + "step": 72360 + }, + { + "epoch": 144.78, + "grad_norm": 83.33869934082031, + "learning_rate": 2.985146324132438e-08, + "loss": 0.1901, + "step": 72390 + }, + { + "epoch": 144.84, + "grad_norm": 53.1854248046875, + "learning_rate": 2.9169830818496226e-08, + "loss": 0.2056, + "step": 72420 + }, + { + "epoch": 144.9, + "grad_norm": 43.01469802856445, + "learning_rate": 2.8496048015005385e-08, + "loss": 0.2146, + "step": 72450 + }, + { + "epoch": 144.96, + "grad_norm": 209.90834045410156, + "learning_rate": 2.783011589484741e-08, + "loss": 0.2042, + "step": 72480 + }, + { + "epoch": 145.0, + "eval_loss": 0.30925503373146057, + "eval_map": 0.8056, + "eval_map_50": 0.9476, + "eval_map_75": 0.9048, + "eval_map_chicken": 0.8045, + "eval_map_duck": 0.737, + "eval_map_large": 0.8105, + "eval_map_medium": 0.8082, + "eval_map_plant": 0.8752, + "eval_map_small": 0.3781, + "eval_mar_1": 0.3189, + "eval_mar_10": 0.8425, + "eval_mar_100": 0.8452, + "eval_mar_100_chicken": 0.846, + "eval_mar_100_duck": 0.7897, + "eval_mar_100_plant": 0.9, + "eval_mar_large": 0.8374, + "eval_mar_medium": 0.8506, + "eval_mar_small": 0.42, + "eval_runtime": 12.735, + "eval_samples_per_second": 7.852, + "eval_steps_per_second": 1.021, + "step": 72500 + }, + { + "epoch": 145.02, + "grad_norm": 41.61909484863281, + "learning_rate": 2.7172035509619442e-08, + "loss": 0.2153, + "step": 72510 + }, + { + "epoch": 145.08, + "grad_norm": 92.02596282958984, + "learning_rate": 2.6521807898520214e-08, + "loss": 0.2094, + "step": 72540 + }, + { + "epoch": 145.14, + "grad_norm": 56.887611389160156, + "learning_rate": 2.5879434088348364e-08, + "loss": 0.2085, + "step": 72570 + }, + { + "epoch": 145.2, + "grad_norm": 26.000112533569336, + "learning_rate": 2.5244915093499134e-08, + "loss": 0.2036, + "step": 72600 + }, + { + "epoch": 145.26, + "grad_norm": 32.90476989746094, + "learning_rate": 2.46182519159649e-08, + "loss": 0.1947, + "step": 72630 + }, + { + "epoch": 145.32, + "grad_norm": 55.35662078857422, + "learning_rate": 2.3999445545332955e-08, + "loss": 0.2019, + "step": 72660 + }, + { + "epoch": 145.38, + "grad_norm": 91.33636474609375, + "learning_rate": 2.3388496958782203e-08, + "loss": 0.252, + "step": 72690 + }, + { + "epoch": 145.44, + "grad_norm": 83.00819396972656, + "learning_rate": 2.2785407121084236e-08, + "loss": 0.2069, + "step": 72720 + }, + { + "epoch": 145.5, + "grad_norm": 42.86649703979492, + "learning_rate": 2.219017698460002e-08, + "loss": 0.2156, + "step": 72750 + }, + { + "epoch": 145.56, + "grad_norm": 86.97473907470703, + "learning_rate": 2.1602807489279344e-08, + "loss": 0.2129, + "step": 72780 + }, + { + "epoch": 145.62, + "grad_norm": 56.99577713012695, + "learning_rate": 2.1023299562658584e-08, + "loss": 0.2005, + "step": 72810 + }, + { + "epoch": 145.68, + "grad_norm": 61.787715911865234, + "learning_rate": 2.0451654119860164e-08, + "loss": 0.2379, + "step": 72840 + }, + { + "epoch": 145.74, + "grad_norm": 989.9368286132812, + "learning_rate": 1.988787206359033e-08, + "loss": 0.2225, + "step": 72870 + }, + { + "epoch": 145.8, + "grad_norm": 75.80166625976562, + "learning_rate": 1.9331954284137476e-08, + "loss": 0.209, + "step": 72900 + }, + { + "epoch": 145.86, + "grad_norm": 48.081207275390625, + "learning_rate": 1.8783901659372162e-08, + "loss": 0.2186, + "step": 72930 + }, + { + "epoch": 145.92, + "grad_norm": 43.130218505859375, + "learning_rate": 1.8243715054744315e-08, + "loss": 0.185, + "step": 72960 + }, + { + "epoch": 145.98, + "grad_norm": 77.66969299316406, + "learning_rate": 1.7711395323281588e-08, + "loss": 0.1978, + "step": 72990 + }, + { + "epoch": 146.0, + "eval_loss": 0.30891692638397217, + "eval_map": 0.8065, + "eval_map_50": 0.9475, + "eval_map_75": 0.9047, + "eval_map_chicken": 0.8064, + "eval_map_duck": 0.7372, + "eval_map_large": 0.815, + "eval_map_medium": 0.8091, + "eval_map_plant": 0.876, + "eval_map_small": 0.3781, + "eval_mar_1": 0.3197, + "eval_mar_10": 0.8436, + "eval_mar_100": 0.8463, + "eval_mar_100_chicken": 0.848, + "eval_mar_100_duck": 0.7907, + "eval_mar_100_plant": 0.9003, + "eval_mar_large": 0.8418, + "eval_mar_medium": 0.8515, + "eval_mar_small": 0.42, + "eval_runtime": 12.7636, + "eval_samples_per_second": 7.835, + "eval_steps_per_second": 1.019, + "step": 73000 + }, + { + "epoch": 146.04, + "grad_norm": 51.415863037109375, + "learning_rate": 1.71869433055899e-08, + "loss": 0.2065, + "step": 73020 + }, + { + "epoch": 146.1, + "grad_norm": 55.093109130859375, + "learning_rate": 1.6670359829850657e-08, + "loss": 0.199, + "step": 73050 + }, + { + "epoch": 146.16, + "grad_norm": 67.57818603515625, + "learning_rate": 1.6161645711819664e-08, + "loss": 0.2043, + "step": 73080 + }, + { + "epoch": 146.22, + "grad_norm": 78.64994812011719, + "learning_rate": 1.5660801754825983e-08, + "loss": 0.1761, + "step": 73110 + }, + { + "epoch": 146.28, + "grad_norm": 44.29743957519531, + "learning_rate": 1.5167828749770853e-08, + "loss": 0.2297, + "step": 73140 + }, + { + "epoch": 146.34, + "grad_norm": 56.25002670288086, + "learning_rate": 1.4682727475124891e-08, + "loss": 0.1894, + "step": 73170 + }, + { + "epoch": 146.4, + "grad_norm": 81.73661041259766, + "learning_rate": 1.4205498696930332e-08, + "loss": 0.1852, + "step": 73200 + }, + { + "epoch": 146.46, + "grad_norm": 29.35874366760254, + "learning_rate": 1.3736143168796012e-08, + "loss": 0.2025, + "step": 73230 + }, + { + "epoch": 146.52, + "grad_norm": 29.727802276611328, + "learning_rate": 1.3274661631899055e-08, + "loss": 0.2379, + "step": 73260 + }, + { + "epoch": 146.58, + "grad_norm": 66.46299743652344, + "learning_rate": 1.2821054814980971e-08, + "loss": 0.2061, + "step": 73290 + }, + { + "epoch": 146.64, + "grad_norm": 51.29338455200195, + "learning_rate": 1.2375323434348773e-08, + "loss": 0.2488, + "step": 73320 + }, + { + "epoch": 146.7, + "grad_norm": 35.33070373535156, + "learning_rate": 1.1937468193873869e-08, + "loss": 0.2273, + "step": 73350 + }, + { + "epoch": 146.76, + "grad_norm": 45.7594108581543, + "learning_rate": 1.1507489784989278e-08, + "loss": 0.2235, + "step": 73380 + }, + { + "epoch": 146.82, + "grad_norm": 50.467529296875, + "learning_rate": 1.1085388886689085e-08, + "loss": 0.2128, + "step": 73410 + }, + { + "epoch": 146.88, + "grad_norm": 61.66533660888672, + "learning_rate": 1.067116616552899e-08, + "loss": 0.2061, + "step": 73440 + }, + { + "epoch": 146.94, + "grad_norm": 58.77129364013672, + "learning_rate": 1.026482227562242e-08, + "loss": 0.228, + "step": 73470 + }, + { + "epoch": 147.0, + "grad_norm": 32.76871109008789, + "learning_rate": 9.866357858642206e-09, + "loss": 0.1969, + "step": 73500 + }, + { + "epoch": 147.0, + "eval_loss": 0.30892306566238403, + "eval_map": 0.8063, + "eval_map_50": 0.9477, + "eval_map_75": 0.9049, + "eval_map_chicken": 0.8065, + "eval_map_duck": 0.7377, + "eval_map_large": 0.8145, + "eval_map_medium": 0.8093, + "eval_map_plant": 0.8748, + "eval_map_small": 0.3766, + "eval_mar_1": 0.3195, + "eval_mar_10": 0.8434, + "eval_mar_100": 0.8462, + "eval_mar_100_chicken": 0.8476, + "eval_mar_100_duck": 0.7907, + "eval_mar_100_plant": 0.9003, + "eval_mar_large": 0.8408, + "eval_mar_medium": 0.8515, + "eval_mar_small": 0.42, + "eval_runtime": 12.1018, + "eval_samples_per_second": 8.263, + "eval_steps_per_second": 1.074, + "step": 73500 + }, + { + "epoch": 147.06, + "grad_norm": 63.24929428100586, + "learning_rate": 9.475773543818345e-09, + "loss": 0.2078, + "step": 73530 + }, + { + "epoch": 147.12, + "grad_norm": 45.290950775146484, + "learning_rate": 9.09306994793635e-09, + "loss": 0.2573, + "step": 73560 + }, + { + "epoch": 147.18, + "grad_norm": 52.7389030456543, + "learning_rate": 8.718247675337243e-09, + "loss": 0.193, + "step": 73590 + }, + { + "epoch": 147.24, + "grad_norm": 38.60297393798828, + "learning_rate": 8.351307317917002e-09, + "loss": 0.221, + "step": 73620 + }, + { + "epoch": 147.3, + "grad_norm": 35.21743392944336, + "learning_rate": 7.992249455124889e-09, + "loss": 0.2271, + "step": 73650 + }, + { + "epoch": 147.36, + "grad_norm": 44.66568374633789, + "learning_rate": 7.641074653961244e-09, + "loss": 0.2287, + "step": 73680 + }, + { + "epoch": 147.42, + "grad_norm": 30.76506805419922, + "learning_rate": 7.297783468980246e-09, + "loss": 0.2162, + "step": 73710 + }, + { + "epoch": 147.48, + "grad_norm": 44.26003646850586, + "learning_rate": 6.962376442284368e-09, + "loss": 0.22, + "step": 73740 + }, + { + "epoch": 147.54, + "grad_norm": 78.09246826171875, + "learning_rate": 6.63485410352771e-09, + "loss": 0.1989, + "step": 73770 + }, + { + "epoch": 147.6, + "grad_norm": 64.34059143066406, + "learning_rate": 6.315216969912663e-09, + "loss": 0.1954, + "step": 73800 + }, + { + "epoch": 147.66, + "grad_norm": 80.68038940429688, + "learning_rate": 6.003465546189358e-09, + "loss": 0.1898, + "step": 73830 + }, + { + "epoch": 147.72, + "grad_norm": 37.143863677978516, + "learning_rate": 5.699600324657328e-09, + "loss": 0.1896, + "step": 73860 + }, + { + "epoch": 147.78, + "grad_norm": 86.01634216308594, + "learning_rate": 5.403621785159407e-09, + "loss": 0.2144, + "step": 73890 + }, + { + "epoch": 147.84, + "grad_norm": 43.97216796875, + "learning_rate": 5.115530395087276e-09, + "loss": 0.213, + "step": 73920 + }, + { + "epoch": 147.9, + "grad_norm": 38.47873306274414, + "learning_rate": 4.835326609376468e-09, + "loss": 0.2246, + "step": 73950 + }, + { + "epoch": 147.96, + "grad_norm": 35.541812896728516, + "learning_rate": 4.5630108705063684e-09, + "loss": 0.2083, + "step": 73980 + }, + { + "epoch": 148.0, + "eval_loss": 0.30893582105636597, + "eval_map": 0.8062, + "eval_map_50": 0.9477, + "eval_map_75": 0.9049, + "eval_map_chicken": 0.8066, + "eval_map_duck": 0.7377, + "eval_map_large": 0.8149, + "eval_map_medium": 0.8098, + "eval_map_plant": 0.8744, + "eval_map_small": 0.3781, + "eval_mar_1": 0.3198, + "eval_mar_10": 0.8435, + "eval_mar_100": 0.8463, + "eval_mar_100_chicken": 0.848, + "eval_mar_100_duck": 0.7907, + "eval_mar_100_plant": 0.9003, + "eval_mar_large": 0.8417, + "eval_mar_medium": 0.8519, + "eval_mar_small": 0.42, + "eval_runtime": 13.6671, + "eval_samples_per_second": 7.317, + "eval_steps_per_second": 0.951, + "step": 74000 + }, + { + "epoch": 148.02, + "grad_norm": 56.26382064819336, + "learning_rate": 4.298583608501328e-09, + "loss": 0.2157, + "step": 74010 + }, + { + "epoch": 148.08, + "grad_norm": 32.970672607421875, + "learning_rate": 4.042045240927883e-09, + "loss": 0.2133, + "step": 74040 + }, + { + "epoch": 148.14, + "grad_norm": 63.352481842041016, + "learning_rate": 3.793396172895314e-09, + "loss": 0.2116, + "step": 74070 + }, + { + "epoch": 148.2, + "grad_norm": 88.44683837890625, + "learning_rate": 3.5526367970539765e-09, + "loss": 0.1971, + "step": 74100 + }, + { + "epoch": 148.26, + "grad_norm": 36.679317474365234, + "learning_rate": 3.31976749359586e-09, + "loss": 0.1952, + "step": 74130 + }, + { + "epoch": 148.32, + "grad_norm": 34.160804748535156, + "learning_rate": 3.094788630254031e-09, + "loss": 0.2392, + "step": 74160 + }, + { + "epoch": 148.38, + "grad_norm": 50.7787971496582, + "learning_rate": 2.8777005622998567e-09, + "loss": 0.2159, + "step": 74190 + }, + { + "epoch": 148.44, + "grad_norm": 105.82192993164062, + "learning_rate": 2.6685036325457826e-09, + "loss": 0.2018, + "step": 74220 + }, + { + "epoch": 148.5, + "grad_norm": 52.19423294067383, + "learning_rate": 2.4671981713420003e-09, + "loss": 0.2117, + "step": 74250 + }, + { + "epoch": 148.56, + "grad_norm": 54.85720443725586, + "learning_rate": 2.2737844965775578e-09, + "loss": 0.2018, + "step": 74280 + }, + { + "epoch": 148.62, + "grad_norm": 67.35590362548828, + "learning_rate": 2.088262913679251e-09, + "loss": 0.1987, + "step": 74310 + }, + { + "epoch": 148.68, + "grad_norm": 65.11038208007812, + "learning_rate": 1.9106337156099553e-09, + "loss": 0.2329, + "step": 74340 + }, + { + "epoch": 148.74, + "grad_norm": 74.10694122314453, + "learning_rate": 1.740897182871404e-09, + "loss": 0.2376, + "step": 74370 + }, + { + "epoch": 148.8, + "grad_norm": 73.60809326171875, + "learning_rate": 1.5790535835003006e-09, + "loss": 0.2149, + "step": 74400 + }, + { + "epoch": 148.86, + "grad_norm": 42.631622314453125, + "learning_rate": 1.425103173069986e-09, + "loss": 0.1995, + "step": 74430 + }, + { + "epoch": 148.92, + "grad_norm": 63.96980667114258, + "learning_rate": 1.2790461946887712e-09, + "loss": 0.1897, + "step": 74460 + }, + { + "epoch": 148.98, + "grad_norm": 71.41777801513672, + "learning_rate": 1.1408828790010484e-09, + "loss": 0.1991, + "step": 74490 + }, + { + "epoch": 149.0, + "eval_loss": 0.3090154230594635, + "eval_map": 0.8062, + "eval_map_50": 0.9477, + "eval_map_75": 0.9049, + "eval_map_chicken": 0.8066, + "eval_map_duck": 0.7377, + "eval_map_large": 0.8149, + "eval_map_medium": 0.8098, + "eval_map_plant": 0.8745, + "eval_map_small": 0.3781, + "eval_mar_1": 0.3198, + "eval_mar_10": 0.8436, + "eval_mar_100": 0.8464, + "eval_mar_100_chicken": 0.848, + "eval_mar_100_duck": 0.7907, + "eval_mar_100_plant": 0.9006, + "eval_mar_large": 0.8418, + "eval_mar_medium": 0.8519, + "eval_mar_small": 0.42, + "eval_runtime": 13.9979, + "eval_samples_per_second": 7.144, + "eval_steps_per_second": 0.929, + "step": 74500 + }, + { + "epoch": 149.04, + "grad_norm": 102.14067840576172, + "learning_rate": 1.0106134441850712e-09, + "loss": 0.2216, + "step": 74520 + }, + { + "epoch": 149.1, + "grad_norm": 42.29328155517578, + "learning_rate": 8.88238095955174e-10, + "loss": 0.1898, + "step": 74550 + }, + { + "epoch": 149.16, + "grad_norm": 47.901248931884766, + "learning_rate": 7.737570275573314e-10, + "loss": 0.2068, + "step": 74580 + }, + { + "epoch": 149.22, + "grad_norm": 41.09926986694336, + "learning_rate": 6.671704197735995e-10, + "loss": 0.2043, + "step": 74610 + }, + { + "epoch": 149.28, + "grad_norm": 95.37877655029297, + "learning_rate": 5.684784409182298e-10, + "loss": 0.2002, + "step": 74640 + }, + { + "epoch": 149.34, + "grad_norm": 86.34088897705078, + "learning_rate": 4.776812468398895e-10, + "loss": 0.2129, + "step": 74670 + }, + { + "epoch": 149.4, + "grad_norm": 50.384944915771484, + "learning_rate": 3.9477898091944135e-10, + "loss": 0.2064, + "step": 74700 + }, + { + "epoch": 149.46, + "grad_norm": 44.60343933105469, + "learning_rate": 3.1977177407105376e-10, + "loss": 0.2034, + "step": 74730 + }, + { + "epoch": 149.52, + "grad_norm": 58.12319564819336, + "learning_rate": 2.5265974474109054e-10, + "loss": 0.221, + "step": 74760 + }, + { + "epoch": 149.58, + "grad_norm": 49.90918731689453, + "learning_rate": 1.9344299890866614e-10, + "loss": 0.2165, + "step": 74790 + }, + { + "epoch": 149.64, + "grad_norm": 56.82841873168945, + "learning_rate": 1.4212163008509028e-10, + "loss": 0.1935, + "step": 74820 + }, + { + "epoch": 149.7, + "grad_norm": 54.94172668457031, + "learning_rate": 9.869571931442334e-11, + "loss": 0.264, + "step": 74850 + }, + { + "epoch": 149.76, + "grad_norm": 49.350772857666016, + "learning_rate": 6.316533517125578e-11, + "loss": 0.217, + "step": 74880 + }, + { + "epoch": 149.82, + "grad_norm": 45.621150970458984, + "learning_rate": 3.55305337634837e-11, + "loss": 0.2009, + "step": 74910 + }, + { + "epoch": 149.88, + "grad_norm": 50.17906951904297, + "learning_rate": 1.57913587295333e-11, + "loss": 0.2077, + "step": 74940 + }, + { + "epoch": 149.94, + "grad_norm": 19.391836166381836, + "learning_rate": 3.947841241136452e-12, + "loss": 0.224, + "step": 74970 + }, + { + "epoch": 150.0, + "grad_norm": 35.242279052734375, + "learning_rate": 0.0, + "loss": 0.2337, + "step": 75000 + }, + { + "epoch": 150.0, + "eval_loss": 0.30901747941970825, + "eval_map": 0.8062, + "eval_map_50": 0.9477, + "eval_map_75": 0.9049, + "eval_map_chicken": 0.8066, + "eval_map_duck": 0.7377, + "eval_map_large": 0.8149, + "eval_map_medium": 0.8098, + "eval_map_plant": 0.8745, + "eval_map_small": 0.3781, + "eval_mar_1": 0.3198, + "eval_mar_10": 0.8436, + "eval_mar_100": 0.8464, + "eval_mar_100_chicken": 0.848, + "eval_mar_100_duck": 0.7907, + "eval_mar_100_plant": 0.9006, + "eval_mar_large": 0.8418, + "eval_mar_medium": 0.8519, + "eval_mar_small": 0.42, + "eval_runtime": 17.4562, + "eval_samples_per_second": 5.729, + "eval_steps_per_second": 0.745, + "step": 75000 } ], "logging_steps": 30, @@ -15995,12 +21421,12 @@ "should_evaluate": false, "should_log": false, "should_save": true, - "should_training_stop": false + "should_training_stop": true }, "attributes": {} } }, - "total_flos": 1.92652943228928e+19, + "total_flos": 2.580173346816e+19, "train_batch_size": 2, "trial_name": null, "trial_params": null