diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,4140 @@ +{ + "best_metric": 2.132894992828369, + "best_model_checkpoint": "/tmp/wandb/run-20240211_061007-slcnkgcr/files/train_output/checkpoint-10000", + "epoch": 2.042133333333333, + "eval_steps": 500, + "global_step": 10000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "MSE": 891.9713033040365, + "MSE/layer0": 891.9713033040365, + "dead_code_fraction": 0.1506, + "dead_code_fraction/layer0": 0.1506, + "epoch": 0.0, + "input_norm": 31.997233708699547, + "input_norm/layer0": 31.997233708699547, + "learning_rate": 0.0005, + "loss": 8.0845, + "max_norm": 34.580135345458984, + "max_norm/layer0": 34.580135345458984, + "mean_norm": 31.989344596862793, + "mean_norm/layer0": 31.989344596862793, + "multicode_k": 1, + "output_norm": 8.584638833999634, + "output_norm/layer0": 8.584638833999634, + "step": 1 + }, + { + "MSE": 883.0105907414232, + "MSE/layer0": 883.0105907414232, + "dead_code_fraction": 0.0, + "dead_code_fraction/layer0": 0.0, + "epoch": 0.01, + "input_norm": 31.99778711876902, + "input_norm/layer0": 31.99778711876902, + "learning_rate": 0.0005, + "loss": 4.8444, + "max_norm": 34.610191345214844, + "max_norm/layer0": 34.610191345214844, + "mean_norm": 32.02294731140137, + "mean_norm/layer0": 32.02294731140137, + "multicode_k": 1, + "output_norm": 8.645599765842462, + "output_norm/layer0": 8.645599765842462, + "step": 50 + }, + { + "MSE": 872.9267329915364, + "MSE/layer0": 872.9267329915364, + "dead_code_fraction": 0.0, + "dead_code_fraction/layer0": 0.0, + "epoch": 0.01, + "input_norm": 31.998572165171304, + "input_norm/layer0": 31.998572165171304, + "learning_rate": 0.0005, + "loss": 3.9294, + "max_norm": 34.62763595581055, + "max_norm/layer0": 34.62763595581055, + "mean_norm": 32.06278419494629, + "mean_norm/layer0": 32.06278419494629, + "multicode_k": 1, + "output_norm": 8.74148860613505, + "output_norm/layer0": 8.74148860613505, + "step": 100 + }, + { + "MSE": 866.7590488688152, + "MSE/layer0": 866.7590488688152, + "dead_code_fraction": 0.0, + "dead_code_fraction/layer0": 0.0, + "epoch": 0.01, + "input_norm": 31.99865425427754, + "input_norm/layer0": 31.99865425427754, + "learning_rate": 0.0005, + "loss": 3.5413, + "max_norm": 34.65019607543945, + "max_norm/layer0": 34.65019607543945, + "mean_norm": 32.1027717590332, + "mean_norm/layer0": 32.1027717590332, + "multicode_k": 1, + "output_norm": 8.811674615542097, + "output_norm/layer0": 8.811674615542097, + "step": 150 + }, + { + "MSE": 858.8314244588221, + "MSE/layer0": 858.8314244588221, + "dead_code_fraction": 0.0, + "dead_code_fraction/layer0": 0.0, + "epoch": 0.02, + "input_norm": 31.998634125391646, + "input_norm/layer0": 31.998634125391646, + "learning_rate": 0.0005, + "loss": 3.3381, + "max_norm": 34.73014831542969, + "max_norm/layer0": 34.73014831542969, + "mean_norm": 32.17362403869629, + "mean_norm/layer0": 32.17362403869629, + "multicode_k": 1, + "output_norm": 8.925555121103923, + "output_norm/layer0": 8.925555121103923, + "step": 200 + }, + { + "MSE": 849.6408699544276, + "MSE/layer0": 849.6408699544276, + "dead_code_fraction": 0.0, + "dead_code_fraction/layer0": 0.0, + "epoch": 0.03, + "input_norm": 31.9986141427358, + "input_norm/layer0": 31.9986141427358, + "learning_rate": 0.0005, + "loss": 3.2486, + "max_norm": 34.8281364440918, + "max_norm/layer0": 34.8281364440918, + "mean_norm": 32.26718330383301, + "mean_norm/layer0": 32.26718330383301, + "multicode_k": 1, + "output_norm": 9.101092262268068, + "output_norm/layer0": 9.101092262268068, + "step": 250 + }, + { + "MSE": 841.0051658121741, + "MSE/layer0": 841.0051658121741, + "dead_code_fraction": 0.0, + "dead_code_fraction/layer0": 0.0, + "epoch": 0.03, + "input_norm": 31.99862952232361, + "input_norm/layer0": 31.99862952232361, + "learning_rate": 0.0005, + "loss": 3.1503, + "max_norm": 34.946006774902344, + "max_norm/layer0": 34.946006774902344, + "mean_norm": 32.361915588378906, + "mean_norm/layer0": 32.361915588378906, + "multicode_k": 1, + "output_norm": 9.305952178637185, + "output_norm/layer0": 9.305952178637185, + "step": 300 + }, + { + "MSE": 833.1103855387371, + "MSE/layer0": 833.1103855387371, + "dead_code_fraction": 0.0, + "dead_code_fraction/layer0": 0.0, + "epoch": 0.04, + "input_norm": 31.998617506027223, + "input_norm/layer0": 31.998617506027223, + "learning_rate": 0.0005, + "loss": 3.0966, + "max_norm": 35.09696578979492, + "max_norm/layer0": 35.09696578979492, + "mean_norm": 32.463951110839844, + "mean_norm/layer0": 32.463951110839844, + "multicode_k": 1, + "output_norm": 9.513547644615176, + "output_norm/layer0": 9.513547644615176, + "step": 350 + }, + { + "MSE": 824.8635622151694, + "MSE/layer0": 824.8635622151694, + "dead_code_fraction": 0.0, + "dead_code_fraction/layer0": 0.0, + "epoch": 0.04, + "input_norm": 31.998617092768363, + "input_norm/layer0": 31.998617092768363, + "learning_rate": 0.0005, + "loss": 3.0998, + "max_norm": 35.28767013549805, + "max_norm/layer0": 35.28767013549805, + "mean_norm": 32.571420669555664, + "mean_norm/layer0": 32.571420669555664, + "multicode_k": 1, + "output_norm": 9.74717748324076, + "output_norm/layer0": 9.74717748324076, + "step": 400 + }, + { + "MSE": 817.218793334961, + "MSE/layer0": 817.218793334961, + "dead_code_fraction": 0.0, + "dead_code_fraction/layer0": 0.0, + "epoch": 0.04, + "input_norm": 31.99862334251403, + "input_norm/layer0": 31.99862334251403, + "learning_rate": 0.0005, + "loss": 3.0603, + "max_norm": 35.4771842956543, + "max_norm/layer0": 35.4771842956543, + "mean_norm": 32.68177795410156, + "mean_norm/layer0": 32.68177795410156, + "multicode_k": 1, + "output_norm": 9.985308513641357, + "output_norm/layer0": 9.985308513641357, + "step": 450 + }, + { + "MSE": 809.1558084106446, + "MSE/layer0": 809.1558084106446, + "dead_code_fraction": 0.0, + "dead_code_fraction/layer0": 0.0, + "epoch": 0.05, + "input_norm": 31.998615137736, + "input_norm/layer0": 31.998615137736, + "learning_rate": 0.0005, + "loss": 3.0494, + "max_norm": 35.6486701965332, + "max_norm/layer0": 35.6486701965332, + "mean_norm": 32.793779373168945, + "mean_norm/layer0": 32.793779373168945, + "multicode_k": 1, + "output_norm": 10.232081251144415, + "output_norm/layer0": 10.232081251144415, + "step": 500 + }, + { + "epoch": 0.05, + "eval_MSE/layer0": 805.1675846628777, + "eval_accuracy": 0.41770872781318447, + "eval_dead_code_fraction/layer0": 0.0, + "eval_input_norm/layer0": 31.998606410347342, + "eval_loss": 2.992654323577881, + "eval_multicode_k": 1, + "eval_output_norm/layer0": 10.360000263063938, + "eval_runtime": 159.8847, + "eval_samples_per_second": 28.915, + "eval_steps_per_second": 1.808, + "step": 500 + }, + { + "MSE": 801.7215725708003, + "MSE/layer0": 801.7215725708003, + "dead_code_fraction": 0.0, + "dead_code_fraction/layer0": 0.0, + "epoch": 0.06, + "input_norm": 31.998598546981817, + "input_norm/layer0": 31.998598546981817, + "learning_rate": 0.0005, + "loss": 2.9547, + "max_norm": 35.86976623535156, + "max_norm/layer0": 35.86976623535156, + "mean_norm": 32.91193962097168, + "mean_norm/layer0": 32.91193962097168, + "multicode_k": 1, + "output_norm": 10.47719025929769, + "output_norm/layer0": 10.47719025929769, + "step": 550 + }, + { + "MSE": 794.043483174642, + "MSE/layer0": 794.043483174642, + "dead_code_fraction": 0.0, + "dead_code_fraction/layer0": 0.0, + "epoch": 0.06, + "input_norm": 31.99859639167787, + "input_norm/layer0": 31.99859639167787, + "learning_rate": 0.0005, + "loss": 2.9506, + "max_norm": 36.08134078979492, + "max_norm/layer0": 36.08134078979492, + "mean_norm": 33.03110313415527, + "mean_norm/layer0": 33.03110313415527, + "multicode_k": 1, + "output_norm": 10.729146582285566, + "output_norm/layer0": 10.729146582285566, + "step": 600 + }, + { + "MSE": 786.3193520100913, + "MSE/layer0": 786.3193520100913, + "dead_code_fraction": 5e-05, + "dead_code_fraction/layer0": 5e-05, + "epoch": 0.07, + "input_norm": 31.99857716878254, + "input_norm/layer0": 31.99857716878254, + "learning_rate": 0.0005, + "loss": 2.8944, + "max_norm": 36.33954620361328, + "max_norm/layer0": 36.33954620361328, + "mean_norm": 33.15106773376465, + "mean_norm/layer0": 33.15106773376465, + "multicode_k": 1, + "output_norm": 10.987898168563845, + "output_norm/layer0": 10.987898168563845, + "step": 650 + }, + { + "MSE": 780.0598099772137, + "MSE/layer0": 780.0598099772137, + "dead_code_fraction": 0.0001, + "dead_code_fraction/layer0": 0.0001, + "epoch": 0.07, + "input_norm": 31.998565645217887, + "input_norm/layer0": 31.998565645217887, + "learning_rate": 0.0005, + "loss": 2.8643, + "max_norm": 36.55862808227539, + "max_norm/layer0": 36.55862808227539, + "mean_norm": 33.269744873046875, + "mean_norm/layer0": 33.269744873046875, + "multicode_k": 1, + "output_norm": 11.218051005999246, + "output_norm/layer0": 11.218051005999246, + "step": 700 + }, + { + "MSE": 772.4797055053714, + "MSE/layer0": 772.4797055053714, + "dead_code_fraction": 0.00045, + "dead_code_fraction/layer0": 0.00045, + "epoch": 0.07, + "input_norm": 31.998559678395594, + "input_norm/layer0": 31.998559678395594, + "learning_rate": 0.0005, + "loss": 2.8618, + "max_norm": 36.793521881103516, + "max_norm/layer0": 36.793521881103516, + "mean_norm": 33.39421844482422, + "mean_norm/layer0": 33.39421844482422, + "multicode_k": 1, + "output_norm": 11.470201053619387, + "output_norm/layer0": 11.470201053619387, + "step": 750 + }, + { + "MSE": 766.037492879232, + "MSE/layer0": 766.037492879232, + "dead_code_fraction": 0.00055, + "dead_code_fraction/layer0": 0.00055, + "epoch": 0.08, + "input_norm": 31.99854364713033, + "input_norm/layer0": 31.99854364713033, + "learning_rate": 0.0005, + "loss": 2.8403, + "max_norm": 37.0079231262207, + "max_norm/layer0": 37.0079231262207, + "mean_norm": 33.52132034301758, + "mean_norm/layer0": 33.52132034301758, + "multicode_k": 1, + "output_norm": 11.711471532185875, + "output_norm/layer0": 11.711471532185875, + "step": 800 + }, + { + "MSE": 759.9610600789387, + "MSE/layer0": 759.9610600789387, + "dead_code_fraction": 0.00135, + "dead_code_fraction/layer0": 0.00135, + "epoch": 0.09, + "input_norm": 31.998529828389472, + "input_norm/layer0": 31.998529828389472, + "learning_rate": 0.0005, + "loss": 2.7453, + "max_norm": 37.20747375488281, + "max_norm/layer0": 37.20747375488281, + "mean_norm": 33.64577674865723, + "mean_norm/layer0": 33.64577674865723, + "multicode_k": 1, + "output_norm": 11.93199801921844, + "output_norm/layer0": 11.93199801921844, + "step": 850 + }, + { + "MSE": 753.5576912434896, + "MSE/layer0": 753.5576912434896, + "dead_code_fraction": 0.00205, + "dead_code_fraction/layer0": 0.00205, + "epoch": 0.09, + "input_norm": 31.99852911949157, + "input_norm/layer0": 31.99852911949157, + "learning_rate": 0.0005, + "loss": 2.7975, + "max_norm": 37.432743072509766, + "max_norm/layer0": 37.432743072509766, + "mean_norm": 33.778066635131836, + "mean_norm/layer0": 33.778066635131836, + "multicode_k": 1, + "output_norm": 12.165767738024394, + "output_norm/layer0": 12.165767738024394, + "step": 900 + }, + { + "MSE": 747.6473927815753, + "MSE/layer0": 747.6473927815753, + "dead_code_fraction": 0.00335, + "dead_code_fraction/layer0": 0.00335, + "epoch": 0.1, + "input_norm": 31.998517106374106, + "input_norm/layer0": 31.998517106374106, + "learning_rate": 0.0005, + "loss": 2.7378, + "max_norm": 37.62055969238281, + "max_norm/layer0": 37.62055969238281, + "mean_norm": 33.90963554382324, + "mean_norm/layer0": 33.90963554382324, + "multicode_k": 1, + "output_norm": 12.390189347267153, + "output_norm/layer0": 12.390189347267153, + "step": 950 + }, + { + "MSE": 742.6674826049805, + "MSE/layer0": 742.6674826049805, + "dead_code_fraction": 0.0048, + "dead_code_fraction/layer0": 0.0048, + "epoch": 0.1, + "input_norm": 31.998499689102182, + "input_norm/layer0": 31.998499689102182, + "learning_rate": 0.0005, + "loss": 2.6986, + "max_norm": 37.880615234375, + "max_norm/layer0": 37.880615234375, + "mean_norm": 34.04428672790527, + "mean_norm/layer0": 34.04428672790527, + "multicode_k": 1, + "output_norm": 12.59642965157827, + "output_norm/layer0": 12.59642965157827, + "step": 1000 + }, + { + "epoch": 0.1, + "eval_MSE/layer0": 739.3243520424373, + "eval_accuracy": 0.44721058737930897, + "eval_dead_code_fraction/layer0": 0.00845, + "eval_input_norm/layer0": 31.998487053973697, + "eval_loss": 2.707960367202759, + "eval_multicode_k": 1, + "eval_output_norm/layer0": 12.71647696584792, + "eval_runtime": 157.5908, + "eval_samples_per_second": 29.335, + "eval_steps_per_second": 1.834, + "step": 1000 + }, + { + "MSE": 736.2877898152667, + "MSE/layer0": 736.2877898152667, + "dead_code_fraction": 0.00735, + "dead_code_fraction/layer0": 0.00735, + "epoch": 0.1, + "input_norm": 31.998484554290766, + "input_norm/layer0": 31.998484554290766, + "learning_rate": 0.0005, + "loss": 2.7222, + "max_norm": 38.21133804321289, + "max_norm/layer0": 38.21133804321289, + "mean_norm": 34.17984199523926, + "mean_norm/layer0": 34.17984199523926, + "multicode_k": 1, + "output_norm": 12.82279133001963, + "output_norm/layer0": 12.82279133001963, + "step": 1050 + }, + { + "MSE": 731.6754523722336, + "MSE/layer0": 731.6754523722336, + "dead_code_fraction": 0.01015, + "dead_code_fraction/layer0": 0.01015, + "epoch": 0.11, + "input_norm": 31.998473711013787, + "input_norm/layer0": 31.998473711013787, + "learning_rate": 0.0005, + "loss": 2.652, + "max_norm": 38.533973693847656, + "max_norm/layer0": 38.533973693847656, + "mean_norm": 34.31424903869629, + "mean_norm/layer0": 34.31424903869629, + "multicode_k": 1, + "output_norm": 13.017293116251633, + "output_norm/layer0": 13.017293116251633, + "step": 1100 + }, + { + "MSE": 726.8081079101562, + "MSE/layer0": 726.8081079101562, + "dead_code_fraction": 0.013, + "dead_code_fraction/layer0": 0.013, + "epoch": 0.12, + "input_norm": 31.99846080144247, + "input_norm/layer0": 31.99846080144247, + "learning_rate": 0.0005, + "loss": 2.6519, + "max_norm": 38.87154769897461, + "max_norm/layer0": 38.87154769897461, + "mean_norm": 34.454498291015625, + "mean_norm/layer0": 34.454498291015625, + "multicode_k": 1, + "output_norm": 13.209378539721174, + "output_norm/layer0": 13.209378539721174, + "step": 1150 + }, + { + "MSE": 722.3268162027996, + "MSE/layer0": 722.3268162027996, + "dead_code_fraction": 0.01565, + "dead_code_fraction/layer0": 0.01565, + "epoch": 0.12, + "input_norm": 31.998446766535434, + "input_norm/layer0": 31.998446766535434, + "learning_rate": 0.0005, + "loss": 2.6464, + "max_norm": 39.23857879638672, + "max_norm/layer0": 39.23857879638672, + "mean_norm": 34.597312927246094, + "mean_norm/layer0": 34.597312927246094, + "multicode_k": 1, + "output_norm": 13.40400979042053, + "output_norm/layer0": 13.40400979042053, + "step": 1200 + }, + { + "MSE": 717.3231912231446, + "MSE/layer0": 717.3231912231446, + "dead_code_fraction": 0.0241, + "dead_code_fraction/layer0": 0.0241, + "epoch": 0.12, + "input_norm": 31.998441489537555, + "input_norm/layer0": 31.998441489537555, + "learning_rate": 0.0005, + "loss": 2.6563, + "max_norm": 39.60569381713867, + "max_norm/layer0": 39.60569381713867, + "mean_norm": 34.73863220214844, + "mean_norm/layer0": 34.73863220214844, + "multicode_k": 1, + "output_norm": 13.590513488451638, + "output_norm/layer0": 13.590513488451638, + "step": 1250 + }, + { + "MSE": 713.6523872884117, + "MSE/layer0": 713.6523872884117, + "dead_code_fraction": 0.02485, + "dead_code_fraction/layer0": 0.02485, + "epoch": 0.13, + "input_norm": 31.998419742584225, + "input_norm/layer0": 31.998419742584225, + "learning_rate": 0.0005, + "loss": 2.5806, + "max_norm": 39.939239501953125, + "max_norm/layer0": 39.939239501953125, + "mean_norm": 34.87986946105957, + "mean_norm/layer0": 34.87986946105957, + "multicode_k": 1, + "output_norm": 13.766959317525227, + "output_norm/layer0": 13.766959317525227, + "step": 1300 + }, + { + "MSE": 709.5852165730794, + "MSE/layer0": 709.5852165730794, + "dead_code_fraction": 0.02925, + "dead_code_fraction/layer0": 0.02925, + "epoch": 0.14, + "input_norm": 31.998412898381545, + "input_norm/layer0": 31.998412898381545, + "learning_rate": 0.0005, + "loss": 2.5789, + "max_norm": 40.28993225097656, + "max_norm/layer0": 40.28993225097656, + "mean_norm": 35.022348403930664, + "mean_norm/layer0": 35.022348403930664, + "multicode_k": 1, + "output_norm": 13.93345036347707, + "output_norm/layer0": 13.93345036347707, + "step": 1350 + }, + { + "MSE": 705.2143248494463, + "MSE/layer0": 705.2143248494463, + "dead_code_fraction": 0.03375, + "dead_code_fraction/layer0": 0.03375, + "epoch": 0.14, + "input_norm": 31.9984123802185, + "input_norm/layer0": 31.9984123802185, + "learning_rate": 0.0005, + "loss": 2.5943, + "max_norm": 40.63530349731445, + "max_norm/layer0": 40.63530349731445, + "mean_norm": 35.164276123046875, + "mean_norm/layer0": 35.164276123046875, + "multicode_k": 1, + "output_norm": 14.105911358197524, + "output_norm/layer0": 14.105911358197524, + "step": 1400 + }, + { + "MSE": 702.3593349202476, + "MSE/layer0": 702.3593349202476, + "dead_code_fraction": 0.0404, + "dead_code_fraction/layer0": 0.0404, + "epoch": 0.14, + "input_norm": 31.99839937845865, + "input_norm/layer0": 31.99839937845865, + "learning_rate": 0.0005, + "loss": 2.5407, + "max_norm": 40.98182678222656, + "max_norm/layer0": 40.98182678222656, + "mean_norm": 35.30343246459961, + "mean_norm/layer0": 35.30343246459961, + "multicode_k": 1, + "output_norm": 14.2450444761912, + "output_norm/layer0": 14.2450444761912, + "step": 1450 + }, + { + "MSE": 699.0307844034837, + "MSE/layer0": 699.0307844034837, + "dead_code_fraction": 0.04535, + "dead_code_fraction/layer0": 0.04535, + "epoch": 0.15, + "input_norm": 31.998390986124676, + "input_norm/layer0": 31.998390986124676, + "learning_rate": 0.0005, + "loss": 2.5145, + "max_norm": 41.328433990478516, + "max_norm/layer0": 41.328433990478516, + "mean_norm": 35.445411682128906, + "mean_norm/layer0": 35.445411682128906, + "multicode_k": 1, + "output_norm": 14.399013953208918, + "output_norm/layer0": 14.399013953208918, + "step": 1500 + }, + { + "epoch": 0.15, + "eval_MSE/layer0": 697.1178701616536, + "eval_accuracy": 0.4637486628652817, + "eval_dead_code_fraction/layer0": 0.05465, + "eval_input_norm/layer0": 31.99837304089923, + "eval_loss": 2.525156259536743, + "eval_multicode_k": 1, + "eval_output_norm/layer0": 14.48893911880305, + "eval_runtime": 156.9005, + "eval_samples_per_second": 29.465, + "eval_steps_per_second": 1.842, + "step": 1500 + }, + { + "MSE": 696.0442759195965, + "MSE/layer0": 696.0442759195965, + "dead_code_fraction": 0.05145, + "dead_code_fraction/layer0": 0.05145, + "epoch": 0.15, + "input_norm": 31.99836520512899, + "input_norm/layer0": 31.99836520512899, + "learning_rate": 0.0005, + "loss": 2.4631, + "max_norm": 41.6606559753418, + "max_norm/layer0": 41.6606559753418, + "mean_norm": 35.58424758911133, + "mean_norm/layer0": 35.58424758911133, + "multicode_k": 1, + "output_norm": 14.54295777956645, + "output_norm/layer0": 14.54295777956645, + "step": 1550 + }, + { + "MSE": 691.8516132609051, + "MSE/layer0": 691.8516132609051, + "dead_code_fraction": 0.0558, + "dead_code_fraction/layer0": 0.0558, + "epoch": 0.16, + "input_norm": 31.998375968933097, + "input_norm/layer0": 31.998375968933097, + "learning_rate": 0.0005, + "loss": 2.5501, + "max_norm": 42.08574676513672, + "max_norm/layer0": 42.08574676513672, + "mean_norm": 35.72518730163574, + "mean_norm/layer0": 35.72518730163574, + "multicode_k": 1, + "output_norm": 14.692513732910157, + "output_norm/layer0": 14.692513732910157, + "step": 1600 + }, + { + "MSE": 688.7181396484375, + "MSE/layer0": 688.7181396484375, + "dead_code_fraction": 0.0595, + "dead_code_fraction/layer0": 0.0595, + "epoch": 0.17, + "input_norm": 31.99835859616598, + "input_norm/layer0": 31.99835859616598, + "learning_rate": 0.0005, + "loss": 2.4699, + "max_norm": 42.610233306884766, + "max_norm/layer0": 42.610233306884766, + "mean_norm": 35.86595916748047, + "mean_norm/layer0": 35.86595916748047, + "multicode_k": 1, + "output_norm": 14.833582207361854, + "output_norm/layer0": 14.833582207361854, + "step": 1650 + }, + { + "MSE": 685.5445822143549, + "MSE/layer0": 685.5445822143549, + "dead_code_fraction": 0.06595, + "dead_code_fraction/layer0": 0.06595, + "epoch": 0.17, + "input_norm": 31.99835782368978, + "input_norm/layer0": 31.99835782368978, + "learning_rate": 0.0005, + "loss": 2.5014, + "max_norm": 43.15216064453125, + "max_norm/layer0": 43.15216064453125, + "mean_norm": 36.00602149963379, + "mean_norm/layer0": 36.00602149963379, + "multicode_k": 1, + "output_norm": 14.96381513118744, + "output_norm/layer0": 14.96381513118744, + "step": 1700 + }, + { + "MSE": 683.2388099161783, + "MSE/layer0": 683.2388099161783, + "dead_code_fraction": 0.0708, + "dead_code_fraction/layer0": 0.0708, + "epoch": 0.17, + "input_norm": 31.998353064854925, + "input_norm/layer0": 31.998353064854925, + "learning_rate": 0.0005, + "loss": 2.4762, + "max_norm": 43.683807373046875, + "max_norm/layer0": 43.683807373046875, + "mean_norm": 36.14344596862793, + "mean_norm/layer0": 36.14344596862793, + "multicode_k": 1, + "output_norm": 15.08479848066965, + "output_norm/layer0": 15.08479848066965, + "step": 1750 + }, + { + "MSE": 680.5147140502929, + "MSE/layer0": 680.5147140502929, + "dead_code_fraction": 0.0711, + "dead_code_fraction/layer0": 0.0711, + "epoch": 0.18, + "input_norm": 31.998323942820228, + "input_norm/layer0": 31.998323942820228, + "learning_rate": 0.0005, + "loss": 2.4017, + "max_norm": 44.204158782958984, + "max_norm/layer0": 44.204158782958984, + "mean_norm": 36.281328201293945, + "mean_norm/layer0": 36.281328201293945, + "multicode_k": 1, + "output_norm": 15.21150853157043, + "output_norm/layer0": 15.21150853157043, + "step": 1800 + }, + { + "MSE": 677.8235699462891, + "MSE/layer0": 677.8235699462891, + "dead_code_fraction": 0.0789, + "dead_code_fraction/layer0": 0.0789, + "epoch": 0.18, + "input_norm": 31.99832211176553, + "input_norm/layer0": 31.99832211176553, + "learning_rate": 0.0005, + "loss": 2.4204, + "max_norm": 44.73421096801758, + "max_norm/layer0": 44.73421096801758, + "mean_norm": 36.41860580444336, + "mean_norm/layer0": 36.41860580444336, + "multicode_k": 1, + "output_norm": 15.32913914521535, + "output_norm/layer0": 15.32913914521535, + "step": 1850 + }, + { + "MSE": 674.8260657755535, + "MSE/layer0": 674.8260657755535, + "dead_code_fraction": 0.0859, + "dead_code_fraction/layer0": 0.0859, + "epoch": 0.19, + "input_norm": 31.998327109018952, + "input_norm/layer0": 31.998327109018952, + "learning_rate": 0.0005, + "loss": 2.4612, + "max_norm": 45.264217376708984, + "max_norm/layer0": 45.264217376708984, + "mean_norm": 36.55377197265625, + "mean_norm/layer0": 36.55377197265625, + "multicode_k": 1, + "output_norm": 15.449233846664427, + "output_norm/layer0": 15.449233846664427, + "step": 1900 + }, + { + "MSE": 672.4308366902667, + "MSE/layer0": 672.4308366902667, + "dead_code_fraction": 0.08975, + "dead_code_fraction/layer0": 0.08975, + "epoch": 0.2, + "input_norm": 31.998313461939492, + "input_norm/layer0": 31.998313461939492, + "learning_rate": 0.0005, + "loss": 2.413, + "max_norm": 45.7476692199707, + "max_norm/layer0": 45.7476692199707, + "mean_norm": 36.687320709228516, + "mean_norm/layer0": 36.687320709228516, + "multicode_k": 1, + "output_norm": 15.564360074996952, + "output_norm/layer0": 15.564360074996952, + "step": 1950 + }, + { + "MSE": 669.9350853474932, + "MSE/layer0": 669.9350853474932, + "dead_code_fraction": 0.09495, + "dead_code_fraction/layer0": 0.09495, + "epoch": 0.2, + "input_norm": 31.998307892481467, + "input_norm/layer0": 31.998307892481467, + "learning_rate": 0.0005, + "loss": 2.4197, + "max_norm": 46.2595100402832, + "max_norm/layer0": 46.2595100402832, + "mean_norm": 36.82127571105957, + "mean_norm/layer0": 36.82127571105957, + "multicode_k": 1, + "output_norm": 15.671763955752056, + "output_norm/layer0": 15.671763955752056, + "step": 2000 + }, + { + "epoch": 0.2, + "eval_MSE/layer0": 670.0254334077002, + "eval_accuracy": 0.47584128742153486, + "eval_dead_code_fraction/layer0": 0.0988, + "eval_input_norm/layer0": 31.99830309178647, + "eval_loss": 2.409283399581909, + "eval_multicode_k": 1, + "eval_output_norm/layer0": 15.728763990528059, + "eval_runtime": 158.0617, + "eval_samples_per_second": 29.248, + "eval_steps_per_second": 1.828, + "step": 2000 + }, + { + "MSE": 667.9600658162435, + "MSE/layer0": 667.9600658162435, + "dead_code_fraction": 0.09825, + "dead_code_fraction/layer0": 0.09825, + "epoch": 0.2, + "input_norm": 31.99829890569051, + "input_norm/layer0": 31.99829890569051, + "learning_rate": 0.0005, + "loss": 2.3908, + "max_norm": 46.76186752319336, + "max_norm/layer0": 46.76186752319336, + "mean_norm": 36.954044342041016, + "mean_norm/layer0": 36.954044342041016, + "multicode_k": 1, + "output_norm": 15.786985732714339, + "output_norm/layer0": 15.786985732714339, + "step": 2050 + }, + { + "MSE": 665.8677533976238, + "MSE/layer0": 665.8677533976238, + "dead_code_fraction": 0.10105, + "dead_code_fraction/layer0": 0.10105, + "epoch": 0.21, + "input_norm": 31.998287776311233, + "input_norm/layer0": 31.998287776311233, + "learning_rate": 0.0005, + "loss": 2.3532, + "max_norm": 47.23879623413086, + "max_norm/layer0": 47.23879623413086, + "mean_norm": 37.08414268493652, + "mean_norm/layer0": 37.08414268493652, + "multicode_k": 1, + "output_norm": 15.887771523793544, + "output_norm/layer0": 15.887771523793544, + "step": 2100 + }, + { + "MSE": 664.0484969075521, + "MSE/layer0": 664.0484969075521, + "dead_code_fraction": 0.10515, + "dead_code_fraction/layer0": 0.10515, + "epoch": 0.21, + "input_norm": 31.998289143244435, + "input_norm/layer0": 31.998289143244435, + "learning_rate": 0.0005, + "loss": 2.3835, + "max_norm": 47.72446823120117, + "max_norm/layer0": 47.72446823120117, + "mean_norm": 37.21368408203125, + "mean_norm/layer0": 37.21368408203125, + "multicode_k": 1, + "output_norm": 15.987558364868171, + "output_norm/layer0": 15.987558364868171, + "step": 2150 + }, + { + "MSE": 662.043323059082, + "MSE/layer0": 662.043323059082, + "dead_code_fraction": 0.11065, + "dead_code_fraction/layer0": 0.11065, + "epoch": 0.22, + "input_norm": 31.998284489313747, + "input_norm/layer0": 31.998284489313747, + "learning_rate": 0.0005, + "loss": 2.3711, + "max_norm": 48.21998596191406, + "max_norm/layer0": 48.21998596191406, + "mean_norm": 37.34214973449707, + "mean_norm/layer0": 37.34214973449707, + "multicode_k": 1, + "output_norm": 16.084624527295432, + "output_norm/layer0": 16.084624527295432, + "step": 2200 + }, + { + "MSE": 660.071201883952, + "MSE/layer0": 660.071201883952, + "dead_code_fraction": 0.1138, + "dead_code_fraction/layer0": 0.1138, + "epoch": 0.23, + "input_norm": 31.998274552027382, + "input_norm/layer0": 31.998274552027382, + "learning_rate": 0.0005, + "loss": 2.3361, + "max_norm": 48.656124114990234, + "max_norm/layer0": 48.656124114990234, + "mean_norm": 37.46707344055176, + "mean_norm/layer0": 37.46707344055176, + "multicode_k": 1, + "output_norm": 16.1770029671987, + "output_norm/layer0": 16.1770029671987, + "step": 2250 + }, + { + "MSE": 658.2848066202794, + "MSE/layer0": 658.2848066202794, + "dead_code_fraction": 0.11715, + "dead_code_fraction/layer0": 0.11715, + "epoch": 0.23, + "input_norm": 31.998281342188513, + "input_norm/layer0": 31.998281342188513, + "learning_rate": 0.0005, + "loss": 2.3697, + "max_norm": 49.14850616455078, + "max_norm/layer0": 49.14850616455078, + "mean_norm": 37.592119216918945, + "mean_norm/layer0": 37.592119216918945, + "multicode_k": 1, + "output_norm": 16.273267321586616, + "output_norm/layer0": 16.273267321586616, + "step": 2300 + }, + { + "MSE": 656.6614913940434, + "MSE/layer0": 656.6614913940434, + "dead_code_fraction": 0.1208, + "dead_code_fraction/layer0": 0.1208, + "epoch": 0.23, + "input_norm": 31.99827545166017, + "input_norm/layer0": 31.99827545166017, + "learning_rate": 0.0005, + "loss": 2.3691, + "max_norm": 49.611228942871094, + "max_norm/layer0": 49.611228942871094, + "mean_norm": 37.71496772766113, + "mean_norm/layer0": 37.71496772766113, + "multicode_k": 1, + "output_norm": 16.361617434819536, + "output_norm/layer0": 16.361617434819536, + "step": 2350 + }, + { + "MSE": 654.7551118977863, + "MSE/layer0": 654.7551118977863, + "dead_code_fraction": 0.12205, + "dead_code_fraction/layer0": 0.12205, + "epoch": 0.24, + "input_norm": 31.998258228302007, + "input_norm/layer0": 31.998258228302007, + "learning_rate": 0.0005, + "loss": 2.3413, + "max_norm": 50.082008361816406, + "max_norm/layer0": 50.082008361816406, + "mean_norm": 37.836740493774414, + "mean_norm/layer0": 37.836740493774414, + "multicode_k": 1, + "output_norm": 16.442067163785307, + "output_norm/layer0": 16.442067163785307, + "step": 2400 + }, + { + "MSE": 653.2320398966472, + "MSE/layer0": 653.2320398966472, + "dead_code_fraction": 0.1261, + "dead_code_fraction/layer0": 0.1261, + "epoch": 0.24, + "input_norm": 31.99826599121093, + "input_norm/layer0": 31.99826599121093, + "learning_rate": 0.0005, + "loss": 2.3415, + "max_norm": 50.542850494384766, + "max_norm/layer0": 50.542850494384766, + "mean_norm": 37.956573486328125, + "mean_norm/layer0": 37.956573486328125, + "multicode_k": 1, + "output_norm": 16.545647277832018, + "output_norm/layer0": 16.545647277832018, + "step": 2450 + }, + { + "MSE": 652.0689453124999, + "MSE/layer0": 652.0689453124999, + "dead_code_fraction": 0.1305, + "dead_code_fraction/layer0": 0.1305, + "epoch": 0.25, + "input_norm": 31.998266054789227, + "input_norm/layer0": 31.998266054789227, + "learning_rate": 0.0005, + "loss": 2.3541, + "max_norm": 50.972904205322266, + "max_norm/layer0": 50.972904205322266, + "mean_norm": 38.07469177246094, + "mean_norm/layer0": 38.07469177246094, + "multicode_k": 1, + "output_norm": 16.614015088081356, + "output_norm/layer0": 16.614015088081356, + "step": 2500 + }, + { + "epoch": 0.25, + "eval_MSE/layer0": 651.1296869864225, + "eval_accuracy": 0.48371217143066175, + "eval_dead_code_fraction/layer0": 0.1337, + "eval_input_norm/layer0": 31.998264631048162, + "eval_loss": 2.340399742126465, + "eval_multicode_k": 1, + "eval_output_norm/layer0": 16.66022368217996, + "eval_runtime": 157.8946, + "eval_samples_per_second": 29.279, + "eval_steps_per_second": 1.83, + "step": 2500 + }, + { + "MSE": 650.5154676310221, + "MSE/layer0": 650.5154676310221, + "dead_code_fraction": 0.1312, + "dead_code_fraction/layer0": 0.1312, + "epoch": 0.26, + "input_norm": 31.99826429367065, + "input_norm/layer0": 31.99826429367065, + "learning_rate": 0.0005, + "loss": 2.3374, + "max_norm": 51.42794418334961, + "max_norm/layer0": 51.42794418334961, + "mean_norm": 38.19082260131836, + "mean_norm/layer0": 38.19082260131836, + "multicode_k": 1, + "output_norm": 16.705677251815793, + "output_norm/layer0": 16.705677251815793, + "step": 2550 + }, + { + "MSE": 649.4798397827149, + "MSE/layer0": 649.4798397827149, + "dead_code_fraction": 0.13625, + "dead_code_fraction/layer0": 0.13625, + "epoch": 0.26, + "input_norm": 31.99826188405354, + "input_norm/layer0": 31.99826188405354, + "learning_rate": 0.0005, + "loss": 2.3364, + "max_norm": 51.84079360961914, + "max_norm/layer0": 51.84079360961914, + "mean_norm": 38.306650161743164, + "mean_norm/layer0": 38.306650161743164, + "multicode_k": 1, + "output_norm": 16.774758176803587, + "output_norm/layer0": 16.774758176803587, + "step": 2600 + }, + { + "MSE": 648.4373052978513, + "MSE/layer0": 648.4373052978513, + "dead_code_fraction": 0.13795, + "dead_code_fraction/layer0": 0.13795, + "epoch": 0.27, + "input_norm": 31.998252007166542, + "input_norm/layer0": 31.998252007166542, + "learning_rate": 0.0005, + "loss": 2.3162, + "max_norm": 52.24661636352539, + "max_norm/layer0": 52.24661636352539, + "mean_norm": 38.41937828063965, + "mean_norm/layer0": 38.41937828063965, + "multicode_k": 1, + "output_norm": 16.851604979832963, + "output_norm/layer0": 16.851604979832963, + "step": 2650 + }, + { + "MSE": 647.0678014119467, + "MSE/layer0": 647.0678014119467, + "dead_code_fraction": 0.1397, + "dead_code_fraction/layer0": 0.1397, + "epoch": 0.27, + "input_norm": 31.998265930811563, + "input_norm/layer0": 31.998265930811563, + "learning_rate": 0.0005, + "loss": 2.3497, + "max_norm": 52.66170120239258, + "max_norm/layer0": 52.66170120239258, + "mean_norm": 38.53024482727051, + "mean_norm/layer0": 38.53024482727051, + "multicode_k": 1, + "output_norm": 16.925416787465398, + "output_norm/layer0": 16.925416787465398, + "step": 2700 + }, + { + "MSE": 646.4085242716471, + "MSE/layer0": 646.4085242716471, + "dead_code_fraction": 0.14125, + "dead_code_fraction/layer0": 0.14125, + "epoch": 0.28, + "input_norm": 31.99825245221455, + "input_norm/layer0": 31.99825245221455, + "learning_rate": 0.0005, + "loss": 2.301, + "max_norm": 53.03037643432617, + "max_norm/layer0": 53.03037643432617, + "mean_norm": 38.63713836669922, + "mean_norm/layer0": 38.63713836669922, + "multicode_k": 1, + "output_norm": 16.985576423009235, + "output_norm/layer0": 16.985576423009235, + "step": 2750 + }, + { + "MSE": 644.7344170125325, + "MSE/layer0": 644.7344170125325, + "dead_code_fraction": 0.14415, + "dead_code_fraction/layer0": 0.14415, + "epoch": 0.28, + "input_norm": 31.998260081609082, + "input_norm/layer0": 31.998260081609082, + "learning_rate": 0.0005, + "loss": 2.3395, + "max_norm": 53.41487503051758, + "max_norm/layer0": 53.41487503051758, + "mean_norm": 38.74285697937012, + "mean_norm/layer0": 38.74285697937012, + "multicode_k": 1, + "output_norm": 17.068980147043867, + "output_norm/layer0": 17.068980147043867, + "step": 2800 + }, + { + "MSE": 644.636144104004, + "MSE/layer0": 644.636144104004, + "dead_code_fraction": 0.14565, + "dead_code_fraction/layer0": 0.14565, + "epoch": 0.28, + "input_norm": 31.998243366877247, + "input_norm/layer0": 31.998243366877247, + "learning_rate": 0.0005, + "loss": 2.2757, + "max_norm": 53.792579650878906, + "max_norm/layer0": 53.792579650878906, + "mean_norm": 38.84635543823242, + "mean_norm/layer0": 38.84635543823242, + "multicode_k": 1, + "output_norm": 17.124992834726967, + "output_norm/layer0": 17.124992834726967, + "step": 2850 + }, + { + "MSE": 643.8843309529623, + "MSE/layer0": 643.8843309529623, + "dead_code_fraction": 0.14495, + "dead_code_fraction/layer0": 0.14495, + "epoch": 0.29, + "input_norm": 31.998242295583093, + "input_norm/layer0": 31.998242295583093, + "learning_rate": 0.0005, + "loss": 2.3057, + "max_norm": 54.146453857421875, + "max_norm/layer0": 54.146453857421875, + "mean_norm": 38.947309494018555, + "mean_norm/layer0": 38.947309494018555, + "multicode_k": 1, + "output_norm": 17.17694611549377, + "output_norm/layer0": 17.17694611549377, + "step": 2900 + }, + { + "MSE": 642.6776557413741, + "MSE/layer0": 642.6776557413741, + "dead_code_fraction": 0.1504, + "dead_code_fraction/layer0": 0.1504, + "epoch": 0.29, + "input_norm": 31.998272593816125, + "input_norm/layer0": 31.998272593816125, + "learning_rate": 0.0005, + "loss": 2.3545, + "max_norm": 54.51527404785156, + "max_norm/layer0": 54.51527404785156, + "mean_norm": 39.047607421875, + "mean_norm/layer0": 39.047607421875, + "multicode_k": 1, + "output_norm": 17.240235595703133, + "output_norm/layer0": 17.240235595703133, + "step": 2950 + }, + { + "MSE": 643.1047460937498, + "MSE/layer0": 643.1047460937498, + "dead_code_fraction": 0.1483, + "dead_code_fraction/layer0": 0.1483, + "epoch": 0.3, + "input_norm": 31.998249003092454, + "input_norm/layer0": 31.998249003092454, + "learning_rate": 0.0005, + "loss": 2.2742, + "max_norm": 54.86568832397461, + "max_norm/layer0": 54.86568832397461, + "mean_norm": 39.14469337463379, + "mean_norm/layer0": 39.14469337463379, + "multicode_k": 1, + "output_norm": 17.28876600265503, + "output_norm/layer0": 17.28876600265503, + "step": 3000 + }, + { + "epoch": 0.3, + "eval_MSE/layer0": 642.6360311704152, + "eval_accuracy": 0.49030507287608877, + "eval_dead_code_fraction/layer0": 0.14995, + "eval_input_norm/layer0": 31.998255163205542, + "eval_loss": 2.2907073497772217, + "eval_multicode_k": 1, + "eval_output_norm/layer0": 17.324301861386118, + "eval_runtime": 157.9262, + "eval_samples_per_second": 29.273, + "eval_steps_per_second": 1.83, + "step": 3000 + }, + { + "MSE": 641.9817254638668, + "MSE/layer0": 641.9817254638668, + "dead_code_fraction": 0.1511, + "dead_code_fraction/layer0": 0.1511, + "epoch": 0.3, + "input_norm": 31.99826343536376, + "input_norm/layer0": 31.99826343536376, + "learning_rate": 0.0005, + "loss": 2.3422, + "max_norm": 55.2226676940918, + "max_norm/layer0": 55.2226676940918, + "mean_norm": 39.23999786376953, + "mean_norm/layer0": 39.23999786376953, + "multicode_k": 1, + "output_norm": 17.350644410451252, + "output_norm/layer0": 17.350644410451252, + "step": 3050 + }, + { + "MSE": 641.9993333943689, + "MSE/layer0": 641.9993333943689, + "dead_code_fraction": 0.1504, + "dead_code_fraction/layer0": 0.1504, + "epoch": 0.31, + "input_norm": 31.998250141143807, + "input_norm/layer0": 31.998250141143807, + "learning_rate": 0.0005, + "loss": 2.2814, + "max_norm": 55.56163787841797, + "max_norm/layer0": 55.56163787841797, + "mean_norm": 39.33370780944824, + "mean_norm/layer0": 39.33370780944824, + "multicode_k": 1, + "output_norm": 17.39312816301982, + "output_norm/layer0": 17.39312816301982, + "step": 3100 + }, + { + "MSE": 641.5148900349936, + "MSE/layer0": 641.5148900349936, + "dead_code_fraction": 0.15185, + "dead_code_fraction/layer0": 0.15185, + "epoch": 0.32, + "input_norm": 31.998260364532467, + "input_norm/layer0": 31.998260364532467, + "learning_rate": 0.0005, + "loss": 2.3152, + "max_norm": 55.8856315612793, + "max_norm/layer0": 55.8856315612793, + "mean_norm": 39.42481803894043, + "mean_norm/layer0": 39.42481803894043, + "multicode_k": 1, + "output_norm": 17.44178107897441, + "output_norm/layer0": 17.44178107897441, + "step": 3150 + }, + { + "MSE": 640.499552408854, + "MSE/layer0": 640.499552408854, + "dead_code_fraction": 0.1516, + "dead_code_fraction/layer0": 0.1516, + "epoch": 0.32, + "input_norm": 31.99825292587281, + "input_norm/layer0": 31.99825292587281, + "learning_rate": 0.0005, + "loss": 2.2462, + "max_norm": 56.21445846557617, + "max_norm/layer0": 56.21445846557617, + "mean_norm": 39.51395606994629, + "mean_norm/layer0": 39.51395606994629, + "multicode_k": 1, + "output_norm": 17.50789775530497, + "output_norm/layer0": 17.50789775530497, + "step": 3200 + }, + { + "MSE": 640.565166829427, + "MSE/layer0": 640.565166829427, + "dead_code_fraction": 0.15285, + "dead_code_fraction/layer0": 0.15285, + "epoch": 0.33, + "input_norm": 31.998250306447353, + "input_norm/layer0": 31.998250306447353, + "learning_rate": 0.0005, + "loss": 2.2595, + "max_norm": 56.526973724365234, + "max_norm/layer0": 56.526973724365234, + "mean_norm": 39.601173400878906, + "mean_norm/layer0": 39.601173400878906, + "multicode_k": 1, + "output_norm": 17.54366443951924, + "output_norm/layer0": 17.54366443951924, + "step": 3250 + }, + { + "MSE": 640.8991118367509, + "MSE/layer0": 640.8991118367509, + "dead_code_fraction": 0.1531, + "dead_code_fraction/layer0": 0.1531, + "epoch": 0.33, + "input_norm": 31.998245798746755, + "input_norm/layer0": 31.998245798746755, + "learning_rate": 0.0005, + "loss": 2.2326, + "max_norm": 56.82651138305664, + "max_norm/layer0": 56.82651138305664, + "mean_norm": 39.684635162353516, + "mean_norm/layer0": 39.684635162353516, + "multicode_k": 1, + "output_norm": 17.578553660710664, + "output_norm/layer0": 17.578553660710664, + "step": 3300 + }, + { + "MSE": 640.486218770345, + "MSE/layer0": 640.486218770345, + "dead_code_fraction": 0.15345, + "dead_code_fraction/layer0": 0.15345, + "epoch": 0.34, + "input_norm": 31.998255780537924, + "input_norm/layer0": 31.998255780537924, + "learning_rate": 0.0005, + "loss": 2.2733, + "max_norm": 57.12877655029297, + "max_norm/layer0": 57.12877655029297, + "mean_norm": 39.76711463928223, + "mean_norm/layer0": 39.76711463928223, + "multicode_k": 1, + "output_norm": 17.619242086410516, + "output_norm/layer0": 17.619242086410516, + "step": 3350 + }, + { + "MSE": 639.5240251668292, + "MSE/layer0": 639.5240251668292, + "dead_code_fraction": 0.15565, + "dead_code_fraction/layer0": 0.15565, + "epoch": 0.34, + "input_norm": 31.998264500300095, + "input_norm/layer0": 31.998264500300095, + "learning_rate": 0.0005, + "loss": 2.2633, + "max_norm": 57.42041778564453, + "max_norm/layer0": 57.42041778564453, + "mean_norm": 39.84800338745117, + "mean_norm/layer0": 39.84800338745117, + "multicode_k": 1, + "output_norm": 17.667484652201342, + "output_norm/layer0": 17.667484652201342, + "step": 3400 + }, + { + "MSE": 639.2691174316408, + "MSE/layer0": 639.2691174316408, + "dead_code_fraction": 0.15605, + "dead_code_fraction/layer0": 0.15605, + "epoch": 0.34, + "input_norm": 31.99825723965962, + "input_norm/layer0": 31.99825723965962, + "learning_rate": 0.0005, + "loss": 2.2495, + "max_norm": 57.706260681152344, + "max_norm/layer0": 57.706260681152344, + "mean_norm": 39.92698097229004, + "mean_norm/layer0": 39.92698097229004, + "multicode_k": 1, + "output_norm": 17.705148900349947, + "output_norm/layer0": 17.705148900349947, + "step": 3450 + }, + { + "MSE": 639.3908192952478, + "MSE/layer0": 639.3908192952478, + "dead_code_fraction": 0.15655, + "dead_code_fraction/layer0": 0.15655, + "epoch": 0.35, + "input_norm": 31.9982618745168, + "input_norm/layer0": 31.9982618745168, + "learning_rate": 0.0005, + "loss": 2.2488, + "max_norm": 57.98209762573242, + "max_norm/layer0": 57.98209762573242, + "mean_norm": 40.005022048950195, + "mean_norm/layer0": 40.005022048950195, + "multicode_k": 1, + "output_norm": 17.73683495521545, + "output_norm/layer0": 17.73683495521545, + "step": 3500 + }, + { + "epoch": 0.35, + "eval_MSE/layer0": 640.3158307464355, + "eval_accuracy": 0.49451074349024987, + "eval_dead_code_fraction/layer0": 0.1575, + "eval_input_norm/layer0": 31.99825158244007, + "eval_loss": 2.2564537525177, + "eval_multicode_k": 1, + "eval_output_norm/layer0": 17.756634140179678, + "eval_runtime": 157.599, + "eval_samples_per_second": 29.334, + "eval_steps_per_second": 1.834, + "step": 3500 + }, + { + "MSE": 639.6838141886391, + "MSE/layer0": 639.6838141886391, + "dead_code_fraction": 0.157, + "dead_code_fraction/layer0": 0.157, + "epoch": 0.35, + "input_norm": 31.99826737085978, + "input_norm/layer0": 31.99826737085978, + "learning_rate": 0.0005, + "loss": 2.2738, + "max_norm": 58.24713897705078, + "max_norm/layer0": 58.24713897705078, + "mean_norm": 40.08023262023926, + "mean_norm/layer0": 40.08023262023926, + "multicode_k": 1, + "output_norm": 17.755876312255864, + "output_norm/layer0": 17.755876312255864, + "step": 3550 + }, + { + "MSE": 639.2954257202149, + "MSE/layer0": 639.2954257202149, + "dead_code_fraction": 0.1559, + "dead_code_fraction/layer0": 0.1559, + "epoch": 0.36, + "input_norm": 31.998245531717938, + "input_norm/layer0": 31.998245531717938, + "learning_rate": 0.0005, + "loss": 2.2036, + "max_norm": 58.50635528564453, + "max_norm/layer0": 58.50635528564453, + "mean_norm": 40.15370178222656, + "mean_norm/layer0": 40.15370178222656, + "multicode_k": 1, + "output_norm": 17.812968953450515, + "output_norm/layer0": 17.812968953450515, + "step": 3600 + }, + { + "MSE": 639.3338773600263, + "MSE/layer0": 639.3338773600263, + "dead_code_fraction": 0.15905, + "dead_code_fraction/layer0": 0.15905, + "epoch": 0.36, + "input_norm": 31.99827084223429, + "input_norm/layer0": 31.99827084223429, + "learning_rate": 0.0005, + "loss": 2.2672, + "max_norm": 58.76622009277344, + "max_norm/layer0": 58.76622009277344, + "mean_norm": 40.22719192504883, + "mean_norm/layer0": 40.22719192504883, + "multicode_k": 1, + "output_norm": 17.821751413345332, + "output_norm/layer0": 17.821751413345332, + "step": 3650 + }, + { + "MSE": 639.0531684366863, + "MSE/layer0": 639.0531684366863, + "dead_code_fraction": 0.15975, + "dead_code_fraction/layer0": 0.15975, + "epoch": 0.37, + "input_norm": 31.99827636400858, + "input_norm/layer0": 31.99827636400858, + "learning_rate": 0.0005, + "loss": 2.2444, + "max_norm": 59.02393341064453, + "max_norm/layer0": 59.02393341064453, + "mean_norm": 40.298166275024414, + "mean_norm/layer0": 40.298166275024414, + "multicode_k": 1, + "output_norm": 17.85403926849365, + "output_norm/layer0": 17.85403926849365, + "step": 3700 + }, + { + "MSE": 638.9355230712894, + "MSE/layer0": 638.9355230712894, + "dead_code_fraction": 0.1605, + "dead_code_fraction/layer0": 0.1605, + "epoch": 0.38, + "input_norm": 31.99827863057454, + "input_norm/layer0": 31.99827863057454, + "learning_rate": 0.0005, + "loss": 2.2454, + "max_norm": 59.28853225708008, + "max_norm/layer0": 59.28853225708008, + "mean_norm": 40.36880111694336, + "mean_norm/layer0": 40.36880111694336, + "multicode_k": 1, + "output_norm": 17.88599282582601, + "output_norm/layer0": 17.88599282582601, + "step": 3750 + }, + { + "MSE": 639.0086972045899, + "MSE/layer0": 639.0086972045899, + "dead_code_fraction": 0.16125, + "dead_code_fraction/layer0": 0.16125, + "epoch": 0.38, + "input_norm": 31.9982850710551, + "input_norm/layer0": 31.9982850710551, + "learning_rate": 0.0005, + "loss": 2.27, + "max_norm": 59.546451568603516, + "max_norm/layer0": 59.546451568603516, + "mean_norm": 40.43776512145996, + "mean_norm/layer0": 40.43776512145996, + "multicode_k": 1, + "output_norm": 17.90943570454915, + "output_norm/layer0": 17.90943570454915, + "step": 3800 + }, + { + "MSE": 638.9462019856769, + "MSE/layer0": 638.9462019856769, + "dead_code_fraction": 0.1583, + "dead_code_fraction/layer0": 0.1583, + "epoch": 0.39, + "input_norm": 31.998278980255122, + "input_norm/layer0": 31.998278980255122, + "learning_rate": 0.0005, + "loss": 2.2438, + "max_norm": 59.80894470214844, + "max_norm/layer0": 59.80894470214844, + "mean_norm": 40.50556945800781, + "mean_norm/layer0": 40.50556945800781, + "multicode_k": 1, + "output_norm": 17.947645209630338, + "output_norm/layer0": 17.947645209630338, + "step": 3850 + }, + { + "MSE": 639.4130173746743, + "MSE/layer0": 639.4130173746743, + "dead_code_fraction": 0.16135, + "dead_code_fraction/layer0": 0.16135, + "epoch": 0.39, + "input_norm": 31.998284943898526, + "input_norm/layer0": 31.998284943898526, + "learning_rate": 0.0005, + "loss": 2.2526, + "max_norm": 60.04655075073242, + "max_norm/layer0": 60.04655075073242, + "mean_norm": 40.57136535644531, + "mean_norm/layer0": 40.57136535644531, + "multicode_k": 1, + "output_norm": 17.960218969980872, + "output_norm/layer0": 17.960218969980872, + "step": 3900 + }, + { + "MSE": 639.8756245930986, + "MSE/layer0": 639.8756245930986, + "dead_code_fraction": 0.15755, + "dead_code_fraction/layer0": 0.15755, + "epoch": 0.4, + "input_norm": 31.998285398483272, + "input_norm/layer0": 31.998285398483272, + "learning_rate": 0.0005, + "loss": 2.2266, + "max_norm": 60.29011154174805, + "max_norm/layer0": 60.29011154174805, + "mean_norm": 40.63625144958496, + "mean_norm/layer0": 40.63625144958496, + "multicode_k": 1, + "output_norm": 17.97526204744974, + "output_norm/layer0": 17.97526204744974, + "step": 3950 + }, + { + "MSE": 640.046054585775, + "MSE/layer0": 640.046054585775, + "dead_code_fraction": 0.1605, + "dead_code_fraction/layer0": 0.1605, + "epoch": 0.4, + "input_norm": 31.998285433451336, + "input_norm/layer0": 31.998285433451336, + "learning_rate": 0.0005, + "loss": 2.2287, + "max_norm": 60.52168655395508, + "max_norm/layer0": 60.52168655395508, + "mean_norm": 40.698753356933594, + "mean_norm/layer0": 40.698753356933594, + "multicode_k": 1, + "output_norm": 17.997498016357426, + "output_norm/layer0": 17.997498016357426, + "step": 4000 + }, + { + "epoch": 0.4, + "eval_MSE/layer0": 638.8422855589264, + "eval_accuracy": 0.49670513512593434, + "eval_dead_code_fraction/layer0": 0.16135, + "eval_input_norm/layer0": 31.99827300782795, + "eval_loss": 2.2332887649536133, + "eval_multicode_k": 1, + "eval_output_norm/layer0": 18.022313365115252, + "eval_runtime": 158.1975, + "eval_samples_per_second": 29.223, + "eval_steps_per_second": 1.827, + "step": 4000 + }, + { + "MSE": 639.952128804525, + "MSE/layer0": 639.952128804525, + "dead_code_fraction": 0.16035, + "dead_code_fraction/layer0": 0.16035, + "epoch": 0.41, + "input_norm": 31.998286927541105, + "input_norm/layer0": 31.998286927541105, + "learning_rate": 0.0005, + "loss": 2.2193, + "max_norm": 60.76009750366211, + "max_norm/layer0": 60.76009750366211, + "mean_norm": 40.75992393493652, + "mean_norm/layer0": 40.75992393493652, + "multicode_k": 1, + "output_norm": 18.024092137018826, + "output_norm/layer0": 18.024092137018826, + "step": 4050 + }, + { + "MSE": 640.5730131022133, + "MSE/layer0": 640.5730131022133, + "dead_code_fraction": 0.1634, + "dead_code_fraction/layer0": 0.1634, + "epoch": 0.41, + "input_norm": 31.99828769365946, + "input_norm/layer0": 31.99828769365946, + "learning_rate": 0.0005, + "loss": 2.2301, + "max_norm": 60.98118591308594, + "max_norm/layer0": 60.98118591308594, + "mean_norm": 40.8208122253418, + "mean_norm/layer0": 40.8208122253418, + "multicode_k": 1, + "output_norm": 18.02807092984518, + "output_norm/layer0": 18.02807092984518, + "step": 4100 + }, + { + "MSE": 640.4258350626628, + "MSE/layer0": 640.4258350626628, + "dead_code_fraction": 0.1612, + "dead_code_fraction/layer0": 0.1612, + "epoch": 0.41, + "input_norm": 31.998297268549607, + "input_norm/layer0": 31.998297268549607, + "learning_rate": 0.0005, + "loss": 2.2307, + "max_norm": 61.19542694091797, + "max_norm/layer0": 61.19542694091797, + "mean_norm": 40.88128852844238, + "mean_norm/layer0": 40.88128852844238, + "multicode_k": 1, + "output_norm": 18.04158842404684, + "output_norm/layer0": 18.04158842404684, + "step": 4150 + }, + { + "MSE": 639.5022987874349, + "MSE/layer0": 639.5022987874349, + "dead_code_fraction": 0.16015, + "dead_code_fraction/layer0": 0.16015, + "epoch": 0.42, + "input_norm": 31.99830362319948, + "input_norm/layer0": 31.99830362319948, + "learning_rate": 0.0005, + "loss": 2.247, + "max_norm": 61.4282341003418, + "max_norm/layer0": 61.4282341003418, + "mean_norm": 40.941017150878906, + "mean_norm/layer0": 40.941017150878906, + "multicode_k": 1, + "output_norm": 18.079462760289516, + "output_norm/layer0": 18.079462760289516, + "step": 4200 + }, + { + "MSE": 640.0252755737306, + "MSE/layer0": 640.0252755737306, + "dead_code_fraction": 0.1604, + "dead_code_fraction/layer0": 0.1604, + "epoch": 0.42, + "input_norm": 31.99830138524374, + "input_norm/layer0": 31.99830138524374, + "learning_rate": 0.0005, + "loss": 2.2314, + "max_norm": 61.648414611816406, + "max_norm/layer0": 61.648414611816406, + "mean_norm": 40.99977684020996, + "mean_norm/layer0": 40.99977684020996, + "multicode_k": 1, + "output_norm": 18.09024664878845, + "output_norm/layer0": 18.09024664878845, + "step": 4250 + }, + { + "MSE": 639.7621870930992, + "MSE/layer0": 639.7621870930992, + "dead_code_fraction": 0.16365, + "dead_code_fraction/layer0": 0.16365, + "epoch": 0.43, + "input_norm": 31.99830169359842, + "input_norm/layer0": 31.99830169359842, + "learning_rate": 0.0005, + "loss": 2.2144, + "max_norm": 61.86562728881836, + "max_norm/layer0": 61.86562728881836, + "mean_norm": 41.05688667297363, + "mean_norm/layer0": 41.05688667297363, + "multicode_k": 1, + "output_norm": 18.11899041493734, + "output_norm/layer0": 18.11899041493734, + "step": 4300 + }, + { + "MSE": 640.3955947875975, + "MSE/layer0": 640.3955947875975, + "dead_code_fraction": 0.1592, + "dead_code_fraction/layer0": 0.1592, + "epoch": 0.43, + "input_norm": 31.998302787144976, + "input_norm/layer0": 31.998302787144976, + "learning_rate": 0.0005, + "loss": 2.2077, + "max_norm": 62.060550689697266, + "max_norm/layer0": 62.060550689697266, + "mean_norm": 41.11246681213379, + "mean_norm/layer0": 41.11246681213379, + "multicode_k": 1, + "output_norm": 18.121066271464024, + "output_norm/layer0": 18.121066271464024, + "step": 4350 + }, + { + "MSE": 639.8066222127281, + "MSE/layer0": 639.8066222127281, + "dead_code_fraction": 0.1635, + "dead_code_fraction/layer0": 0.1635, + "epoch": 0.44, + "input_norm": 31.998314228057872, + "input_norm/layer0": 31.998314228057872, + "learning_rate": 0.0005, + "loss": 2.2287, + "max_norm": 62.275943756103516, + "max_norm/layer0": 62.275943756103516, + "mean_norm": 41.167396545410156, + "mean_norm/layer0": 41.167396545410156, + "multicode_k": 1, + "output_norm": 18.142933632532753, + "output_norm/layer0": 18.142933632532753, + "step": 4400 + }, + { + "MSE": 639.8160334269206, + "MSE/layer0": 639.8160334269206, + "dead_code_fraction": 0.16385, + "dead_code_fraction/layer0": 0.16385, + "epoch": 0.45, + "input_norm": 31.99831516901653, + "input_norm/layer0": 31.99831516901653, + "learning_rate": 0.0005, + "loss": 2.215, + "max_norm": 62.486793518066406, + "max_norm/layer0": 62.486793518066406, + "mean_norm": 41.221702575683594, + "mean_norm/layer0": 41.221702575683594, + "multicode_k": 1, + "output_norm": 18.167670075098677, + "output_norm/layer0": 18.167670075098677, + "step": 4450 + }, + { + "MSE": 640.1416244506836, + "MSE/layer0": 640.1416244506836, + "dead_code_fraction": 0.16675, + "dead_code_fraction/layer0": 0.16675, + "epoch": 0.45, + "input_norm": 31.998327512741074, + "input_norm/layer0": 31.998327512741074, + "learning_rate": 0.0005, + "loss": 2.2576, + "max_norm": 62.67790222167969, + "max_norm/layer0": 62.67790222167969, + "mean_norm": 41.275705337524414, + "mean_norm/layer0": 41.275705337524414, + "multicode_k": 1, + "output_norm": 18.162402251561495, + "output_norm/layer0": 18.162402251561495, + "step": 4500 + }, + { + "epoch": 0.45, + "eval_MSE/layer0": 639.7464034476376, + "eval_accuracy": 0.49916912103175737, + "eval_dead_code_fraction/layer0": 0.16755, + "eval_input_norm/layer0": 31.998309449821527, + "eval_loss": 2.215489387512207, + "eval_multicode_k": 1, + "eval_output_norm/layer0": 18.191884751910905, + "eval_runtime": 157.9108, + "eval_samples_per_second": 29.276, + "eval_steps_per_second": 1.83, + "step": 4500 + }, + { + "MSE": 640.4858755493162, + "MSE/layer0": 640.4858755493162, + "dead_code_fraction": 0.1633, + "dead_code_fraction/layer0": 0.1633, + "epoch": 0.46, + "input_norm": 31.99831475257874, + "input_norm/layer0": 31.99831475257874, + "learning_rate": 0.0005, + "loss": 2.1869, + "max_norm": 62.88029861450195, + "max_norm/layer0": 62.88029861450195, + "mean_norm": 41.32845115661621, + "mean_norm/layer0": 41.32845115661621, + "multicode_k": 1, + "output_norm": 18.18791744550069, + "output_norm/layer0": 18.18791744550069, + "step": 4550 + }, + { + "MSE": 640.7411174519859, + "MSE/layer0": 640.7411174519859, + "dead_code_fraction": 0.16375, + "dead_code_fraction/layer0": 0.16375, + "epoch": 0.46, + "input_norm": 31.998337395985924, + "input_norm/layer0": 31.998337395985924, + "learning_rate": 0.0005, + "loss": 2.2426, + "max_norm": 63.06687545776367, + "max_norm/layer0": 63.06687545776367, + "mean_norm": 41.38063049316406, + "mean_norm/layer0": 41.38063049316406, + "multicode_k": 1, + "output_norm": 18.185693721771244, + "output_norm/layer0": 18.185693721771244, + "step": 4600 + }, + { + "MSE": 640.3254055786131, + "MSE/layer0": 640.3254055786131, + "dead_code_fraction": 0.1637, + "dead_code_fraction/layer0": 0.1637, + "epoch": 0.47, + "input_norm": 31.998331034978236, + "input_norm/layer0": 31.998331034978236, + "learning_rate": 0.0005, + "loss": 2.2103, + "max_norm": 63.24494171142578, + "max_norm/layer0": 63.24494171142578, + "mean_norm": 41.4316463470459, + "mean_norm/layer0": 41.4316463470459, + "multicode_k": 1, + "output_norm": 18.215761318206788, + "output_norm/layer0": 18.215761318206788, + "step": 4650 + }, + { + "MSE": 640.0117889404299, + "MSE/layer0": 640.0117889404299, + "dead_code_fraction": 0.1653, + "dead_code_fraction/layer0": 0.1653, + "epoch": 0.47, + "input_norm": 31.998331683476753, + "input_norm/layer0": 31.998331683476753, + "learning_rate": 0.0005, + "loss": 2.189, + "max_norm": 63.429969787597656, + "max_norm/layer0": 63.429969787597656, + "mean_norm": 41.481590270996094, + "mean_norm/layer0": 41.481590270996094, + "multicode_k": 1, + "output_norm": 18.22781534512837, + "output_norm/layer0": 18.22781534512837, + "step": 4700 + }, + { + "MSE": 640.034366455078, + "MSE/layer0": 640.034366455078, + "dead_code_fraction": 0.16355, + "dead_code_fraction/layer0": 0.16355, + "epoch": 0.47, + "input_norm": 31.998335037231442, + "input_norm/layer0": 31.998335037231442, + "learning_rate": 0.0005, + "loss": 2.1746, + "max_norm": 63.604644775390625, + "max_norm/layer0": 63.604644775390625, + "mean_norm": 41.530447006225586, + "mean_norm/layer0": 41.530447006225586, + "multicode_k": 1, + "output_norm": 18.247568238576257, + "output_norm/layer0": 18.247568238576257, + "step": 4750 + }, + { + "MSE": 641.3402144411094, + "MSE/layer0": 641.3402144411094, + "dead_code_fraction": 0.16465, + "dead_code_fraction/layer0": 0.16465, + "epoch": 1.0, + "input_norm": 31.998328861016873, + "input_norm/layer0": 31.998328861016873, + "learning_rate": 0.0005, + "loss": 2.1589, + "max_norm": 63.7794303894043, + "max_norm/layer0": 63.7794303894043, + "mean_norm": 41.577613830566406, + "mean_norm/layer0": 41.577613830566406, + "multicode_k": 1, + "output_norm": 18.227145007068557, + "output_norm/layer0": 18.227145007068557, + "step": 4800 + }, + { + "MSE": 640.0454110717772, + "MSE/layer0": 640.0454110717772, + "dead_code_fraction": 0.16635, + "dead_code_fraction/layer0": 0.16635, + "epoch": 1.01, + "input_norm": 31.998361120224008, + "input_norm/layer0": 31.998361120224008, + "learning_rate": 0.0005, + "loss": 2.2585, + "max_norm": 63.96126937866211, + "max_norm/layer0": 63.96126937866211, + "mean_norm": 41.62501525878906, + "mean_norm/layer0": 41.62501525878906, + "multicode_k": 1, + "output_norm": 18.258941303888953, + "output_norm/layer0": 18.258941303888953, + "step": 4850 + }, + { + "MSE": 640.0055624389651, + "MSE/layer0": 640.0055624389651, + "dead_code_fraction": 0.16515, + "dead_code_fraction/layer0": 0.16515, + "epoch": 1.01, + "input_norm": 31.998340495427446, + "input_norm/layer0": 31.998340495427446, + "learning_rate": 0.0005, + "loss": 2.1578, + "max_norm": 64.13137817382812, + "max_norm/layer0": 64.13137817382812, + "mean_norm": 41.672542572021484, + "mean_norm/layer0": 41.672542572021484, + "multicode_k": 1, + "output_norm": 18.272732003529867, + "output_norm/layer0": 18.272732003529867, + "step": 4900 + }, + { + "MSE": 640.108183898926, + "MSE/layer0": 640.108183898926, + "dead_code_fraction": 0.1668, + "dead_code_fraction/layer0": 0.1668, + "epoch": 1.02, + "input_norm": 31.998351519902535, + "input_norm/layer0": 31.998351519902535, + "learning_rate": 0.0005, + "loss": 2.1809, + "max_norm": 64.30120086669922, + "max_norm/layer0": 64.30120086669922, + "mean_norm": 41.71914291381836, + "mean_norm/layer0": 41.71914291381836, + "multicode_k": 1, + "output_norm": 18.278290322621658, + "output_norm/layer0": 18.278290322621658, + "step": 4950 + }, + { + "MSE": 639.8438139851887, + "MSE/layer0": 639.8438139851887, + "dead_code_fraction": 0.1671, + "dead_code_fraction/layer0": 0.1671, + "epoch": 1.02, + "input_norm": 31.998358796437586, + "input_norm/layer0": 31.998358796437586, + "learning_rate": 0.0005, + "loss": 2.1901, + "max_norm": 64.4720230102539, + "max_norm/layer0": 64.4720230102539, + "mean_norm": 41.76571464538574, + "mean_norm/layer0": 41.76571464538574, + "multicode_k": 1, + "output_norm": 18.29636260350546, + "output_norm/layer0": 18.29636260350546, + "step": 5000 + }, + { + "epoch": 1.02, + "eval_MSE/layer0": 638.1766108092672, + "eval_accuracy": 0.5013711247409516, + "eval_dead_code_fraction/layer0": 0.16955, + "eval_input_norm/layer0": 31.99836045128427, + "eval_loss": 2.202561616897583, + "eval_multicode_k": 1, + "eval_output_norm/layer0": 18.311866774487846, + "eval_runtime": 158.3836, + "eval_samples_per_second": 29.189, + "eval_steps_per_second": 1.825, + "step": 5000 + }, + { + "MSE": 639.5863418579103, + "MSE/layer0": 639.5863418579103, + "dead_code_fraction": 0.1675, + "dead_code_fraction/layer0": 0.1675, + "epoch": 1.03, + "input_norm": 31.99836014429728, + "input_norm/layer0": 31.99836014429728, + "learning_rate": 0.0005, + "loss": 2.1914, + "max_norm": 64.65907287597656, + "max_norm/layer0": 64.65907287597656, + "mean_norm": 41.8120174407959, + "mean_norm/layer0": 41.8120174407959, + "multicode_k": 1, + "output_norm": 18.301887426376346, + "output_norm/layer0": 18.301887426376346, + "step": 5050 + }, + { + "MSE": 639.5830181884764, + "MSE/layer0": 639.5830181884764, + "dead_code_fraction": 0.16545, + "dead_code_fraction/layer0": 0.16545, + "epoch": 1.03, + "input_norm": 31.998363596598292, + "input_norm/layer0": 31.998363596598292, + "learning_rate": 0.0005, + "loss": 2.1503, + "max_norm": 64.83207702636719, + "max_norm/layer0": 64.83207702636719, + "mean_norm": 41.85700988769531, + "mean_norm/layer0": 41.85700988769531, + "multicode_k": 1, + "output_norm": 18.3204355875651, + "output_norm/layer0": 18.3204355875651, + "step": 5100 + }, + { + "MSE": 640.3749603271485, + "MSE/layer0": 640.3749603271485, + "dead_code_fraction": 0.16725, + "dead_code_fraction/layer0": 0.16725, + "epoch": 1.04, + "input_norm": 31.9983703358968, + "input_norm/layer0": 31.9983703358968, + "learning_rate": 0.0005, + "loss": 2.1634, + "max_norm": 65.003662109375, + "max_norm/layer0": 65.003662109375, + "mean_norm": 41.90180778503418, + "mean_norm/layer0": 41.90180778503418, + "multicode_k": 1, + "output_norm": 18.316434319814057, + "output_norm/layer0": 18.316434319814057, + "step": 5150 + }, + { + "MSE": 639.0211893717446, + "MSE/layer0": 639.0211893717446, + "dead_code_fraction": 0.16875, + "dead_code_fraction/layer0": 0.16875, + "epoch": 1.04, + "input_norm": 31.998389561971024, + "input_norm/layer0": 31.998389561971024, + "learning_rate": 0.0005, + "loss": 2.224, + "max_norm": 65.19213104248047, + "max_norm/layer0": 65.19213104248047, + "mean_norm": 41.94645309448242, + "mean_norm/layer0": 41.94645309448242, + "multicode_k": 1, + "output_norm": 18.33804360071819, + "output_norm/layer0": 18.33804360071819, + "step": 5200 + }, + { + "MSE": 638.6207899983721, + "MSE/layer0": 638.6207899983721, + "dead_code_fraction": 0.17055, + "dead_code_fraction/layer0": 0.17055, + "epoch": 1.05, + "input_norm": 31.998394203186038, + "input_norm/layer0": 31.998394203186038, + "learning_rate": 0.0005, + "loss": 2.2235, + "max_norm": 65.36846160888672, + "max_norm/layer0": 65.36846160888672, + "mean_norm": 41.991315841674805, + "mean_norm/layer0": 41.991315841674805, + "multicode_k": 1, + "output_norm": 18.346421286265045, + "output_norm/layer0": 18.346421286265045, + "step": 5250 + }, + { + "MSE": 638.3484961954751, + "MSE/layer0": 638.3484961954751, + "dead_code_fraction": 0.1704, + "dead_code_fraction/layer0": 0.1704, + "epoch": 1.05, + "input_norm": 31.998402004241942, + "input_norm/layer0": 31.998402004241942, + "learning_rate": 0.0005, + "loss": 2.209, + "max_norm": 65.53041076660156, + "max_norm/layer0": 65.53041076660156, + "mean_norm": 42.0357780456543, + "mean_norm/layer0": 42.0357780456543, + "multicode_k": 1, + "output_norm": 18.351918992996215, + "output_norm/layer0": 18.351918992996215, + "step": 5300 + }, + { + "MSE": 638.9349023437496, + "MSE/layer0": 638.9349023437496, + "dead_code_fraction": 0.1671, + "dead_code_fraction/layer0": 0.1671, + "epoch": 1.06, + "input_norm": 31.998392171859756, + "input_norm/layer0": 31.998392171859756, + "learning_rate": 0.0005, + "loss": 2.1737, + "max_norm": 65.69444274902344, + "max_norm/layer0": 65.69444274902344, + "mean_norm": 42.078935623168945, + "mean_norm/layer0": 42.078935623168945, + "multicode_k": 1, + "output_norm": 18.365610707600908, + "output_norm/layer0": 18.365610707600908, + "step": 5350 + }, + { + "MSE": 638.1850768025716, + "MSE/layer0": 638.1850768025716, + "dead_code_fraction": 0.17125, + "dead_code_fraction/layer0": 0.17125, + "epoch": 1.06, + "input_norm": 31.99840373039246, + "input_norm/layer0": 31.99840373039246, + "learning_rate": 0.0005, + "loss": 2.1904, + "max_norm": 65.84613037109375, + "max_norm/layer0": 65.84613037109375, + "mean_norm": 42.122589111328125, + "mean_norm/layer0": 42.122589111328125, + "multicode_k": 1, + "output_norm": 18.371175734202062, + "output_norm/layer0": 18.371175734202062, + "step": 5400 + }, + { + "MSE": 637.5771400960282, + "MSE/layer0": 637.5771400960282, + "dead_code_fraction": 0.17005, + "dead_code_fraction/layer0": 0.17005, + "epoch": 1.07, + "input_norm": 31.998408838907892, + "input_norm/layer0": 31.998408838907892, + "learning_rate": 0.0005, + "loss": 2.2013, + "max_norm": 66.00259399414062, + "max_norm/layer0": 66.00259399414062, + "mean_norm": 42.16551399230957, + "mean_norm/layer0": 42.16551399230957, + "multicode_k": 1, + "output_norm": 18.396056934992465, + "output_norm/layer0": 18.396056934992465, + "step": 5450 + }, + { + "MSE": 637.4973764038084, + "MSE/layer0": 637.4973764038084, + "dead_code_fraction": 0.17135, + "dead_code_fraction/layer0": 0.17135, + "epoch": 1.07, + "input_norm": 31.998402996063238, + "input_norm/layer0": 31.998402996063238, + "learning_rate": 0.0005, + "loss": 2.1686, + "max_norm": 66.15951538085938, + "max_norm/layer0": 66.15951538085938, + "mean_norm": 42.207963943481445, + "mean_norm/layer0": 42.207963943481445, + "multicode_k": 1, + "output_norm": 18.402882191340133, + "output_norm/layer0": 18.402882191340133, + "step": 5500 + }, + { + "epoch": 1.07, + "eval_MSE/layer0": 638.6084431543663, + "eval_accuracy": 0.5026125270625071, + "eval_dead_code_fraction/layer0": 0.17165, + "eval_input_norm/layer0": 31.99841410479916, + "eval_loss": 2.1934523582458496, + "eval_multicode_k": 1, + "eval_output_norm/layer0": 18.401259186926417, + "eval_runtime": 158.4926, + "eval_samples_per_second": 29.169, + "eval_steps_per_second": 1.823, + "step": 5500 + }, + { + "MSE": 637.3816906738282, + "MSE/layer0": 637.3816906738282, + "dead_code_fraction": 0.17125, + "dead_code_fraction/layer0": 0.17125, + "epoch": 1.08, + "input_norm": 31.998415158589676, + "input_norm/layer0": 31.998415158589676, + "learning_rate": 0.0005, + "loss": 2.2097, + "max_norm": 66.32366180419922, + "max_norm/layer0": 66.32366180419922, + "mean_norm": 42.25027084350586, + "mean_norm/layer0": 42.25027084350586, + "multicode_k": 1, + "output_norm": 18.40568763732911, + "output_norm/layer0": 18.40568763732911, + "step": 5550 + }, + { + "MSE": 636.5928268432615, + "MSE/layer0": 636.5928268432615, + "dead_code_fraction": 0.1711, + "dead_code_fraction/layer0": 0.1711, + "epoch": 1.08, + "input_norm": 31.99841807047526, + "input_norm/layer0": 31.99841807047526, + "learning_rate": 0.0005, + "loss": 2.1987, + "max_norm": 66.49840545654297, + "max_norm/layer0": 66.49840545654297, + "mean_norm": 42.29284858703613, + "mean_norm/layer0": 42.29284858703613, + "multicode_k": 1, + "output_norm": 18.424939454396565, + "output_norm/layer0": 18.424939454396565, + "step": 5600 + }, + { + "MSE": 637.195534973145, + "MSE/layer0": 637.195534973145, + "dead_code_fraction": 0.17175, + "dead_code_fraction/layer0": 0.17175, + "epoch": 1.09, + "input_norm": 31.99841377894082, + "input_norm/layer0": 31.99841377894082, + "learning_rate": 0.0005, + "loss": 2.1571, + "max_norm": 66.6655502319336, + "max_norm/layer0": 66.6655502319336, + "mean_norm": 42.33401679992676, + "mean_norm/layer0": 42.33401679992676, + "multicode_k": 1, + "output_norm": 18.427337226867675, + "output_norm/layer0": 18.427337226867675, + "step": 5650 + }, + { + "MSE": 635.8865025838217, + "MSE/layer0": 635.8865025838217, + "dead_code_fraction": 0.1736, + "dead_code_fraction/layer0": 0.1736, + "epoch": 1.09, + "input_norm": 31.998435058593753, + "input_norm/layer0": 31.998435058593753, + "learning_rate": 0.0005, + "loss": 2.2146, + "max_norm": 66.82868957519531, + "max_norm/layer0": 66.82868957519531, + "mean_norm": 42.37582206726074, + "mean_norm/layer0": 42.37582206726074, + "multicode_k": 1, + "output_norm": 18.443573204676298, + "output_norm/layer0": 18.443573204676298, + "step": 5700 + }, + { + "MSE": 636.1581252034503, + "MSE/layer0": 636.1581252034503, + "dead_code_fraction": 0.17225, + "dead_code_fraction/layer0": 0.17225, + "epoch": 1.1, + "input_norm": 31.998433354695635, + "input_norm/layer0": 31.998433354695635, + "learning_rate": 0.0005, + "loss": 2.171, + "max_norm": 66.9796371459961, + "max_norm/layer0": 66.9796371459961, + "mean_norm": 42.41728591918945, + "mean_norm/layer0": 42.41728591918945, + "multicode_k": 1, + "output_norm": 18.440257479349775, + "output_norm/layer0": 18.440257479349775, + "step": 5750 + }, + { + "MSE": 636.7286339314779, + "MSE/layer0": 636.7286339314779, + "dead_code_fraction": 0.1738, + "dead_code_fraction/layer0": 0.1738, + "epoch": 1.1, + "input_norm": 31.998429416020713, + "input_norm/layer0": 31.998429416020713, + "learning_rate": 0.0005, + "loss": 2.1502, + "max_norm": 67.13478088378906, + "max_norm/layer0": 67.13478088378906, + "mean_norm": 42.45817756652832, + "mean_norm/layer0": 42.45817756652832, + "multicode_k": 1, + "output_norm": 18.442232058842986, + "output_norm/layer0": 18.442232058842986, + "step": 5800 + }, + { + "MSE": 635.2576449584958, + "MSE/layer0": 635.2576449584958, + "dead_code_fraction": 0.17405, + "dead_code_fraction/layer0": 0.17405, + "epoch": 1.11, + "input_norm": 31.99844219843547, + "input_norm/layer0": 31.99844219843547, + "learning_rate": 0.0005, + "loss": 2.2067, + "max_norm": 67.28919982910156, + "max_norm/layer0": 67.28919982910156, + "mean_norm": 42.49948501586914, + "mean_norm/layer0": 42.49948501586914, + "multicode_k": 1, + "output_norm": 18.46717386881511, + "output_norm/layer0": 18.46717386881511, + "step": 5850 + }, + { + "MSE": 636.0759664916989, + "MSE/layer0": 636.0759664916989, + "dead_code_fraction": 0.17355, + "dead_code_fraction/layer0": 0.17355, + "epoch": 1.11, + "input_norm": 31.998439470926915, + "input_norm/layer0": 31.998439470926915, + "learning_rate": 0.0005, + "loss": 2.1543, + "max_norm": 67.44383239746094, + "max_norm/layer0": 67.44383239746094, + "mean_norm": 42.53946495056152, + "mean_norm/layer0": 42.53946495056152, + "multicode_k": 1, + "output_norm": 18.469777971903483, + "output_norm/layer0": 18.469777971903483, + "step": 5900 + }, + { + "MSE": 635.3813305664057, + "MSE/layer0": 635.3813305664057, + "dead_code_fraction": 0.17405, + "dead_code_fraction/layer0": 0.17405, + "epoch": 1.12, + "input_norm": 31.99844372113545, + "input_norm/layer0": 31.99844372113545, + "learning_rate": 0.0005, + "loss": 2.1846, + "max_norm": 67.59025573730469, + "max_norm/layer0": 67.59025573730469, + "mean_norm": 42.58071327209473, + "mean_norm/layer0": 42.58071327209473, + "multicode_k": 1, + "output_norm": 18.477715517679847, + "output_norm/layer0": 18.477715517679847, + "step": 5950 + }, + { + "MSE": 634.5524212646484, + "MSE/layer0": 634.5524212646484, + "dead_code_fraction": 0.17535, + "dead_code_fraction/layer0": 0.17535, + "epoch": 1.12, + "input_norm": 31.998457225163776, + "input_norm/layer0": 31.998457225163776, + "learning_rate": 0.0005, + "loss": 2.2158, + "max_norm": 67.7379379272461, + "max_norm/layer0": 67.7379379272461, + "mean_norm": 42.62178421020508, + "mean_norm/layer0": 42.62178421020508, + "multicode_k": 1, + "output_norm": 18.489366165796913, + "output_norm/layer0": 18.489366165796913, + "step": 6000 + }, + { + "epoch": 1.12, + "eval_MSE/layer0": 632.9325560995336, + "eval_accuracy": 0.5036799089257694, + "eval_dead_code_fraction/layer0": 0.17795, + "eval_input_norm/layer0": 31.998461353451354, + "eval_loss": 2.1832942962646484, + "eval_multicode_k": 1, + "eval_output_norm/layer0": 18.51493810096293, + "eval_runtime": 158.8489, + "eval_samples_per_second": 29.103, + "eval_steps_per_second": 1.819, + "step": 6000 + }, + { + "MSE": 634.7784757486979, + "MSE/layer0": 634.7784757486979, + "dead_code_fraction": 0.1755, + "dead_code_fraction/layer0": 0.1755, + "epoch": 1.13, + "input_norm": 31.99845712025961, + "input_norm/layer0": 31.99845712025961, + "learning_rate": 0.0005, + "loss": 2.1789, + "max_norm": 67.8902816772461, + "max_norm/layer0": 67.8902816772461, + "mean_norm": 42.66269874572754, + "mean_norm/layer0": 42.66269874572754, + "multicode_k": 1, + "output_norm": 18.49625307718913, + "output_norm/layer0": 18.49625307718913, + "step": 6050 + }, + { + "MSE": 634.5078458658851, + "MSE/layer0": 634.5078458658851, + "dead_code_fraction": 0.17445, + "dead_code_fraction/layer0": 0.17445, + "epoch": 1.13, + "input_norm": 31.99845917383831, + "input_norm/layer0": 31.99845917383831, + "learning_rate": 0.0005, + "loss": 2.2009, + "max_norm": 68.04124450683594, + "max_norm/layer0": 68.04124450683594, + "mean_norm": 42.70250701904297, + "mean_norm/layer0": 42.70250701904297, + "multicode_k": 1, + "output_norm": 18.514623686472582, + "output_norm/layer0": 18.514623686472582, + "step": 6100 + }, + { + "MSE": 634.443066914876, + "MSE/layer0": 634.443066914876, + "dead_code_fraction": 0.17575, + "dead_code_fraction/layer0": 0.17575, + "epoch": 1.14, + "input_norm": 31.99845913887024, + "input_norm/layer0": 31.99845913887024, + "learning_rate": 0.0005, + "loss": 2.1623, + "max_norm": 68.17865753173828, + "max_norm/layer0": 68.17865753173828, + "mean_norm": 42.742488861083984, + "mean_norm/layer0": 42.742488861083984, + "multicode_k": 1, + "output_norm": 18.513023862838743, + "output_norm/layer0": 18.513023862838743, + "step": 6150 + }, + { + "MSE": 633.6522382609048, + "MSE/layer0": 633.6522382609048, + "dead_code_fraction": 0.17475, + "dead_code_fraction/layer0": 0.17475, + "epoch": 1.14, + "input_norm": 31.998471844991045, + "input_norm/layer0": 31.998471844991045, + "learning_rate": 0.0005, + "loss": 2.1824, + "max_norm": 68.31253051757812, + "max_norm/layer0": 68.31253051757812, + "mean_norm": 42.782148361206055, + "mean_norm/layer0": 42.782148361206055, + "multicode_k": 1, + "output_norm": 18.529316590627033, + "output_norm/layer0": 18.529316590627033, + "step": 6200 + }, + { + "MSE": 634.0474910481774, + "MSE/layer0": 634.0474910481774, + "dead_code_fraction": 0.1771, + "dead_code_fraction/layer0": 0.1771, + "epoch": 1.15, + "input_norm": 31.998480736414585, + "input_norm/layer0": 31.998480736414585, + "learning_rate": 0.0005, + "loss": 2.1948, + "max_norm": 68.44271850585938, + "max_norm/layer0": 68.44271850585938, + "mean_norm": 42.82079887390137, + "mean_norm/layer0": 42.82079887390137, + "multicode_k": 1, + "output_norm": 18.524528849919633, + "output_norm/layer0": 18.524528849919633, + "step": 6250 + }, + { + "MSE": 633.648407084147, + "MSE/layer0": 633.648407084147, + "dead_code_fraction": 0.1745, + "dead_code_fraction/layer0": 0.1745, + "epoch": 1.15, + "input_norm": 31.998468182881673, + "input_norm/layer0": 31.998468182881673, + "learning_rate": 0.0005, + "loss": 2.1145, + "max_norm": 68.57721710205078, + "max_norm/layer0": 68.57721710205078, + "mean_norm": 42.859825134277344, + "mean_norm/layer0": 42.859825134277344, + "multicode_k": 1, + "output_norm": 18.540853935877482, + "output_norm/layer0": 18.540853935877482, + "step": 6300 + }, + { + "MSE": 633.5945191446937, + "MSE/layer0": 633.5945191446937, + "dead_code_fraction": 0.17705, + "dead_code_fraction/layer0": 0.17705, + "epoch": 1.16, + "input_norm": 31.99847273508707, + "input_norm/layer0": 31.99847273508707, + "learning_rate": 0.0005, + "loss": 2.1507, + "max_norm": 68.7186050415039, + "max_norm/layer0": 68.7186050415039, + "mean_norm": 42.897830963134766, + "mean_norm/layer0": 42.897830963134766, + "multicode_k": 1, + "output_norm": 18.55124579429626, + "output_norm/layer0": 18.55124579429626, + "step": 6350 + }, + { + "MSE": 632.1478841145836, + "MSE/layer0": 632.1478841145836, + "dead_code_fraction": 0.1775, + "dead_code_fraction/layer0": 0.1775, + "epoch": 1.16, + "input_norm": 31.9984964243571, + "input_norm/layer0": 31.9984964243571, + "learning_rate": 0.0005, + "loss": 2.1962, + "max_norm": 68.85418701171875, + "max_norm/layer0": 68.85418701171875, + "mean_norm": 42.937448501586914, + "mean_norm/layer0": 42.937448501586914, + "multicode_k": 1, + "output_norm": 18.5615934785207, + "output_norm/layer0": 18.5615934785207, + "step": 6400 + }, + { + "MSE": 632.57952931722, + "MSE/layer0": 632.57952931722, + "dead_code_fraction": 0.1777, + "dead_code_fraction/layer0": 0.1777, + "epoch": 1.17, + "input_norm": 31.998487294514977, + "input_norm/layer0": 31.998487294514977, + "learning_rate": 0.0005, + "loss": 2.1627, + "max_norm": 69.0008316040039, + "max_norm/layer0": 69.0008316040039, + "mean_norm": 42.97622108459473, + "mean_norm/layer0": 42.97622108459473, + "multicode_k": 1, + "output_norm": 18.57248200734457, + "output_norm/layer0": 18.57248200734457, + "step": 6450 + }, + { + "MSE": 631.0360174560547, + "MSE/layer0": 631.0360174560547, + "dead_code_fraction": 0.1784, + "dead_code_fraction/layer0": 0.1784, + "epoch": 1.17, + "input_norm": 31.998495709101356, + "input_norm/layer0": 31.998495709101356, + "learning_rate": 0.0005, + "loss": 2.1843, + "max_norm": 69.13652038574219, + "max_norm/layer0": 69.13652038574219, + "mean_norm": 43.01558876037598, + "mean_norm/layer0": 43.01558876037598, + "multicode_k": 1, + "output_norm": 18.591586551666268, + "output_norm/layer0": 18.591586551666268, + "step": 6500 + }, + { + "epoch": 1.17, + "eval_MSE/layer0": 631.2925020152297, + "eval_accuracy": 0.5039093283634951, + "eval_dead_code_fraction/layer0": 0.1797, + "eval_input_norm/layer0": 31.99848882414009, + "eval_loss": 2.175981044769287, + "eval_multicode_k": 1, + "eval_output_norm/layer0": 18.598594732777567, + "eval_runtime": 158.1453, + "eval_samples_per_second": 29.233, + "eval_steps_per_second": 1.827, + "step": 6500 + }, + { + "MSE": 631.294188741048, + "MSE/layer0": 631.294188741048, + "dead_code_fraction": 0.1796, + "dead_code_fraction/layer0": 0.1796, + "epoch": 1.18, + "input_norm": 31.998505541483564, + "input_norm/layer0": 31.998505541483564, + "learning_rate": 0.0005, + "loss": 2.1855, + "max_norm": 69.26646423339844, + "max_norm/layer0": 69.26646423339844, + "mean_norm": 43.0548152923584, + "mean_norm/layer0": 43.0548152923584, + "multicode_k": 1, + "output_norm": 18.585241152445477, + "output_norm/layer0": 18.585241152445477, + "step": 6550 + }, + { + "MSE": 631.297376505534, + "MSE/layer0": 631.297376505534, + "dead_code_fraction": 0.1779, + "dead_code_fraction/layer0": 0.1779, + "epoch": 1.18, + "input_norm": 31.998487745920816, + "input_norm/layer0": 31.998487745920816, + "learning_rate": 0.0005, + "loss": 2.1197, + "max_norm": 69.3987808227539, + "max_norm/layer0": 69.3987808227539, + "mean_norm": 43.093589782714844, + "mean_norm/layer0": 43.093589782714844, + "multicode_k": 1, + "output_norm": 18.605287278493257, + "output_norm/layer0": 18.605287278493257, + "step": 6600 + }, + { + "MSE": 630.8991915893555, + "MSE/layer0": 630.8991915893555, + "dead_code_fraction": 0.17815, + "dead_code_fraction/layer0": 0.17815, + "epoch": 1.19, + "input_norm": 31.99848988215129, + "input_norm/layer0": 31.99848988215129, + "learning_rate": 0.0005, + "loss": 2.1248, + "max_norm": 69.52507019042969, + "max_norm/layer0": 69.52507019042969, + "mean_norm": 43.132524490356445, + "mean_norm/layer0": 43.132524490356445, + "multicode_k": 1, + "output_norm": 18.61235850652059, + "output_norm/layer0": 18.61235850652059, + "step": 6650 + }, + { + "MSE": 629.604686584473, + "MSE/layer0": 629.604686584473, + "dead_code_fraction": 0.17965, + "dead_code_fraction/layer0": 0.17965, + "epoch": 1.19, + "input_norm": 31.99852681477865, + "input_norm/layer0": 31.99852681477865, + "learning_rate": 0.0005, + "loss": 2.2265, + "max_norm": 69.66030883789062, + "max_norm/layer0": 69.66030883789062, + "mean_norm": 43.17206573486328, + "mean_norm/layer0": 43.17206573486328, + "multicode_k": 1, + "output_norm": 18.626948499679564, + "output_norm/layer0": 18.626948499679564, + "step": 6700 + }, + { + "MSE": 629.7875715128578, + "MSE/layer0": 629.7875715128578, + "dead_code_fraction": 0.1802, + "dead_code_fraction/layer0": 0.1802, + "epoch": 1.2, + "input_norm": 31.998509550094596, + "input_norm/layer0": 31.998509550094596, + "learning_rate": 0.0005, + "loss": 2.1432, + "max_norm": 69.78119659423828, + "max_norm/layer0": 69.78119659423828, + "mean_norm": 43.21029472351074, + "mean_norm/layer0": 43.21029472351074, + "multicode_k": 1, + "output_norm": 18.639319947560622, + "output_norm/layer0": 18.639319947560622, + "step": 6750 + }, + { + "MSE": 629.3708419799802, + "MSE/layer0": 629.3708419799802, + "dead_code_fraction": 0.18015, + "dead_code_fraction/layer0": 0.18015, + "epoch": 1.2, + "input_norm": 31.99851152102152, + "input_norm/layer0": 31.99851152102152, + "learning_rate": 0.0005, + "loss": 2.1606, + "max_norm": 69.91252899169922, + "max_norm/layer0": 69.91252899169922, + "mean_norm": 43.24948692321777, + "mean_norm/layer0": 43.24948692321777, + "multicode_k": 1, + "output_norm": 18.64606482187906, + "output_norm/layer0": 18.64606482187906, + "step": 6800 + }, + { + "MSE": 628.4038922119142, + "MSE/layer0": 628.4038922119142, + "dead_code_fraction": 0.1806, + "dead_code_fraction/layer0": 0.1806, + "epoch": 1.21, + "input_norm": 31.998516721725462, + "input_norm/layer0": 31.998516721725462, + "learning_rate": 0.0005, + "loss": 2.1582, + "max_norm": 70.04332733154297, + "max_norm/layer0": 70.04332733154297, + "mean_norm": 43.28862762451172, + "mean_norm/layer0": 43.28862762451172, + "multicode_k": 1, + "output_norm": 18.669758415222162, + "output_norm/layer0": 18.669758415222162, + "step": 6850 + }, + { + "MSE": 628.1812467447919, + "MSE/layer0": 628.1812467447919, + "dead_code_fraction": 0.18055, + "dead_code_fraction/layer0": 0.18055, + "epoch": 1.21, + "input_norm": 31.998515844345086, + "input_norm/layer0": 31.998515844345086, + "learning_rate": 0.0005, + "loss": 2.1433, + "max_norm": 70.16979217529297, + "max_norm/layer0": 70.16979217529297, + "mean_norm": 43.327192306518555, + "mean_norm/layer0": 43.327192306518555, + "multicode_k": 1, + "output_norm": 18.674684073130294, + "output_norm/layer0": 18.674684073130294, + "step": 6900 + }, + { + "MSE": 628.1862957763672, + "MSE/layer0": 628.1862957763672, + "dead_code_fraction": 0.18045, + "dead_code_fraction/layer0": 0.18045, + "epoch": 1.22, + "input_norm": 31.99852259953816, + "input_norm/layer0": 31.99852259953816, + "learning_rate": 0.0005, + "loss": 2.1458, + "max_norm": 70.29747772216797, + "max_norm/layer0": 70.29747772216797, + "mean_norm": 43.36609077453613, + "mean_norm/layer0": 43.36609077453613, + "multicode_k": 1, + "output_norm": 18.682749029795335, + "output_norm/layer0": 18.682749029795335, + "step": 6950 + }, + { + "MSE": 627.7981392415361, + "MSE/layer0": 627.7981392415361, + "dead_code_fraction": 0.18045, + "dead_code_fraction/layer0": 0.18045, + "epoch": 1.22, + "input_norm": 31.998523871103927, + "input_norm/layer0": 31.998523871103927, + "learning_rate": 0.0005, + "loss": 2.1339, + "max_norm": 70.425537109375, + "max_norm/layer0": 70.425537109375, + "mean_norm": 43.40445899963379, + "mean_norm/layer0": 43.40445899963379, + "multicode_k": 1, + "output_norm": 18.696380834579458, + "output_norm/layer0": 18.696380834579458, + "step": 7000 + }, + { + "epoch": 1.22, + "eval_MSE/layer0": 627.9790743019787, + "eval_accuracy": 0.5048263717749389, + "eval_dead_code_fraction/layer0": 0.1819, + "eval_input_norm/layer0": 31.998524618592334, + "eval_loss": 2.1696202754974365, + "eval_multicode_k": 1, + "eval_output_norm/layer0": 18.705300997223095, + "eval_runtime": 159.1692, + "eval_samples_per_second": 29.045, + "eval_steps_per_second": 1.816, + "step": 7000 + }, + { + "MSE": 627.3165437825519, + "MSE/layer0": 627.3165437825519, + "dead_code_fraction": 0.1822, + "dead_code_fraction/layer0": 0.1822, + "epoch": 1.23, + "input_norm": 31.99852600097656, + "input_norm/layer0": 31.99852600097656, + "learning_rate": 0.0005, + "loss": 2.1483, + "max_norm": 70.54450988769531, + "max_norm/layer0": 70.54450988769531, + "mean_norm": 43.442848205566406, + "mean_norm/layer0": 43.442848205566406, + "multicode_k": 1, + "output_norm": 18.700957148869843, + "output_norm/layer0": 18.700957148869843, + "step": 7050 + }, + { + "MSE": 626.7479965209961, + "MSE/layer0": 626.7479965209961, + "dead_code_fraction": 0.1804, + "dead_code_fraction/layer0": 0.1804, + "epoch": 1.23, + "input_norm": 31.998541386922206, + "input_norm/layer0": 31.998541386922206, + "learning_rate": 0.0005, + "loss": 2.1512, + "max_norm": 70.66608428955078, + "max_norm/layer0": 70.66608428955078, + "mean_norm": 43.48159599304199, + "mean_norm/layer0": 43.48159599304199, + "multicode_k": 1, + "output_norm": 18.714396947224948, + "output_norm/layer0": 18.714396947224948, + "step": 7100 + }, + { + "MSE": 626.4450497436519, + "MSE/layer0": 626.4450497436519, + "dead_code_fraction": 0.1823, + "dead_code_fraction/layer0": 0.1823, + "epoch": 1.24, + "input_norm": 31.998545411427806, + "input_norm/layer0": 31.998545411427806, + "learning_rate": 0.0005, + "loss": 2.1586, + "max_norm": 70.7937240600586, + "max_norm/layer0": 70.7937240600586, + "mean_norm": 43.5198860168457, + "mean_norm/layer0": 43.5198860168457, + "multicode_k": 1, + "output_norm": 18.726943721771242, + "output_norm/layer0": 18.726943721771242, + "step": 7150 + }, + { + "MSE": 626.1652618408202, + "MSE/layer0": 626.1652618408202, + "dead_code_fraction": 0.1814, + "dead_code_fraction/layer0": 0.1814, + "epoch": 1.24, + "input_norm": 31.998541978200272, + "input_norm/layer0": 31.998541978200272, + "learning_rate": 0.0005, + "loss": 2.1552, + "max_norm": 70.90862274169922, + "max_norm/layer0": 70.90862274169922, + "mean_norm": 43.55833053588867, + "mean_norm/layer0": 43.55833053588867, + "multicode_k": 1, + "output_norm": 18.731371542612706, + "output_norm/layer0": 18.731371542612706, + "step": 7200 + }, + { + "MSE": 625.2572497558597, + "MSE/layer0": 625.2572497558597, + "dead_code_fraction": 0.1839, + "dead_code_fraction/layer0": 0.1839, + "epoch": 1.25, + "input_norm": 31.998552770614626, + "input_norm/layer0": 31.998552770614626, + "learning_rate": 0.0005, + "loss": 2.1673, + "max_norm": 71.0332260131836, + "max_norm/layer0": 71.0332260131836, + "mean_norm": 43.5967960357666, + "mean_norm/layer0": 43.5967960357666, + "multicode_k": 1, + "output_norm": 18.756609748204536, + "output_norm/layer0": 18.756609748204536, + "step": 7250 + }, + { + "MSE": 624.7860372924804, + "MSE/layer0": 624.7860372924804, + "dead_code_fraction": 0.1831, + "dead_code_fraction/layer0": 0.1831, + "epoch": 1.25, + "input_norm": 31.998555002212534, + "input_norm/layer0": 31.998555002212534, + "learning_rate": 0.0005, + "loss": 2.1575, + "max_norm": 71.15364837646484, + "max_norm/layer0": 71.15364837646484, + "mean_norm": 43.63525199890137, + "mean_norm/layer0": 43.63525199890137, + "multicode_k": 1, + "output_norm": 18.767410192489628, + "output_norm/layer0": 18.767410192489628, + "step": 7300 + }, + { + "MSE": 624.7060753377278, + "MSE/layer0": 624.7060753377278, + "dead_code_fraction": 0.18335, + "dead_code_fraction/layer0": 0.18335, + "epoch": 1.26, + "input_norm": 31.99856230099995, + "input_norm/layer0": 31.99856230099995, + "learning_rate": 0.0005, + "loss": 2.1622, + "max_norm": 71.2812271118164, + "max_norm/layer0": 71.2812271118164, + "mean_norm": 43.67383575439453, + "mean_norm/layer0": 43.67383575439453, + "multicode_k": 1, + "output_norm": 18.77556623776755, + "output_norm/layer0": 18.77556623776755, + "step": 7350 + }, + { + "MSE": 623.9612900797528, + "MSE/layer0": 623.9612900797528, + "dead_code_fraction": 0.1834, + "dead_code_fraction/layer0": 0.1834, + "epoch": 1.26, + "input_norm": 31.998564265569062, + "input_norm/layer0": 31.998564265569062, + "learning_rate": 0.0005, + "loss": 2.1721, + "max_norm": 71.4082260131836, + "max_norm/layer0": 71.4082260131836, + "mean_norm": 43.71280097961426, + "mean_norm/layer0": 43.71280097961426, + "multicode_k": 1, + "output_norm": 18.78839166323344, + "output_norm/layer0": 18.78839166323344, + "step": 7400 + }, + { + "MSE": 623.9870674641929, + "MSE/layer0": 623.9870674641929, + "dead_code_fraction": 0.18355, + "dead_code_fraction/layer0": 0.18355, + "epoch": 1.27, + "input_norm": 31.998560991287228, + "input_norm/layer0": 31.998560991287228, + "learning_rate": 0.0005, + "loss": 2.1424, + "max_norm": 71.52973937988281, + "max_norm/layer0": 71.52973937988281, + "mean_norm": 43.75117111206055, + "mean_norm/layer0": 43.75117111206055, + "multicode_k": 1, + "output_norm": 18.79942525227863, + "output_norm/layer0": 18.79942525227863, + "step": 7450 + }, + { + "MSE": 622.7629538981118, + "MSE/layer0": 622.7629538981118, + "dead_code_fraction": 0.1844, + "dead_code_fraction/layer0": 0.1844, + "epoch": 1.27, + "input_norm": 31.998580735524506, + "input_norm/layer0": 31.998580735524506, + "learning_rate": 0.0005, + "loss": 2.187, + "max_norm": 71.64968872070312, + "max_norm/layer0": 71.64968872070312, + "mean_norm": 43.790061950683594, + "mean_norm/layer0": 43.790061950683594, + "multicode_k": 1, + "output_norm": 18.81509483655294, + "output_norm/layer0": 18.81509483655294, + "step": 7500 + }, + { + "epoch": 1.27, + "eval_MSE/layer0": 622.122652727573, + "eval_accuracy": 0.5062701283839631, + "eval_dead_code_fraction/layer0": 0.18665, + "eval_input_norm/layer0": 31.998566619663464, + "eval_loss": 2.1583967208862305, + "eval_multicode_k": 1, + "eval_output_norm/layer0": 18.83381110374323, + "eval_runtime": 158.6442, + "eval_samples_per_second": 29.141, + "eval_steps_per_second": 1.822, + "step": 7500 + }, + { + "MSE": 622.9042826334635, + "MSE/layer0": 622.9042826334635, + "dead_code_fraction": 0.1841, + "dead_code_fraction/layer0": 0.1841, + "epoch": 1.28, + "input_norm": 31.998572101593023, + "input_norm/layer0": 31.998572101593023, + "learning_rate": 0.0005, + "loss": 2.1434, + "max_norm": 71.76019287109375, + "max_norm/layer0": 71.76019287109375, + "mean_norm": 43.828460693359375, + "mean_norm/layer0": 43.828460693359375, + "multicode_k": 1, + "output_norm": 18.82229045232136, + "output_norm/layer0": 18.82229045232136, + "step": 7550 + }, + { + "MSE": 621.695281575521, + "MSE/layer0": 621.695281575521, + "dead_code_fraction": 0.1854, + "dead_code_fraction/layer0": 0.1854, + "epoch": 1.28, + "input_norm": 31.998584995269773, + "input_norm/layer0": 31.998584995269773, + "learning_rate": 0.0005, + "loss": 2.1712, + "max_norm": 71.87606048583984, + "max_norm/layer0": 71.87606048583984, + "mean_norm": 43.867136001586914, + "mean_norm/layer0": 43.867136001586914, + "multicode_k": 1, + "output_norm": 18.84749958992006, + "output_norm/layer0": 18.84749958992006, + "step": 7600 + }, + { + "MSE": 622.6274766031902, + "MSE/layer0": 622.6274766031902, + "dead_code_fraction": 0.18355, + "dead_code_fraction/layer0": 0.18355, + "epoch": 1.29, + "input_norm": 31.998571812311802, + "input_norm/layer0": 31.998571812311802, + "learning_rate": 0.0005, + "loss": 2.1412, + "max_norm": 71.98139953613281, + "max_norm/layer0": 71.98139953613281, + "mean_norm": 43.90544891357422, + "mean_norm/layer0": 43.90544891357422, + "multicode_k": 1, + "output_norm": 18.83851943016053, + "output_norm/layer0": 18.83851943016053, + "step": 7650 + }, + { + "MSE": 621.3046355183919, + "MSE/layer0": 621.3046355183919, + "dead_code_fraction": 0.18495, + "dead_code_fraction/layer0": 0.18495, + "epoch": 1.29, + "input_norm": 31.998585087458295, + "input_norm/layer0": 31.998585087458295, + "learning_rate": 0.0005, + "loss": 2.1711, + "max_norm": 72.08447265625, + "max_norm/layer0": 72.08447265625, + "mean_norm": 43.94407653808594, + "mean_norm/layer0": 43.94407653808594, + "multicode_k": 1, + "output_norm": 18.86037411053976, + "output_norm/layer0": 18.86037411053976, + "step": 7700 + }, + { + "MSE": 620.5873645019533, + "MSE/layer0": 620.5873645019533, + "dead_code_fraction": 0.18485, + "dead_code_fraction/layer0": 0.18485, + "epoch": 1.3, + "input_norm": 31.998606751759848, + "input_norm/layer0": 31.998606751759848, + "learning_rate": 0.0005, + "loss": 2.2069, + "max_norm": 72.18034362792969, + "max_norm/layer0": 72.18034362792969, + "mean_norm": 43.9833927154541, + "mean_norm/layer0": 43.9833927154541, + "multicode_k": 1, + "output_norm": 18.87507179578146, + "output_norm/layer0": 18.87507179578146, + "step": 7750 + }, + { + "MSE": 621.2272378540041, + "MSE/layer0": 621.2272378540041, + "dead_code_fraction": 0.18385, + "dead_code_fraction/layer0": 0.18385, + "epoch": 1.3, + "input_norm": 31.998583949406935, + "input_norm/layer0": 31.998583949406935, + "learning_rate": 0.0005, + "loss": 2.1217, + "max_norm": 72.27928924560547, + "max_norm/layer0": 72.27928924560547, + "mean_norm": 44.021806716918945, + "mean_norm/layer0": 44.021806716918945, + "multicode_k": 1, + "output_norm": 18.877027104695642, + "output_norm/layer0": 18.877027104695642, + "step": 7800 + }, + { + "MSE": 620.067134602865, + "MSE/layer0": 620.067134602865, + "dead_code_fraction": 0.18535, + "dead_code_fraction/layer0": 0.18535, + "epoch": 1.31, + "input_norm": 31.998594888051343, + "input_norm/layer0": 31.998594888051343, + "learning_rate": 0.0005, + "loss": 2.1753, + "max_norm": 72.39033508300781, + "max_norm/layer0": 72.39033508300781, + "mean_norm": 44.060611724853516, + "mean_norm/layer0": 44.060611724853516, + "multicode_k": 1, + "output_norm": 18.89820697466533, + "output_norm/layer0": 18.89820697466533, + "step": 7850 + }, + { + "MSE": 620.6704218546549, + "MSE/layer0": 620.6704218546549, + "dead_code_fraction": 0.18735, + "dead_code_fraction/layer0": 0.18735, + "epoch": 1.31, + "input_norm": 31.998597246805822, + "input_norm/layer0": 31.998597246805822, + "learning_rate": 0.0005, + "loss": 2.1778, + "max_norm": 72.4916000366211, + "max_norm/layer0": 72.4916000366211, + "mean_norm": 44.09913635253906, + "mean_norm/layer0": 44.09913635253906, + "multicode_k": 1, + "output_norm": 18.890051161448145, + "output_norm/layer0": 18.890051161448145, + "step": 7900 + }, + { + "MSE": 619.2155123901367, + "MSE/layer0": 619.2155123901367, + "dead_code_fraction": 0.1863, + "dead_code_fraction/layer0": 0.1863, + "epoch": 1.32, + "input_norm": 31.99860541343688, + "input_norm/layer0": 31.99860541343688, + "learning_rate": 0.0005, + "loss": 2.1684, + "max_norm": 72.59037017822266, + "max_norm/layer0": 72.59037017822266, + "mean_norm": 44.13744926452637, + "mean_norm/layer0": 44.13744926452637, + "multicode_k": 1, + "output_norm": 18.920912733078, + "output_norm/layer0": 18.920912733078, + "step": 7950 + }, + { + "MSE": 618.8985408528646, + "MSE/layer0": 618.8985408528646, + "dead_code_fraction": 0.1867, + "dead_code_fraction/layer0": 0.1867, + "epoch": 1.32, + "input_norm": 31.998596220016488, + "input_norm/layer0": 31.998596220016488, + "learning_rate": 0.0005, + "loss": 2.1302, + "max_norm": 72.69281768798828, + "max_norm/layer0": 72.69281768798828, + "mean_norm": 44.176042556762695, + "mean_norm/layer0": 44.176042556762695, + "multicode_k": 1, + "output_norm": 18.93559975624085, + "output_norm/layer0": 18.93559975624085, + "step": 8000 + }, + { + "epoch": 1.32, + "eval_MSE/layer0": 617.7161538934592, + "eval_accuracy": 0.5071360017457022, + "eval_dead_code_fraction/layer0": 0.18755, + "eval_input_norm/layer0": 31.99860155017712, + "eval_loss": 2.150786876678467, + "eval_multicode_k": 1, + "eval_output_norm/layer0": 18.949325338731256, + "eval_runtime": 158.4669, + "eval_samples_per_second": 29.173, + "eval_steps_per_second": 1.824, + "step": 8000 + }, + { + "MSE": 619.1937561035155, + "MSE/layer0": 619.1937561035155, + "dead_code_fraction": 0.18685, + "dead_code_fraction/layer0": 0.18685, + "epoch": 1.33, + "input_norm": 31.998596970240285, + "input_norm/layer0": 31.998596970240285, + "learning_rate": 0.0005, + "loss": 2.1279, + "max_norm": 72.79032135009766, + "max_norm/layer0": 72.79032135009766, + "mean_norm": 44.21445846557617, + "mean_norm/layer0": 44.21445846557617, + "multicode_k": 1, + "output_norm": 18.93686810175578, + "output_norm/layer0": 18.93686810175578, + "step": 8050 + }, + { + "MSE": 619.539402567546, + "MSE/layer0": 619.539402567546, + "dead_code_fraction": 0.18665, + "dead_code_fraction/layer0": 0.18665, + "epoch": 1.33, + "input_norm": 31.998598492940268, + "input_norm/layer0": 31.998598492940268, + "learning_rate": 0.0005, + "loss": 2.1113, + "max_norm": 72.88322448730469, + "max_norm/layer0": 72.88322448730469, + "mean_norm": 44.251609802246094, + "mean_norm/layer0": 44.251609802246094, + "multicode_k": 1, + "output_norm": 18.939144274393726, + "output_norm/layer0": 18.939144274393726, + "step": 8100 + }, + { + "MSE": 617.7248203531905, + "MSE/layer0": 617.7248203531905, + "dead_code_fraction": 0.18555, + "dead_code_fraction/layer0": 0.18555, + "epoch": 1.34, + "input_norm": 31.99861437161764, + "input_norm/layer0": 31.99861437161764, + "learning_rate": 0.0005, + "loss": 2.1592, + "max_norm": 72.97504425048828, + "max_norm/layer0": 72.97504425048828, + "mean_norm": 44.289913177490234, + "mean_norm/layer0": 44.289913177490234, + "multicode_k": 1, + "output_norm": 18.963457323710102, + "output_norm/layer0": 18.963457323710102, + "step": 8150 + }, + { + "MSE": 617.1626446533202, + "MSE/layer0": 617.1626446533202, + "dead_code_fraction": 0.1856, + "dead_code_fraction/layer0": 0.1856, + "epoch": 1.34, + "input_norm": 31.998610553741443, + "input_norm/layer0": 31.998610553741443, + "learning_rate": 0.0005, + "loss": 2.1339, + "max_norm": 73.06546020507812, + "max_norm/layer0": 73.06546020507812, + "mean_norm": 44.32819747924805, + "mean_norm/layer0": 44.32819747924805, + "multicode_k": 1, + "output_norm": 18.980771627426144, + "output_norm/layer0": 18.980771627426144, + "step": 8200 + }, + { + "MSE": 616.5359758504233, + "MSE/layer0": 616.5359758504233, + "dead_code_fraction": 0.18785, + "dead_code_fraction/layer0": 0.18785, + "epoch": 1.35, + "input_norm": 31.99861484845479, + "input_norm/layer0": 31.99861484845479, + "learning_rate": 0.0005, + "loss": 2.1287, + "max_norm": 73.1684341430664, + "max_norm/layer0": 73.1684341430664, + "mean_norm": 44.36627197265625, + "mean_norm/layer0": 44.36627197265625, + "multicode_k": 1, + "output_norm": 19.002285525004055, + "output_norm/layer0": 19.002285525004055, + "step": 8250 + }, + { + "MSE": 616.9324924723311, + "MSE/layer0": 616.9324924723311, + "dead_code_fraction": 0.18715, + "dead_code_fraction/layer0": 0.18715, + "epoch": 1.35, + "input_norm": 31.998625895182286, + "input_norm/layer0": 31.998625895182286, + "learning_rate": 0.0005, + "loss": 2.1575, + "max_norm": 73.259521484375, + "max_norm/layer0": 73.259521484375, + "mean_norm": 44.40446090698242, + "mean_norm/layer0": 44.40446090698242, + "multicode_k": 1, + "output_norm": 18.992992315292362, + "output_norm/layer0": 18.992992315292362, + "step": 8300 + }, + { + "MSE": 616.2650039672851, + "MSE/layer0": 616.2650039672851, + "dead_code_fraction": 0.18655, + "dead_code_fraction/layer0": 0.18655, + "epoch": 1.36, + "input_norm": 31.99862662315369, + "input_norm/layer0": 31.99862662315369, + "learning_rate": 0.0005, + "loss": 2.139, + "max_norm": 73.36270141601562, + "max_norm/layer0": 73.36270141601562, + "mean_norm": 44.44254493713379, + "mean_norm/layer0": 44.44254493713379, + "multicode_k": 1, + "output_norm": 19.00672375679015, + "output_norm/layer0": 19.00672375679015, + "step": 8350 + }, + { + "MSE": 615.5159185791019, + "MSE/layer0": 615.5159185791019, + "dead_code_fraction": 0.18685, + "dead_code_fraction/layer0": 0.18685, + "epoch": 1.36, + "input_norm": 31.998618663152055, + "input_norm/layer0": 31.998618663152055, + "learning_rate": 0.0005, + "loss": 2.1207, + "max_norm": 73.45561981201172, + "max_norm/layer0": 73.45561981201172, + "mean_norm": 44.48077201843262, + "mean_norm/layer0": 44.48077201843262, + "multicode_k": 1, + "output_norm": 19.030768597920748, + "output_norm/layer0": 19.030768597920748, + "step": 8400 + }, + { + "MSE": 615.7112675984704, + "MSE/layer0": 615.7112675984704, + "dead_code_fraction": 0.18675, + "dead_code_fraction/layer0": 0.18675, + "epoch": 1.37, + "input_norm": 31.99863114674885, + "input_norm/layer0": 31.99863114674885, + "learning_rate": 0.0005, + "loss": 2.1394, + "max_norm": 73.54468536376953, + "max_norm/layer0": 73.54468536376953, + "mean_norm": 44.5194206237793, + "mean_norm/layer0": 44.5194206237793, + "multicode_k": 1, + "output_norm": 19.03362373669942, + "output_norm/layer0": 19.03362373669942, + "step": 8450 + }, + { + "MSE": 615.0864140828453, + "MSE/layer0": 615.0864140828453, + "dead_code_fraction": 0.1866, + "dead_code_fraction/layer0": 0.1866, + "epoch": 1.37, + "input_norm": 31.9986399269104, + "input_norm/layer0": 31.9986399269104, + "learning_rate": 0.0005, + "loss": 2.1471, + "max_norm": 73.64068603515625, + "max_norm/layer0": 73.64068603515625, + "mean_norm": 44.55780220031738, + "mean_norm/layer0": 44.55780220031738, + "multicode_k": 1, + "output_norm": 19.04360143979391, + "output_norm/layer0": 19.04360143979391, + "step": 8500 + }, + { + "epoch": 1.37, + "eval_MSE/layer0": 613.7248421548741, + "eval_accuracy": 0.5081896395873495, + "eval_dead_code_fraction/layer0": 0.1885, + "eval_input_norm/layer0": 31.998632826486393, + "eval_loss": 2.1443779468536377, + "eval_multicode_k": 1, + "eval_output_norm/layer0": 19.066619998676906, + "eval_runtime": 158.5923, + "eval_samples_per_second": 29.15, + "eval_steps_per_second": 1.822, + "step": 8500 + }, + { + "MSE": 614.1585445149744, + "MSE/layer0": 614.1585445149744, + "dead_code_fraction": 0.18715, + "dead_code_fraction/layer0": 0.18715, + "epoch": 1.38, + "input_norm": 31.99863867441813, + "input_norm/layer0": 31.99863867441813, + "learning_rate": 0.0005, + "loss": 2.1506, + "max_norm": 73.73002624511719, + "max_norm/layer0": 73.73002624511719, + "mean_norm": 44.597002029418945, + "mean_norm/layer0": 44.597002029418945, + "multicode_k": 1, + "output_norm": 19.06499721844991, + "output_norm/layer0": 19.06499721844991, + "step": 8550 + }, + { + "MSE": 614.256539204915, + "MSE/layer0": 614.256539204915, + "dead_code_fraction": 0.1879, + "dead_code_fraction/layer0": 0.1879, + "epoch": 1.38, + "input_norm": 31.998648173014317, + "input_norm/layer0": 31.998648173014317, + "learning_rate": 0.0005, + "loss": 2.1643, + "max_norm": 73.80333709716797, + "max_norm/layer0": 73.80333709716797, + "mean_norm": 44.63543891906738, + "mean_norm/layer0": 44.63543891906738, + "multicode_k": 1, + "output_norm": 19.078293412526467, + "output_norm/layer0": 19.078293412526467, + "step": 8600 + }, + { + "MSE": 613.3546946207681, + "MSE/layer0": 613.3546946207681, + "dead_code_fraction": 0.1879, + "dead_code_fraction/layer0": 0.1879, + "epoch": 1.39, + "input_norm": 31.99864864667257, + "input_norm/layer0": 31.99864864667257, + "learning_rate": 0.0005, + "loss": 2.1535, + "max_norm": 73.89517974853516, + "max_norm/layer0": 73.89517974853516, + "mean_norm": 44.674211502075195, + "mean_norm/layer0": 44.674211502075195, + "multicode_k": 1, + "output_norm": 19.09559381167095, + "output_norm/layer0": 19.09559381167095, + "step": 8650 + }, + { + "MSE": 613.6053087361654, + "MSE/layer0": 613.6053087361654, + "dead_code_fraction": 0.18645, + "dead_code_fraction/layer0": 0.18645, + "epoch": 1.39, + "input_norm": 31.998652140299477, + "input_norm/layer0": 31.998652140299477, + "learning_rate": 0.0005, + "loss": 2.137, + "max_norm": 73.9770736694336, + "max_norm/layer0": 73.9770736694336, + "mean_norm": 44.71265983581543, + "mean_norm/layer0": 44.71265983581543, + "multicode_k": 1, + "output_norm": 19.098618446985878, + "output_norm/layer0": 19.098618446985878, + "step": 8700 + }, + { + "MSE": 613.292506408691, + "MSE/layer0": 613.292506408691, + "dead_code_fraction": 0.1876, + "dead_code_fraction/layer0": 0.1876, + "epoch": 1.4, + "input_norm": 31.998654588063562, + "input_norm/layer0": 31.998654588063562, + "learning_rate": 0.0005, + "loss": 2.1482, + "max_norm": 74.05269622802734, + "max_norm/layer0": 74.05269622802734, + "mean_norm": 44.750946044921875, + "mean_norm/layer0": 44.750946044921875, + "multicode_k": 1, + "output_norm": 19.104494848251342, + "output_norm/layer0": 19.104494848251342, + "step": 8750 + }, + { + "MSE": 613.8824895222986, + "MSE/layer0": 613.8824895222986, + "dead_code_fraction": 0.1868, + "dead_code_fraction/layer0": 0.1868, + "epoch": 1.4, + "input_norm": 31.998655049006146, + "input_norm/layer0": 31.998655049006146, + "learning_rate": 0.0005, + "loss": 2.1331, + "max_norm": 74.12651824951172, + "max_norm/layer0": 74.12651824951172, + "mean_norm": 44.7886848449707, + "mean_norm/layer0": 44.7886848449707, + "multicode_k": 1, + "output_norm": 19.110120385487882, + "output_norm/layer0": 19.110120385487882, + "step": 8800 + }, + { + "MSE": 613.8568901570636, + "MSE/layer0": 613.8568901570636, + "dead_code_fraction": 0.18675, + "dead_code_fraction/layer0": 0.18675, + "epoch": 1.41, + "input_norm": 31.99864878336588, + "input_norm/layer0": 31.99864878336588, + "learning_rate": 0.0005, + "loss": 2.1038, + "max_norm": 74.20288848876953, + "max_norm/layer0": 74.20288848876953, + "mean_norm": 44.82563400268555, + "mean_norm/layer0": 44.82563400268555, + "multicode_k": 1, + "output_norm": 19.120709832509363, + "output_norm/layer0": 19.120709832509363, + "step": 8850 + }, + { + "MSE": 612.8203454589843, + "MSE/layer0": 612.8203454589843, + "dead_code_fraction": 0.18635, + "dead_code_fraction/layer0": 0.18635, + "epoch": 1.41, + "input_norm": 31.99866209030152, + "input_norm/layer0": 31.99866209030152, + "learning_rate": 0.0005, + "loss": 2.1619, + "max_norm": 74.27029418945312, + "max_norm/layer0": 74.27029418945312, + "mean_norm": 44.863847732543945, + "mean_norm/layer0": 44.863847732543945, + "multicode_k": 1, + "output_norm": 19.13362557093303, + "output_norm/layer0": 19.13362557093303, + "step": 8900 + }, + { + "MSE": 612.7508836873369, + "MSE/layer0": 612.7508836873369, + "dead_code_fraction": 0.1865, + "dead_code_fraction/layer0": 0.1865, + "epoch": 1.42, + "input_norm": 31.998662964502977, + "input_norm/layer0": 31.998662964502977, + "learning_rate": 0.0005, + "loss": 2.1274, + "max_norm": 74.35165405273438, + "max_norm/layer0": 74.35165405273438, + "mean_norm": 44.90276908874512, + "mean_norm/layer0": 44.90276908874512, + "multicode_k": 1, + "output_norm": 19.13368027687074, + "output_norm/layer0": 19.13368027687074, + "step": 8950 + }, + { + "MSE": 611.3088948567707, + "MSE/layer0": 611.3088948567707, + "dead_code_fraction": 0.18625, + "dead_code_fraction/layer0": 0.18625, + "epoch": 1.42, + "input_norm": 31.998670199712116, + "input_norm/layer0": 31.998670199712116, + "learning_rate": 0.0005, + "loss": 2.1556, + "max_norm": 74.43575286865234, + "max_norm/layer0": 74.43575286865234, + "mean_norm": 44.94179916381836, + "mean_norm/layer0": 44.94179916381836, + "multicode_k": 1, + "output_norm": 19.165478760401413, + "output_norm/layer0": 19.165478760401413, + "step": 9000 + }, + { + "epoch": 1.42, + "eval_MSE/layer0": 610.3757424029645, + "eval_accuracy": 0.5087341142897861, + "eval_dead_code_fraction/layer0": 0.18805, + "eval_input_norm/layer0": 31.998659288421646, + "eval_loss": 2.139230489730835, + "eval_multicode_k": 1, + "eval_output_norm/layer0": 19.181722183648382, + "eval_runtime": 158.0526, + "eval_samples_per_second": 29.25, + "eval_steps_per_second": 1.829, + "step": 9000 + }, + { + "MSE": 611.2356985473632, + "MSE/layer0": 611.2356985473632, + "dead_code_fraction": 0.1879, + "dead_code_fraction/layer0": 0.1879, + "epoch": 1.43, + "input_norm": 31.998666836420703, + "input_norm/layer0": 31.998666836420703, + "learning_rate": 0.0005, + "loss": 2.1388, + "max_norm": 74.51050567626953, + "max_norm/layer0": 74.51050567626953, + "mean_norm": 44.98063850402832, + "mean_norm/layer0": 44.98063850402832, + "multicode_k": 1, + "output_norm": 19.177389281590777, + "output_norm/layer0": 19.177389281590777, + "step": 9050 + }, + { + "MSE": 610.8344569905598, + "MSE/layer0": 610.8344569905598, + "dead_code_fraction": 0.18865, + "dead_code_fraction/layer0": 0.18865, + "epoch": 1.43, + "input_norm": 31.99867141723631, + "input_norm/layer0": 31.99867141723631, + "learning_rate": 0.0005, + "loss": 2.1328, + "max_norm": 74.59440612792969, + "max_norm/layer0": 74.59440612792969, + "mean_norm": 45.01910400390625, + "mean_norm/layer0": 45.01910400390625, + "multicode_k": 1, + "output_norm": 19.185275354385375, + "output_norm/layer0": 19.185275354385375, + "step": 9100 + }, + { + "MSE": 610.7402758789062, + "MSE/layer0": 610.7402758789062, + "dead_code_fraction": 0.1871, + "dead_code_fraction/layer0": 0.1871, + "epoch": 1.44, + "input_norm": 31.99866997400921, + "input_norm/layer0": 31.99866997400921, + "learning_rate": 0.0005, + "loss": 2.117, + "max_norm": 74.67122650146484, + "max_norm/layer0": 74.67122650146484, + "mean_norm": 45.05727577209473, + "mean_norm/layer0": 45.05727577209473, + "multicode_k": 1, + "output_norm": 19.190109596252437, + "output_norm/layer0": 19.190109596252437, + "step": 9150 + }, + { + "MSE": 610.1339531453451, + "MSE/layer0": 610.1339531453451, + "dead_code_fraction": 0.18745, + "dead_code_fraction/layer0": 0.18745, + "epoch": 1.44, + "input_norm": 31.998679358164473, + "input_norm/layer0": 31.998679358164473, + "learning_rate": 0.0005, + "loss": 2.1459, + "max_norm": 74.7430419921875, + "max_norm/layer0": 74.7430419921875, + "mean_norm": 45.095571517944336, + "mean_norm/layer0": 45.095571517944336, + "multicode_k": 1, + "output_norm": 19.203376553853335, + "output_norm/layer0": 19.203376553853335, + "step": 9200 + }, + { + "MSE": 609.6957601928709, + "MSE/layer0": 609.6957601928709, + "dead_code_fraction": 0.1878, + "dead_code_fraction/layer0": 0.1878, + "epoch": 1.45, + "input_norm": 31.99868172009785, + "input_norm/layer0": 31.99868172009785, + "learning_rate": 0.0005, + "loss": 2.142, + "max_norm": 74.8177490234375, + "max_norm/layer0": 74.8177490234375, + "mean_norm": 45.133853912353516, + "mean_norm/layer0": 45.133853912353516, + "multicode_k": 1, + "output_norm": 19.22210531552632, + "output_norm/layer0": 19.22210531552632, + "step": 9250 + }, + { + "MSE": 609.5997785441082, + "MSE/layer0": 609.5997785441082, + "dead_code_fraction": 0.18805, + "dead_code_fraction/layer0": 0.18805, + "epoch": 1.45, + "input_norm": 31.998693205515544, + "input_norm/layer0": 31.998693205515544, + "learning_rate": 0.0005, + "loss": 2.18, + "max_norm": 74.87744140625, + "max_norm/layer0": 74.87744140625, + "mean_norm": 45.172555923461914, + "mean_norm/layer0": 45.172555923461914, + "multicode_k": 1, + "output_norm": 19.226630802154542, + "output_norm/layer0": 19.226630802154542, + "step": 9300 + }, + { + "MSE": 609.8342389933271, + "MSE/layer0": 609.8342389933271, + "dead_code_fraction": 0.18735, + "dead_code_fraction/layer0": 0.18735, + "epoch": 1.46, + "input_norm": 31.998687505722053, + "input_norm/layer0": 31.998687505722053, + "learning_rate": 0.0005, + "loss": 2.1164, + "max_norm": 74.94609069824219, + "max_norm/layer0": 74.94609069824219, + "mean_norm": 45.21059799194336, + "mean_norm/layer0": 45.21059799194336, + "multicode_k": 1, + "output_norm": 19.234882882436114, + "output_norm/layer0": 19.234882882436114, + "step": 9350 + }, + { + "MSE": 609.2034523518882, + "MSE/layer0": 609.2034523518882, + "dead_code_fraction": 0.1869, + "dead_code_fraction/layer0": 0.1869, + "epoch": 1.46, + "input_norm": 31.99869050979616, + "input_norm/layer0": 31.99869050979616, + "learning_rate": 0.0005, + "loss": 2.1316, + "max_norm": 75.01142883300781, + "max_norm/layer0": 75.01142883300781, + "mean_norm": 45.248979568481445, + "mean_norm/layer0": 45.248979568481445, + "multicode_k": 1, + "output_norm": 19.247848326365144, + "output_norm/layer0": 19.247848326365144, + "step": 9400 + }, + { + "MSE": 609.0324313354497, + "MSE/layer0": 609.0324313354497, + "dead_code_fraction": 0.18745, + "dead_code_fraction/layer0": 0.18745, + "epoch": 1.47, + "input_norm": 31.99869132041931, + "input_norm/layer0": 31.99869132041931, + "learning_rate": 0.0005, + "loss": 2.1214, + "max_norm": 75.07112121582031, + "max_norm/layer0": 75.07112121582031, + "mean_norm": 45.287214279174805, + "mean_norm/layer0": 45.287214279174805, + "multicode_k": 1, + "output_norm": 19.25519768079122, + "output_norm/layer0": 19.25519768079122, + "step": 9450 + }, + { + "MSE": 607.8594933064783, + "MSE/layer0": 607.8594933064783, + "dead_code_fraction": 0.18835, + "dead_code_fraction/layer0": 0.18835, + "epoch": 1.47, + "input_norm": 31.998687744140625, + "input_norm/layer0": 31.998687744140625, + "learning_rate": 0.0005, + "loss": 2.1067, + "max_norm": 75.15766143798828, + "max_norm/layer0": 75.15766143798828, + "mean_norm": 45.32560920715332, + "mean_norm/layer0": 45.32560920715332, + "multicode_k": 1, + "output_norm": 19.27704188664754, + "output_norm/layer0": 19.27704188664754, + "step": 9500 + }, + { + "epoch": 1.47, + "eval_MSE/layer0": 608.6866096036146, + "eval_accuracy": 0.5090880757079915, + "eval_dead_code_fraction/layer0": 0.18755, + "eval_input_norm/layer0": 31.998685899710146, + "eval_loss": 2.1350600719451904, + "eval_multicode_k": 1, + "eval_output_norm/layer0": 19.283631281241068, + "eval_runtime": 158.1797, + "eval_samples_per_second": 29.226, + "eval_steps_per_second": 1.827, + "step": 9500 + }, + { + "MSE": 607.5302533983886, + "MSE/layer0": 607.5302533983886, + "dead_code_fraction": 0.1872, + "dead_code_fraction/layer0": 0.1872, + "epoch": 1.48, + "input_norm": 31.99869025141972, + "input_norm/layer0": 31.99869025141972, + "learning_rate": 0.0005, + "loss": 2.1075, + "max_norm": 75.2263412475586, + "max_norm/layer0": 75.2263412475586, + "mean_norm": 45.363752365112305, + "mean_norm/layer0": 45.363752365112305, + "multicode_k": 1, + "output_norm": 19.2927733112995, + "output_norm/layer0": 19.2927733112995, + "step": 9550 + }, + { + "MSE": 608.902215973978, + "MSE/layer0": 608.902215973978, + "dead_code_fraction": 0.187, + "dead_code_fraction/layer0": 0.187, + "epoch": 2.0, + "input_norm": 31.998686492629858, + "input_norm/layer0": 31.998686492629858, + "learning_rate": 0.0005, + "loss": 2.1013, + "max_norm": 75.294677734375, + "max_norm/layer0": 75.294677734375, + "mean_norm": 45.40024948120117, + "mean_norm/layer0": 45.40024948120117, + "multicode_k": 1, + "output_norm": 19.268582361188244, + "output_norm/layer0": 19.268582361188244, + "step": 9600 + }, + { + "MSE": 606.3796120198567, + "MSE/layer0": 606.3796120198567, + "dead_code_fraction": 0.18715, + "dead_code_fraction/layer0": 0.18715, + "epoch": 2.01, + "input_norm": 31.998710851669312, + "input_norm/layer0": 31.998710851669312, + "learning_rate": 0.0005, + "loss": 2.17, + "max_norm": 75.35186004638672, + "max_norm/layer0": 75.35186004638672, + "mean_norm": 45.4382266998291, + "mean_norm/layer0": 45.4382266998291, + "multicode_k": 1, + "output_norm": 19.314183537165327, + "output_norm/layer0": 19.314183537165327, + "step": 9650 + }, + { + "MSE": 606.9239878336591, + "MSE/layer0": 606.9239878336591, + "dead_code_fraction": 0.1877, + "dead_code_fraction/layer0": 0.1877, + "epoch": 2.01, + "input_norm": 31.99869126637776, + "input_norm/layer0": 31.99869126637776, + "learning_rate": 0.0005, + "loss": 2.0661, + "max_norm": 75.44601440429688, + "max_norm/layer0": 75.44601440429688, + "mean_norm": 45.47653579711914, + "mean_norm/layer0": 45.47653579711914, + "multicode_k": 1, + "output_norm": 19.313949975967407, + "output_norm/layer0": 19.313949975967407, + "step": 9700 + }, + { + "MSE": 606.1468785603844, + "MSE/layer0": 606.1468785603844, + "dead_code_fraction": 0.18755, + "dead_code_fraction/layer0": 0.18755, + "epoch": 2.02, + "input_norm": 31.998706903457652, + "input_norm/layer0": 31.998706903457652, + "learning_rate": 0.0005, + "loss": 2.1325, + "max_norm": 75.6237564086914, + "max_norm/layer0": 75.6237564086914, + "mean_norm": 45.51473808288574, + "mean_norm/layer0": 45.51473808288574, + "multicode_k": 1, + "output_norm": 19.331538470586143, + "output_norm/layer0": 19.331538470586143, + "step": 9750 + }, + { + "MSE": 606.2908910115561, + "MSE/layer0": 606.2908910115561, + "dead_code_fraction": 0.18715, + "dead_code_fraction/layer0": 0.18715, + "epoch": 2.02, + "input_norm": 31.998702777226768, + "input_norm/layer0": 31.998702777226768, + "learning_rate": 0.0005, + "loss": 2.0999, + "max_norm": 75.77623748779297, + "max_norm/layer0": 75.77623748779297, + "mean_norm": 45.55307388305664, + "mean_norm/layer0": 45.55307388305664, + "multicode_k": 1, + "output_norm": 19.340178826649982, + "output_norm/layer0": 19.340178826649982, + "step": 9800 + }, + { + "MSE": 605.7215723673501, + "MSE/layer0": 605.7215723673501, + "dead_code_fraction": 0.18635, + "dead_code_fraction/layer0": 0.18635, + "epoch": 2.03, + "input_norm": 31.998708073298122, + "input_norm/layer0": 31.998708073298122, + "learning_rate": 0.0005, + "loss": 2.1015, + "max_norm": 75.92095184326172, + "max_norm/layer0": 75.92095184326172, + "mean_norm": 45.591548919677734, + "mean_norm/layer0": 45.591548919677734, + "multicode_k": 1, + "output_norm": 19.351260058085124, + "output_norm/layer0": 19.351260058085124, + "step": 9850 + }, + { + "MSE": 605.7307819620769, + "MSE/layer0": 605.7307819620769, + "dead_code_fraction": 0.1879, + "dead_code_fraction/layer0": 0.1879, + "epoch": 2.03, + "input_norm": 31.99871432304383, + "input_norm/layer0": 31.99871432304383, + "learning_rate": 0.0005, + "loss": 2.1079, + "max_norm": 76.06104278564453, + "max_norm/layer0": 76.06104278564453, + "mean_norm": 45.62945747375488, + "mean_norm/layer0": 45.62945747375488, + "multicode_k": 1, + "output_norm": 19.36078415234882, + "output_norm/layer0": 19.36078415234882, + "step": 9900 + }, + { + "MSE": 605.7736006673174, + "MSE/layer0": 605.7736006673174, + "dead_code_fraction": 0.1873, + "dead_code_fraction/layer0": 0.1873, + "epoch": 2.04, + "input_norm": 31.99871180534363, + "input_norm/layer0": 31.99871180534363, + "learning_rate": 0.0005, + "loss": 2.102, + "max_norm": 76.22486877441406, + "max_norm/layer0": 76.22486877441406, + "mean_norm": 45.66733360290527, + "mean_norm/layer0": 45.66733360290527, + "multicode_k": 1, + "output_norm": 19.36815209388733, + "output_norm/layer0": 19.36815209388733, + "step": 9950 + }, + { + "MSE": 604.9809751383466, + "MSE/layer0": 604.9809751383466, + "dead_code_fraction": 0.1872, + "dead_code_fraction/layer0": 0.1872, + "epoch": 2.04, + "input_norm": 31.998728539148978, + "input_norm/layer0": 31.998728539148978, + "learning_rate": 0.0005, + "loss": 2.1536, + "max_norm": 76.40007019042969, + "max_norm/layer0": 76.40007019042969, + "mean_norm": 45.70543670654297, + "mean_norm/layer0": 45.70543670654297, + "multicode_k": 1, + "output_norm": 19.38911464373271, + "output_norm/layer0": 19.38911464373271, + "step": 10000 + }, + { + "epoch": 2.04, + "eval_MSE/layer0": 604.5096733395267, + "eval_accuracy": 0.5091345939349958, + "eval_dead_code_fraction/layer0": 0.18795, + "eval_input_norm/layer0": 31.99872850438308, + "eval_loss": 2.132894992828369, + "eval_multicode_k": 1, + "eval_output_norm/layer0": 19.389702240368152, + "eval_runtime": 158.9177, + "eval_samples_per_second": 29.091, + "eval_steps_per_second": 1.819, + "step": 10000 + }, + { + "MSE": 0.0, + "MSE/layer0": 0.0, + "dead_code_fraction": 1.0, + "dead_code_fraction/layer0": 1.0, + "epoch": 2.04, + "input_norm": 0.0, + "input_norm/layer0": 0.0, + "max_norm": 76.40007019042969, + "max_norm/layer0": 76.40007019042969, + "mean_norm": 45.70543670654297, + "mean_norm/layer0": 45.70543670654297, + "multicode_k": 1, + "output_norm": 0.0, + "output_norm/layer0": 0.0, + "step": 10000, + "total_flos": 7.43098011353088e+16, + "train_loss": 2.325971780395508, + "train_runtime": 15639.0026, + "train_samples_per_second": 61.385, + "train_steps_per_second": 0.639 + } + ], + "logging_steps": 50, + "max_steps": 10000, + "num_train_epochs": 9223372036854775807, + "save_steps": 500, + "total_flos": 7.43098011353088e+16, + "trial_name": null, + "trial_params": null +}