|
{ |
|
"best_metric": 2.0353407859802246, |
|
"best_model_checkpoint": "output_main/wandb/run-20240211_075351-8o9ldy4a/files/train_output/checkpoint-10000", |
|
"epoch": 2.042133333333333, |
|
"eval_steps": 500, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"MSE": 892.0916341145833, |
|
"MSE/layer0": 892.0916341145833, |
|
"dead_code_fraction": 0.15045, |
|
"dead_code_fraction/layer0": 0.15045, |
|
"epoch": 0.0, |
|
"input_norm": 31.997259775797524, |
|
"input_norm/layer0": 31.997259775797524, |
|
"learning_rate": 1e-05, |
|
"loss": 8.134, |
|
"max_norm": 35.01011657714844, |
|
"max_norm/layer0": 35.01011657714844, |
|
"mean_norm": 31.990370750427246, |
|
"mean_norm/layer0": 31.990370750427246, |
|
"multicode_k": 1, |
|
"output_norm": 8.571834087371826, |
|
"output_norm/layer0": 8.571834087371826, |
|
"step": 1 |
|
}, |
|
{ |
|
"MSE": 889.7418754733337, |
|
"MSE/layer0": 889.7418754733337, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.01, |
|
"input_norm": 31.99632342656454, |
|
"input_norm/layer0": 31.99632342656454, |
|
"learning_rate": 0.0005, |
|
"loss": 7.1762, |
|
"max_norm": 35.03640365600586, |
|
"max_norm/layer0": 35.03640365600586, |
|
"mean_norm": 32.01236152648926, |
|
"mean_norm/layer0": 32.01236152648926, |
|
"multicode_k": 1, |
|
"output_norm": 8.591146861614817, |
|
"output_norm/layer0": 8.591146861614817, |
|
"step": 50 |
|
}, |
|
{ |
|
"MSE": 869.5438468424481, |
|
"MSE/layer0": 869.5438468424481, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.01, |
|
"input_norm": 31.996115023295076, |
|
"input_norm/layer0": 31.996115023295076, |
|
"learning_rate": 0.001, |
|
"loss": 5.0118, |
|
"max_norm": 35.15137481689453, |
|
"max_norm/layer0": 35.15137481689453, |
|
"mean_norm": 32.11746788024902, |
|
"mean_norm/layer0": 32.11746788024902, |
|
"multicode_k": 1, |
|
"output_norm": 8.768607576688133, |
|
"output_norm/layer0": 8.768607576688133, |
|
"step": 100 |
|
}, |
|
{ |
|
"MSE": 841.8395769246417, |
|
"MSE/layer0": 841.8395769246417, |
|
"dead_code_fraction": 0.0, |
|
"dead_code_fraction/layer0": 0.0, |
|
"epoch": 0.01, |
|
"input_norm": 31.996588408152256, |
|
"input_norm/layer0": 31.996588408152256, |
|
"learning_rate": 0.0015, |
|
"loss": 4.2338, |
|
"max_norm": 35.45364761352539, |
|
"max_norm/layer0": 35.45364761352539, |
|
"mean_norm": 32.34040641784668, |
|
"mean_norm/layer0": 32.34040641784668, |
|
"multicode_k": 1, |
|
"output_norm": 9.237536044120787, |
|
"output_norm/layer0": 9.237536044120787, |
|
"step": 150 |
|
}, |
|
{ |
|
"MSE": 817.2703357950843, |
|
"MSE/layer0": 817.2703357950843, |
|
"dead_code_fraction": 5e-05, |
|
"dead_code_fraction/layer0": 5e-05, |
|
"epoch": 0.02, |
|
"input_norm": 31.99718633969625, |
|
"input_norm/layer0": 31.99718633969625, |
|
"learning_rate": 0.002, |
|
"loss": 3.837, |
|
"max_norm": 36.11206817626953, |
|
"max_norm/layer0": 36.11206817626953, |
|
"mean_norm": 32.652212142944336, |
|
"mean_norm/layer0": 32.652212142944336, |
|
"multicode_k": 1, |
|
"output_norm": 9.962018431027724, |
|
"output_norm/layer0": 9.962018431027724, |
|
"step": 200 |
|
}, |
|
{ |
|
"MSE": 787.4571261596678, |
|
"MSE/layer0": 787.4571261596678, |
|
"dead_code_fraction": 0.0028, |
|
"dead_code_fraction/layer0": 0.0028, |
|
"epoch": 0.03, |
|
"input_norm": 31.99750740687052, |
|
"input_norm/layer0": 31.99750740687052, |
|
"learning_rate": 0.0025, |
|
"loss": 3.5507, |
|
"max_norm": 37.03396987915039, |
|
"max_norm/layer0": 37.03396987915039, |
|
"mean_norm": 33.030792236328125, |
|
"mean_norm/layer0": 33.030792236328125, |
|
"multicode_k": 1, |
|
"output_norm": 11.101801137924198, |
|
"output_norm/layer0": 11.101801137924198, |
|
"step": 250 |
|
}, |
|
{ |
|
"MSE": 759.7653246053058, |
|
"MSE/layer0": 759.7653246053058, |
|
"dead_code_fraction": 0.02905, |
|
"dead_code_fraction/layer0": 0.02905, |
|
"epoch": 0.03, |
|
"input_norm": 31.99749964078267, |
|
"input_norm/layer0": 31.99749964078267, |
|
"learning_rate": 0.003, |
|
"loss": 3.3015, |
|
"max_norm": 37.927757263183594, |
|
"max_norm/layer0": 37.927757263183594, |
|
"mean_norm": 33.33859634399414, |
|
"mean_norm/layer0": 33.33859634399414, |
|
"multicode_k": 1, |
|
"output_norm": 12.222484871546431, |
|
"output_norm/layer0": 12.222484871546431, |
|
"step": 300 |
|
}, |
|
{ |
|
"MSE": 734.5841912841795, |
|
"MSE/layer0": 734.5841912841795, |
|
"dead_code_fraction": 0.06455, |
|
"dead_code_fraction/layer0": 0.06455, |
|
"epoch": 0.04, |
|
"input_norm": 31.99746166547139, |
|
"input_norm/layer0": 31.99746166547139, |
|
"learning_rate": 0.0034999999999999996, |
|
"loss": 3.1483, |
|
"max_norm": 40.570350646972656, |
|
"max_norm/layer0": 40.570350646972656, |
|
"mean_norm": 33.79829216003418, |
|
"mean_norm/layer0": 33.79829216003418, |
|
"multicode_k": 1, |
|
"output_norm": 13.233797086079917, |
|
"output_norm/layer0": 13.233797086079917, |
|
"step": 350 |
|
}, |
|
{ |
|
"MSE": 705.9179516601566, |
|
"MSE/layer0": 705.9179516601566, |
|
"dead_code_fraction": 0.13495, |
|
"dead_code_fraction/layer0": 0.13495, |
|
"epoch": 0.04, |
|
"input_norm": 31.997578941980994, |
|
"input_norm/layer0": 31.997578941980994, |
|
"learning_rate": 0.004, |
|
"loss": 3.0479, |
|
"max_norm": 45.86402130126953, |
|
"max_norm/layer0": 45.86402130126953, |
|
"mean_norm": 34.60604667663574, |
|
"mean_norm/layer0": 34.60604667663574, |
|
"multicode_k": 1, |
|
"output_norm": 14.794977650642394, |
|
"output_norm/layer0": 14.794977650642394, |
|
"step": 400 |
|
}, |
|
{ |
|
"MSE": 673.0142825317382, |
|
"MSE/layer0": 673.0142825317382, |
|
"dead_code_fraction": 0.236, |
|
"dead_code_fraction/layer0": 0.236, |
|
"epoch": 0.04, |
|
"input_norm": 31.99772956212363, |
|
"input_norm/layer0": 31.99772956212363, |
|
"learning_rate": 0.0045000000000000005, |
|
"loss": 2.9234, |
|
"max_norm": 50.35022735595703, |
|
"max_norm/layer0": 50.35022735595703, |
|
"mean_norm": 35.50743293762207, |
|
"mean_norm/layer0": 35.50743293762207, |
|
"multicode_k": 1, |
|
"output_norm": 16.412540513674415, |
|
"output_norm/layer0": 16.412540513674415, |
|
"step": 450 |
|
}, |
|
{ |
|
"MSE": 646.1952704874673, |
|
"MSE/layer0": 646.1952704874673, |
|
"dead_code_fraction": 0.31565, |
|
"dead_code_fraction/layer0": 0.31565, |
|
"epoch": 0.05, |
|
"input_norm": 31.997816743850702, |
|
"input_norm/layer0": 31.997816743850702, |
|
"learning_rate": 0.005, |
|
"loss": 2.8364, |
|
"max_norm": 55.06960678100586, |
|
"max_norm/layer0": 55.06960678100586, |
|
"mean_norm": 36.40013122558594, |
|
"mean_norm/layer0": 36.40013122558594, |
|
"multicode_k": 1, |
|
"output_norm": 17.61372879664104, |
|
"output_norm/layer0": 17.61372879664104, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_MSE/layer0": 634.8931657946682, |
|
"eval_accuracy": 0.42267877747562077, |
|
"eval_dead_code_fraction/layer0": 0.3619, |
|
"eval_input_norm/layer0": 31.9978586178746, |
|
"eval_loss": 2.7649216651916504, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 18.081893277070293, |
|
"eval_runtime": 157.3558, |
|
"eval_samples_per_second": 29.379, |
|
"eval_steps_per_second": 1.837, |
|
"step": 500 |
|
}, |
|
{ |
|
"MSE": 627.919213663737, |
|
"MSE/layer0": 627.919213663737, |
|
"dead_code_fraction": 0.35455, |
|
"dead_code_fraction/layer0": 0.35455, |
|
"epoch": 0.06, |
|
"input_norm": 31.997863556543983, |
|
"input_norm/layer0": 31.997863556543983, |
|
"learning_rate": 0.005, |
|
"loss": 2.6999, |
|
"max_norm": 59.44381332397461, |
|
"max_norm/layer0": 59.44381332397461, |
|
"mean_norm": 37.23677062988281, |
|
"mean_norm/layer0": 37.23677062988281, |
|
"multicode_k": 1, |
|
"output_norm": 18.411861616770416, |
|
"output_norm/layer0": 18.411861616770416, |
|
"step": 550 |
|
}, |
|
{ |
|
"MSE": 613.3249330647789, |
|
"MSE/layer0": 613.3249330647789, |
|
"dead_code_fraction": 0.38215, |
|
"dead_code_fraction/layer0": 0.38215, |
|
"epoch": 0.06, |
|
"input_norm": 31.99789684613545, |
|
"input_norm/layer0": 31.99789684613545, |
|
"learning_rate": 0.005, |
|
"loss": 2.6511, |
|
"max_norm": 66.23004150390625, |
|
"max_norm/layer0": 66.23004150390625, |
|
"mean_norm": 38.00171661376953, |
|
"mean_norm/layer0": 38.00171661376953, |
|
"multicode_k": 1, |
|
"output_norm": 18.973640613555915, |
|
"output_norm/layer0": 18.973640613555915, |
|
"step": 600 |
|
}, |
|
{ |
|
"MSE": 601.0688813273114, |
|
"MSE/layer0": 601.0688813273114, |
|
"dead_code_fraction": 0.3855, |
|
"dead_code_fraction/layer0": 0.3855, |
|
"epoch": 0.07, |
|
"input_norm": 31.997907568613698, |
|
"input_norm/layer0": 31.997907568613698, |
|
"learning_rate": 0.005, |
|
"loss": 2.5727, |
|
"max_norm": 72.61077117919922, |
|
"max_norm/layer0": 72.61077117919922, |
|
"mean_norm": 38.68782615661621, |
|
"mean_norm/layer0": 38.68782615661621, |
|
"multicode_k": 1, |
|
"output_norm": 19.389015719095863, |
|
"output_norm/layer0": 19.389015719095863, |
|
"step": 650 |
|
}, |
|
{ |
|
"MSE": 595.7544806925458, |
|
"MSE/layer0": 595.7544806925458, |
|
"dead_code_fraction": 0.3847, |
|
"dead_code_fraction/layer0": 0.3847, |
|
"epoch": 0.07, |
|
"input_norm": 31.99792699813842, |
|
"input_norm/layer0": 31.99792699813842, |
|
"learning_rate": 0.005, |
|
"loss": 2.5303, |
|
"max_norm": 77.1572036743164, |
|
"max_norm/layer0": 77.1572036743164, |
|
"mean_norm": 39.301788330078125, |
|
"mean_norm/layer0": 39.301788330078125, |
|
"multicode_k": 1, |
|
"output_norm": 19.654865121841446, |
|
"output_norm/layer0": 19.654865121841446, |
|
"step": 700 |
|
}, |
|
{ |
|
"MSE": 587.4001970418295, |
|
"MSE/layer0": 587.4001970418295, |
|
"dead_code_fraction": 0.38495, |
|
"dead_code_fraction/layer0": 0.38495, |
|
"epoch": 0.07, |
|
"input_norm": 31.997964229583737, |
|
"input_norm/layer0": 31.997964229583737, |
|
"learning_rate": 0.005, |
|
"loss": 2.5181, |
|
"max_norm": 81.00206756591797, |
|
"max_norm/layer0": 81.00206756591797, |
|
"mean_norm": 39.8663330078125, |
|
"mean_norm/layer0": 39.8663330078125, |
|
"multicode_k": 1, |
|
"output_norm": 19.91484704653422, |
|
"output_norm/layer0": 19.91484704653422, |
|
"step": 750 |
|
}, |
|
{ |
|
"MSE": 582.8578649902345, |
|
"MSE/layer0": 582.8578649902345, |
|
"dead_code_fraction": 0.37595, |
|
"dead_code_fraction/layer0": 0.37595, |
|
"epoch": 0.08, |
|
"input_norm": 31.997961613337196, |
|
"input_norm/layer0": 31.997961613337196, |
|
"learning_rate": 0.005, |
|
"loss": 2.488, |
|
"max_norm": 84.8564682006836, |
|
"max_norm/layer0": 84.8564682006836, |
|
"mean_norm": 40.41610145568848, |
|
"mean_norm/layer0": 40.41610145568848, |
|
"multicode_k": 1, |
|
"output_norm": 20.113984060287464, |
|
"output_norm/layer0": 20.113984060287464, |
|
"step": 800 |
|
}, |
|
{ |
|
"MSE": 578.7394322713219, |
|
"MSE/layer0": 578.7394322713219, |
|
"dead_code_fraction": 0.36775, |
|
"dead_code_fraction/layer0": 0.36775, |
|
"epoch": 0.09, |
|
"input_norm": 31.99793098767598, |
|
"input_norm/layer0": 31.99793098767598, |
|
"learning_rate": 0.005, |
|
"loss": 2.3972, |
|
"max_norm": 88.52584838867188, |
|
"max_norm/layer0": 88.52584838867188, |
|
"mean_norm": 40.93037033081055, |
|
"mean_norm/layer0": 40.93037033081055, |
|
"multicode_k": 1, |
|
"output_norm": 20.255761035283413, |
|
"output_norm/layer0": 20.255761035283413, |
|
"step": 850 |
|
}, |
|
{ |
|
"MSE": 574.7943645222981, |
|
"MSE/layer0": 574.7943645222981, |
|
"dead_code_fraction": 0.3752, |
|
"dead_code_fraction/layer0": 0.3752, |
|
"epoch": 0.09, |
|
"input_norm": 31.99794203122458, |
|
"input_norm/layer0": 31.99794203122458, |
|
"learning_rate": 0.005, |
|
"loss": 2.4475, |
|
"max_norm": 91.37139129638672, |
|
"max_norm/layer0": 91.37139129638672, |
|
"mean_norm": 41.42861366271973, |
|
"mean_norm/layer0": 41.42861366271973, |
|
"multicode_k": 1, |
|
"output_norm": 20.38246509869893, |
|
"output_norm/layer0": 20.38246509869893, |
|
"step": 900 |
|
}, |
|
{ |
|
"MSE": 572.0475691731768, |
|
"MSE/layer0": 572.0475691731768, |
|
"dead_code_fraction": 0.369, |
|
"dead_code_fraction/layer0": 0.369, |
|
"epoch": 0.1, |
|
"input_norm": 31.997947810490906, |
|
"input_norm/layer0": 31.997947810490906, |
|
"learning_rate": 0.005, |
|
"loss": 2.3928, |
|
"max_norm": 93.76451873779297, |
|
"max_norm/layer0": 93.76451873779297, |
|
"mean_norm": 41.89710807800293, |
|
"mean_norm/layer0": 41.89710807800293, |
|
"multicode_k": 1, |
|
"output_norm": 20.522438500722256, |
|
"output_norm/layer0": 20.522438500722256, |
|
"step": 950 |
|
}, |
|
{ |
|
"MSE": 571.223816274007, |
|
"MSE/layer0": 571.223816274007, |
|
"dead_code_fraction": 0.35845, |
|
"dead_code_fraction/layer0": 0.35845, |
|
"epoch": 0.1, |
|
"input_norm": 31.997930752436314, |
|
"input_norm/layer0": 31.997930752436314, |
|
"learning_rate": 0.005, |
|
"loss": 2.3611, |
|
"max_norm": 95.86876678466797, |
|
"max_norm/layer0": 95.86876678466797, |
|
"mean_norm": 42.36003875732422, |
|
"mean_norm/layer0": 42.36003875732422, |
|
"multicode_k": 1, |
|
"output_norm": 20.59194125175477, |
|
"output_norm/layer0": 20.59194125175477, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_MSE/layer0": 568.7263942209383, |
|
"eval_accuracy": 0.47120194006380184, |
|
"eval_dead_code_fraction/layer0": 0.36065, |
|
"eval_input_norm/layer0": 31.997911268824648, |
|
"eval_loss": 2.370492935180664, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 20.66302644662364, |
|
"eval_runtime": 157.3974, |
|
"eval_samples_per_second": 29.372, |
|
"eval_steps_per_second": 1.836, |
|
"step": 1000 |
|
}, |
|
{ |
|
"MSE": 568.216246948242, |
|
"MSE/layer0": 568.216246948242, |
|
"dead_code_fraction": 0.35655, |
|
"dead_code_fraction/layer0": 0.35655, |
|
"epoch": 0.1, |
|
"input_norm": 31.997933057149254, |
|
"input_norm/layer0": 31.997933057149254, |
|
"learning_rate": 0.005, |
|
"loss": 2.3877, |
|
"max_norm": 97.93981170654297, |
|
"max_norm/layer0": 97.93981170654297, |
|
"mean_norm": 42.796369552612305, |
|
"mean_norm/layer0": 42.796369552612305, |
|
"multicode_k": 1, |
|
"output_norm": 20.69294343630473, |
|
"output_norm/layer0": 20.69294343630473, |
|
"step": 1050 |
|
}, |
|
{ |
|
"MSE": 566.0765097045902, |
|
"MSE/layer0": 566.0765097045902, |
|
"dead_code_fraction": 0.3515, |
|
"dead_code_fraction/layer0": 0.3515, |
|
"epoch": 0.11, |
|
"input_norm": 31.997944701512658, |
|
"input_norm/layer0": 31.997944701512658, |
|
"learning_rate": 0.005, |
|
"loss": 2.32, |
|
"max_norm": 99.40829467773438, |
|
"max_norm/layer0": 99.40829467773438, |
|
"mean_norm": 43.20481872558594, |
|
"mean_norm/layer0": 43.20481872558594, |
|
"multicode_k": 1, |
|
"output_norm": 20.780460087458298, |
|
"output_norm/layer0": 20.780460087458298, |
|
"step": 1100 |
|
}, |
|
{ |
|
"MSE": 563.1435256449383, |
|
"MSE/layer0": 563.1435256449383, |
|
"dead_code_fraction": 0.3425, |
|
"dead_code_fraction/layer0": 0.3425, |
|
"epoch": 0.12, |
|
"input_norm": 31.99793601353964, |
|
"input_norm/layer0": 31.99793601353964, |
|
"learning_rate": 0.005, |
|
"loss": 2.3309, |
|
"max_norm": 100.84235382080078, |
|
"max_norm/layer0": 100.84235382080078, |
|
"mean_norm": 43.63128852844238, |
|
"mean_norm/layer0": 43.63128852844238, |
|
"multicode_k": 1, |
|
"output_norm": 20.85479287147521, |
|
"output_norm/layer0": 20.85479287147521, |
|
"step": 1150 |
|
}, |
|
{ |
|
"MSE": 561.2093427530926, |
|
"MSE/layer0": 561.2093427530926, |
|
"dead_code_fraction": 0.3403, |
|
"dead_code_fraction/layer0": 0.3403, |
|
"epoch": 0.12, |
|
"input_norm": 31.99792865435282, |
|
"input_norm/layer0": 31.99792865435282, |
|
"learning_rate": 0.005, |
|
"loss": 2.3308, |
|
"max_norm": 102.74110412597656, |
|
"max_norm/layer0": 102.74110412597656, |
|
"mean_norm": 44.03978157043457, |
|
"mean_norm/layer0": 44.03978157043457, |
|
"multicode_k": 1, |
|
"output_norm": 20.931864147186282, |
|
"output_norm/layer0": 20.931864147186282, |
|
"step": 1200 |
|
}, |
|
{ |
|
"MSE": 559.3785518391925, |
|
"MSE/layer0": 559.3785518391925, |
|
"dead_code_fraction": 0.3412, |
|
"dead_code_fraction/layer0": 0.3412, |
|
"epoch": 0.12, |
|
"input_norm": 31.99792771339417, |
|
"input_norm/layer0": 31.99792771339417, |
|
"learning_rate": 0.005, |
|
"loss": 2.3437, |
|
"max_norm": 104.6494369506836, |
|
"max_norm/layer0": 104.6494369506836, |
|
"mean_norm": 44.438026428222656, |
|
"mean_norm/layer0": 44.438026428222656, |
|
"multicode_k": 1, |
|
"output_norm": 21.008427244822187, |
|
"output_norm/layer0": 21.008427244822187, |
|
"step": 1250 |
|
}, |
|
{ |
|
"MSE": 557.9434753417968, |
|
"MSE/layer0": 557.9434753417968, |
|
"dead_code_fraction": 0.33015, |
|
"dead_code_fraction/layer0": 0.33015, |
|
"epoch": 0.13, |
|
"input_norm": 31.997915770212824, |
|
"input_norm/layer0": 31.997915770212824, |
|
"learning_rate": 0.005, |
|
"loss": 2.2785, |
|
"max_norm": 106.27558135986328, |
|
"max_norm/layer0": 106.27558135986328, |
|
"mean_norm": 44.82562255859375, |
|
"mean_norm/layer0": 44.82562255859375, |
|
"multicode_k": 1, |
|
"output_norm": 21.07806761741638, |
|
"output_norm/layer0": 21.07806761741638, |
|
"step": 1300 |
|
}, |
|
{ |
|
"MSE": 556.1554424031574, |
|
"MSE/layer0": 556.1554424031574, |
|
"dead_code_fraction": 0.3277, |
|
"dead_code_fraction/layer0": 0.3277, |
|
"epoch": 0.14, |
|
"input_norm": 31.9979051399231, |
|
"input_norm/layer0": 31.9979051399231, |
|
"learning_rate": 0.005, |
|
"loss": 2.2823, |
|
"max_norm": 107.8658676147461, |
|
"max_norm/layer0": 107.8658676147461, |
|
"mean_norm": 45.21988105773926, |
|
"mean_norm/layer0": 45.21988105773926, |
|
"multicode_k": 1, |
|
"output_norm": 21.124666048685715, |
|
"output_norm/layer0": 21.124666048685715, |
|
"step": 1350 |
|
}, |
|
{ |
|
"MSE": 554.1472004191082, |
|
"MSE/layer0": 554.1472004191082, |
|
"dead_code_fraction": 0.32535, |
|
"dead_code_fraction/layer0": 0.32535, |
|
"epoch": 0.14, |
|
"input_norm": 31.99791674613953, |
|
"input_norm/layer0": 31.99791674613953, |
|
"learning_rate": 0.005, |
|
"loss": 2.3034, |
|
"max_norm": 109.18831634521484, |
|
"max_norm/layer0": 109.18831634521484, |
|
"mean_norm": 45.60391616821289, |
|
"mean_norm/layer0": 45.60391616821289, |
|
"multicode_k": 1, |
|
"output_norm": 21.184103918075557, |
|
"output_norm/layer0": 21.184103918075557, |
|
"step": 1400 |
|
}, |
|
{ |
|
"MSE": 553.0813423156735, |
|
"MSE/layer0": 553.0813423156735, |
|
"dead_code_fraction": 0.3218, |
|
"dead_code_fraction/layer0": 0.3218, |
|
"epoch": 0.14, |
|
"input_norm": 31.997899109522507, |
|
"input_norm/layer0": 31.997899109522507, |
|
"learning_rate": 0.005, |
|
"loss": 2.2583, |
|
"max_norm": 110.68695831298828, |
|
"max_norm/layer0": 110.68695831298828, |
|
"mean_norm": 45.98097801208496, |
|
"mean_norm/layer0": 45.98097801208496, |
|
"multicode_k": 1, |
|
"output_norm": 21.234303328196226, |
|
"output_norm/layer0": 21.234303328196226, |
|
"step": 1450 |
|
}, |
|
{ |
|
"MSE": 551.1942003377276, |
|
"MSE/layer0": 551.1942003377276, |
|
"dead_code_fraction": 0.32175, |
|
"dead_code_fraction/layer0": 0.32175, |
|
"epoch": 0.15, |
|
"input_norm": 31.997910699844365, |
|
"input_norm/layer0": 31.997910699844365, |
|
"learning_rate": 0.005, |
|
"loss": 2.2395, |
|
"max_norm": 112.16923522949219, |
|
"max_norm/layer0": 112.16923522949219, |
|
"mean_norm": 46.355411529541016, |
|
"mean_norm/layer0": 46.355411529541016, |
|
"multicode_k": 1, |
|
"output_norm": 21.303704795837398, |
|
"output_norm/layer0": 21.303704795837398, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_MSE/layer0": 550.3311246673497, |
|
"eval_accuracy": 0.486590169556823, |
|
"eval_dead_code_fraction/layer0": 0.32665, |
|
"eval_input_norm/layer0": 31.99789719372221, |
|
"eval_loss": 2.253082513809204, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 21.329729693291277, |
|
"eval_runtime": 160.4576, |
|
"eval_samples_per_second": 28.811, |
|
"eval_steps_per_second": 1.801, |
|
"step": 1500 |
|
}, |
|
{ |
|
"MSE": 551.4163179524738, |
|
"MSE/layer0": 551.4163179524738, |
|
"dead_code_fraction": 0.3174, |
|
"dead_code_fraction/layer0": 0.3174, |
|
"epoch": 0.15, |
|
"input_norm": 31.997892700831095, |
|
"input_norm/layer0": 31.997892700831095, |
|
"learning_rate": 0.005, |
|
"loss": 2.1968, |
|
"max_norm": 113.21269989013672, |
|
"max_norm/layer0": 113.21269989013672, |
|
"mean_norm": 46.7271785736084, |
|
"mean_norm/layer0": 46.7271785736084, |
|
"multicode_k": 1, |
|
"output_norm": 21.321544698079432, |
|
"output_norm/layer0": 21.321544698079432, |
|
"step": 1550 |
|
}, |
|
{ |
|
"MSE": 549.0553175354001, |
|
"MSE/layer0": 549.0553175354001, |
|
"dead_code_fraction": 0.31715, |
|
"dead_code_fraction/layer0": 0.31715, |
|
"epoch": 0.16, |
|
"input_norm": 31.99791664441427, |
|
"input_norm/layer0": 31.99791664441427, |
|
"learning_rate": 0.005, |
|
"loss": 2.2863, |
|
"max_norm": 114.45362854003906, |
|
"max_norm/layer0": 114.45362854003906, |
|
"mean_norm": 47.08230972290039, |
|
"mean_norm/layer0": 47.08230972290039, |
|
"multicode_k": 1, |
|
"output_norm": 21.38125430742899, |
|
"output_norm/layer0": 21.38125430742899, |
|
"step": 1600 |
|
}, |
|
{ |
|
"MSE": 547.4109810384114, |
|
"MSE/layer0": 547.4109810384114, |
|
"dead_code_fraction": 0.3131, |
|
"dead_code_fraction/layer0": 0.3131, |
|
"epoch": 0.17, |
|
"input_norm": 31.997924680709843, |
|
"input_norm/layer0": 31.997924680709843, |
|
"learning_rate": 0.005, |
|
"loss": 2.2147, |
|
"max_norm": 115.29362487792969, |
|
"max_norm/layer0": 115.29362487792969, |
|
"mean_norm": 47.438798904418945, |
|
"mean_norm/layer0": 47.438798904418945, |
|
"multicode_k": 1, |
|
"output_norm": 21.454637037913013, |
|
"output_norm/layer0": 21.454637037913013, |
|
"step": 1650 |
|
}, |
|
{ |
|
"MSE": 546.0445864868163, |
|
"MSE/layer0": 546.0445864868163, |
|
"dead_code_fraction": 0.31475, |
|
"dead_code_fraction/layer0": 0.31475, |
|
"epoch": 0.17, |
|
"input_norm": 31.997929503122954, |
|
"input_norm/layer0": 31.997929503122954, |
|
"learning_rate": 0.005, |
|
"loss": 2.2501, |
|
"max_norm": 116.09871673583984, |
|
"max_norm/layer0": 116.09871673583984, |
|
"mean_norm": 47.79398536682129, |
|
"mean_norm/layer0": 47.79398536682129, |
|
"multicode_k": 1, |
|
"output_norm": 21.4808695602417, |
|
"output_norm/layer0": 21.4808695602417, |
|
"step": 1700 |
|
}, |
|
{ |
|
"MSE": 545.4600128173831, |
|
"MSE/layer0": 545.4600128173831, |
|
"dead_code_fraction": 0.30905, |
|
"dead_code_fraction/layer0": 0.30905, |
|
"epoch": 0.17, |
|
"input_norm": 31.997937501271572, |
|
"input_norm/layer0": 31.997937501271572, |
|
"learning_rate": 0.005, |
|
"loss": 2.2296, |
|
"max_norm": 117.0920181274414, |
|
"max_norm/layer0": 117.0920181274414, |
|
"mean_norm": 48.138267517089844, |
|
"mean_norm/layer0": 48.138267517089844, |
|
"multicode_k": 1, |
|
"output_norm": 21.52623297691346, |
|
"output_norm/layer0": 21.52623297691346, |
|
"step": 1750 |
|
}, |
|
{ |
|
"MSE": 543.9589634704591, |
|
"MSE/layer0": 543.9589634704591, |
|
"dead_code_fraction": 0.3074, |
|
"dead_code_fraction/layer0": 0.3074, |
|
"epoch": 0.18, |
|
"input_norm": 31.997916940053315, |
|
"input_norm/layer0": 31.997916940053315, |
|
"learning_rate": 0.005, |
|
"loss": 2.1632, |
|
"max_norm": 118.44883728027344, |
|
"max_norm/layer0": 118.44883728027344, |
|
"mean_norm": 48.48598670959473, |
|
"mean_norm/layer0": 48.48598670959473, |
|
"multicode_k": 1, |
|
"output_norm": 21.572722558975222, |
|
"output_norm/layer0": 21.572722558975222, |
|
"step": 1800 |
|
}, |
|
{ |
|
"MSE": 543.3154680887858, |
|
"MSE/layer0": 543.3154680887858, |
|
"dead_code_fraction": 0.30485, |
|
"dead_code_fraction/layer0": 0.30485, |
|
"epoch": 0.18, |
|
"input_norm": 31.997930173873904, |
|
"input_norm/layer0": 31.997930173873904, |
|
"learning_rate": 0.005, |
|
"loss": 2.1874, |
|
"max_norm": 119.3927001953125, |
|
"max_norm/layer0": 119.3927001953125, |
|
"mean_norm": 48.82695388793945, |
|
"mean_norm/layer0": 48.82695388793945, |
|
"multicode_k": 1, |
|
"output_norm": 21.595847959518437, |
|
"output_norm/layer0": 21.595847959518437, |
|
"step": 1850 |
|
}, |
|
{ |
|
"MSE": 542.2137928263345, |
|
"MSE/layer0": 542.2137928263345, |
|
"dead_code_fraction": 0.30715, |
|
"dead_code_fraction/layer0": 0.30715, |
|
"epoch": 0.19, |
|
"input_norm": 31.997955802281705, |
|
"input_norm/layer0": 31.997955802281705, |
|
"learning_rate": 0.005, |
|
"loss": 2.2323, |
|
"max_norm": 121.5817642211914, |
|
"max_norm/layer0": 121.5817642211914, |
|
"mean_norm": 49.15649604797363, |
|
"mean_norm/layer0": 49.15649604797363, |
|
"multicode_k": 1, |
|
"output_norm": 21.63884919484457, |
|
"output_norm/layer0": 21.63884919484457, |
|
"step": 1900 |
|
}, |
|
{ |
|
"MSE": 539.4505286661786, |
|
"MSE/layer0": 539.4505286661786, |
|
"dead_code_fraction": 0.3033, |
|
"dead_code_fraction/layer0": 0.3033, |
|
"epoch": 0.2, |
|
"input_norm": 31.997942549387595, |
|
"input_norm/layer0": 31.997942549387595, |
|
"learning_rate": 0.005, |
|
"loss": 2.1894, |
|
"max_norm": 123.63184356689453, |
|
"max_norm/layer0": 123.63184356689453, |
|
"mean_norm": 49.49074363708496, |
|
"mean_norm/layer0": 49.49074363708496, |
|
"multicode_k": 1, |
|
"output_norm": 21.689245723088575, |
|
"output_norm/layer0": 21.689245723088575, |
|
"step": 1950 |
|
}, |
|
{ |
|
"MSE": 539.8872321573892, |
|
"MSE/layer0": 539.8872321573892, |
|
"dead_code_fraction": 0.29975, |
|
"dead_code_fraction/layer0": 0.29975, |
|
"epoch": 0.2, |
|
"input_norm": 31.997952289581303, |
|
"input_norm/layer0": 31.997952289581303, |
|
"learning_rate": 0.005, |
|
"loss": 2.1999, |
|
"max_norm": 125.97776794433594, |
|
"max_norm/layer0": 125.97776794433594, |
|
"mean_norm": 49.814876556396484, |
|
"mean_norm/layer0": 49.814876556396484, |
|
"multicode_k": 1, |
|
"output_norm": 21.72016517957053, |
|
"output_norm/layer0": 21.72016517957053, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_MSE/layer0": 539.0149815035619, |
|
"eval_accuracy": 0.4955417565578542, |
|
"eval_dead_code_fraction/layer0": 0.30475, |
|
"eval_input_norm/layer0": 31.997959356660743, |
|
"eval_loss": 2.1908392906188965, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 21.766283075917258, |
|
"eval_runtime": 158.005, |
|
"eval_samples_per_second": 29.259, |
|
"eval_steps_per_second": 1.829, |
|
"step": 2000 |
|
}, |
|
{ |
|
"MSE": 538.042401936849, |
|
"MSE/layer0": 538.042401936849, |
|
"dead_code_fraction": 0.30175, |
|
"dead_code_fraction/layer0": 0.30175, |
|
"epoch": 0.2, |
|
"input_norm": 31.99795293172201, |
|
"input_norm/layer0": 31.99795293172201, |
|
"learning_rate": 0.005, |
|
"loss": 2.1768, |
|
"max_norm": 127.91316986083984, |
|
"max_norm/layer0": 127.91316986083984, |
|
"mean_norm": 50.13774490356445, |
|
"mean_norm/layer0": 50.13774490356445, |
|
"multicode_k": 1, |
|
"output_norm": 21.768677377700797, |
|
"output_norm/layer0": 21.768677377700797, |
|
"step": 2050 |
|
}, |
|
{ |
|
"MSE": 537.4139138285318, |
|
"MSE/layer0": 537.4139138285318, |
|
"dead_code_fraction": 0.29605, |
|
"dead_code_fraction/layer0": 0.29605, |
|
"epoch": 0.21, |
|
"input_norm": 31.997954098383584, |
|
"input_norm/layer0": 31.997954098383584, |
|
"learning_rate": 0.005, |
|
"loss": 2.1417, |
|
"max_norm": 129.62669372558594, |
|
"max_norm/layer0": 129.62669372558594, |
|
"mean_norm": 50.44980430603027, |
|
"mean_norm/layer0": 50.44980430603027, |
|
"multicode_k": 1, |
|
"output_norm": 21.780523262023927, |
|
"output_norm/layer0": 21.780523262023927, |
|
"step": 2100 |
|
}, |
|
{ |
|
"MSE": 537.116479644775, |
|
"MSE/layer0": 537.116479644775, |
|
"dead_code_fraction": 0.298, |
|
"dead_code_fraction/layer0": 0.298, |
|
"epoch": 0.21, |
|
"input_norm": 31.99796335220337, |
|
"input_norm/layer0": 31.99796335220337, |
|
"learning_rate": 0.005, |
|
"loss": 2.1753, |
|
"max_norm": 131.71987915039062, |
|
"max_norm/layer0": 131.71987915039062, |
|
"mean_norm": 50.758169174194336, |
|
"mean_norm/layer0": 50.758169174194336, |
|
"multicode_k": 1, |
|
"output_norm": 21.819064016342168, |
|
"output_norm/layer0": 21.819064016342168, |
|
"step": 2150 |
|
}, |
|
{ |
|
"MSE": 536.1095620218915, |
|
"MSE/layer0": 536.1095620218915, |
|
"dead_code_fraction": 0.29655, |
|
"dead_code_fraction/layer0": 0.29655, |
|
"epoch": 0.22, |
|
"input_norm": 31.997976016998287, |
|
"input_norm/layer0": 31.997976016998287, |
|
"learning_rate": 0.005, |
|
"loss": 2.1676, |
|
"max_norm": 133.67694091796875, |
|
"max_norm/layer0": 133.67694091796875, |
|
"mean_norm": 51.058135986328125, |
|
"mean_norm/layer0": 51.058135986328125, |
|
"multicode_k": 1, |
|
"output_norm": 21.83967799504598, |
|
"output_norm/layer0": 21.83967799504598, |
|
"step": 2200 |
|
}, |
|
{ |
|
"MSE": 535.5964339701336, |
|
"MSE/layer0": 535.5964339701336, |
|
"dead_code_fraction": 0.2945, |
|
"dead_code_fraction/layer0": 0.2945, |
|
"epoch": 0.23, |
|
"input_norm": 31.997973279952987, |
|
"input_norm/layer0": 31.997973279952987, |
|
"learning_rate": 0.005, |
|
"loss": 2.1347, |
|
"max_norm": 135.40386962890625, |
|
"max_norm/layer0": 135.40386962890625, |
|
"mean_norm": 51.35656929016113, |
|
"mean_norm/layer0": 51.35656929016113, |
|
"multicode_k": 1, |
|
"output_norm": 21.857111148834242, |
|
"output_norm/layer0": 21.857111148834242, |
|
"step": 2250 |
|
}, |
|
{ |
|
"MSE": 534.8214352925618, |
|
"MSE/layer0": 534.8214352925618, |
|
"dead_code_fraction": 0.2943, |
|
"dead_code_fraction/layer0": 0.2943, |
|
"epoch": 0.23, |
|
"input_norm": 31.99798300425212, |
|
"input_norm/layer0": 31.99798300425212, |
|
"learning_rate": 0.005, |
|
"loss": 2.1712, |
|
"max_norm": 137.13648986816406, |
|
"max_norm/layer0": 137.13648986816406, |
|
"mean_norm": 51.64659118652344, |
|
"mean_norm/layer0": 51.64659118652344, |
|
"multicode_k": 1, |
|
"output_norm": 21.901708291371662, |
|
"output_norm/layer0": 21.901708291371662, |
|
"step": 2300 |
|
}, |
|
{ |
|
"MSE": 533.4100613403319, |
|
"MSE/layer0": 533.4100613403319, |
|
"dead_code_fraction": 0.29105, |
|
"dead_code_fraction/layer0": 0.29105, |
|
"epoch": 0.23, |
|
"input_norm": 31.99798559824626, |
|
"input_norm/layer0": 31.99798559824626, |
|
"learning_rate": 0.005, |
|
"loss": 2.1726, |
|
"max_norm": 138.62417602539062, |
|
"max_norm/layer0": 138.62417602539062, |
|
"mean_norm": 51.931190490722656, |
|
"mean_norm/layer0": 51.931190490722656, |
|
"multicode_k": 1, |
|
"output_norm": 21.91944276809694, |
|
"output_norm/layer0": 21.91944276809694, |
|
"step": 2350 |
|
}, |
|
{ |
|
"MSE": 533.0944277445471, |
|
"MSE/layer0": 533.0944277445471, |
|
"dead_code_fraction": 0.29235, |
|
"dead_code_fraction/layer0": 0.29235, |
|
"epoch": 0.24, |
|
"input_norm": 31.99797873497009, |
|
"input_norm/layer0": 31.99797873497009, |
|
"learning_rate": 0.005, |
|
"loss": 2.1496, |
|
"max_norm": 140.219970703125, |
|
"max_norm/layer0": 140.219970703125, |
|
"mean_norm": 52.213850021362305, |
|
"mean_norm/layer0": 52.213850021362305, |
|
"multicode_k": 1, |
|
"output_norm": 21.941968046824137, |
|
"output_norm/layer0": 21.941968046824137, |
|
"step": 2400 |
|
}, |
|
{ |
|
"MSE": 531.4289741007487, |
|
"MSE/layer0": 531.4289741007487, |
|
"dead_code_fraction": 0.29335, |
|
"dead_code_fraction/layer0": 0.29335, |
|
"epoch": 0.24, |
|
"input_norm": 31.998000961939493, |
|
"input_norm/layer0": 31.998000961939493, |
|
"learning_rate": 0.005, |
|
"loss": 2.153, |
|
"max_norm": 141.84396362304688, |
|
"max_norm/layer0": 141.84396362304688, |
|
"mean_norm": 52.47932052612305, |
|
"mean_norm/layer0": 52.47932052612305, |
|
"multicode_k": 1, |
|
"output_norm": 21.982840156555177, |
|
"output_norm/layer0": 21.982840156555177, |
|
"step": 2450 |
|
}, |
|
{ |
|
"MSE": 531.2627974446617, |
|
"MSE/layer0": 531.2627974446617, |
|
"dead_code_fraction": 0.28885, |
|
"dead_code_fraction/layer0": 0.28885, |
|
"epoch": 0.25, |
|
"input_norm": 31.99799962997436, |
|
"input_norm/layer0": 31.99799962997436, |
|
"learning_rate": 0.005, |
|
"loss": 2.1688, |
|
"max_norm": 143.0140838623047, |
|
"max_norm/layer0": 143.0140838623047, |
|
"mean_norm": 52.74382019042969, |
|
"mean_norm/layer0": 52.74382019042969, |
|
"multicode_k": 1, |
|
"output_norm": 22.00004559199015, |
|
"output_norm/layer0": 22.00004559199015, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_MSE/layer0": 530.4651256365718, |
|
"eval_accuracy": 0.5006363482007701, |
|
"eval_dead_code_fraction/layer0": 0.29495, |
|
"eval_input_norm/layer0": 31.99800563596064, |
|
"eval_loss": 2.155103921890259, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 22.022818533393835, |
|
"eval_runtime": 157.5009, |
|
"eval_samples_per_second": 29.352, |
|
"eval_steps_per_second": 1.835, |
|
"step": 2500 |
|
}, |
|
{ |
|
"MSE": 530.4989952596026, |
|
"MSE/layer0": 530.4989952596026, |
|
"dead_code_fraction": 0.29025, |
|
"dead_code_fraction/layer0": 0.29025, |
|
"epoch": 0.26, |
|
"input_norm": 31.99801852544149, |
|
"input_norm/layer0": 31.99801852544149, |
|
"learning_rate": 0.005, |
|
"loss": 2.1541, |
|
"max_norm": 144.50558471679688, |
|
"max_norm/layer0": 144.50558471679688, |
|
"mean_norm": 52.992868423461914, |
|
"mean_norm/layer0": 52.992868423461914, |
|
"multicode_k": 1, |
|
"output_norm": 22.044915301005062, |
|
"output_norm/layer0": 22.044915301005062, |
|
"step": 2550 |
|
}, |
|
{ |
|
"MSE": 529.2955647786457, |
|
"MSE/layer0": 529.2955647786457, |
|
"dead_code_fraction": 0.288, |
|
"dead_code_fraction/layer0": 0.288, |
|
"epoch": 0.26, |
|
"input_norm": 31.998021106719975, |
|
"input_norm/layer0": 31.998021106719975, |
|
"learning_rate": 0.005, |
|
"loss": 2.1563, |
|
"max_norm": 146.2478485107422, |
|
"max_norm/layer0": 146.2478485107422, |
|
"mean_norm": 53.24601364135742, |
|
"mean_norm/layer0": 53.24601364135742, |
|
"multicode_k": 1, |
|
"output_norm": 22.048689235051476, |
|
"output_norm/layer0": 22.048689235051476, |
|
"step": 2600 |
|
}, |
|
{ |
|
"MSE": 529.877343190511, |
|
"MSE/layer0": 529.877343190511, |
|
"dead_code_fraction": 0.288, |
|
"dead_code_fraction/layer0": 0.288, |
|
"epoch": 0.27, |
|
"input_norm": 31.998024587631217, |
|
"input_norm/layer0": 31.998024587631217, |
|
"learning_rate": 0.005, |
|
"loss": 2.1382, |
|
"max_norm": 147.41587829589844, |
|
"max_norm/layer0": 147.41587829589844, |
|
"mean_norm": 53.48561096191406, |
|
"mean_norm/layer0": 53.48561096191406, |
|
"multicode_k": 1, |
|
"output_norm": 22.0797532526652, |
|
"output_norm/layer0": 22.0797532526652, |
|
"step": 2650 |
|
}, |
|
{ |
|
"MSE": 528.3514750671387, |
|
"MSE/layer0": 528.3514750671387, |
|
"dead_code_fraction": 0.28825, |
|
"dead_code_fraction/layer0": 0.28825, |
|
"epoch": 0.27, |
|
"input_norm": 31.99804752349852, |
|
"input_norm/layer0": 31.99804752349852, |
|
"learning_rate": 0.005, |
|
"loss": 2.1742, |
|
"max_norm": 148.7862091064453, |
|
"max_norm/layer0": 148.7862091064453, |
|
"mean_norm": 53.71611213684082, |
|
"mean_norm/layer0": 53.71611213684082, |
|
"multicode_k": 1, |
|
"output_norm": 22.09869578997295, |
|
"output_norm/layer0": 22.09869578997295, |
|
"step": 2700 |
|
}, |
|
{ |
|
"MSE": 528.2884072875979, |
|
"MSE/layer0": 528.2884072875979, |
|
"dead_code_fraction": 0.28335, |
|
"dead_code_fraction/layer0": 0.28335, |
|
"epoch": 0.28, |
|
"input_norm": 31.998042856852216, |
|
"input_norm/layer0": 31.998042856852216, |
|
"learning_rate": 0.005, |
|
"loss": 2.1277, |
|
"max_norm": 150.35140991210938, |
|
"max_norm/layer0": 150.35140991210938, |
|
"mean_norm": 53.946285247802734, |
|
"mean_norm/layer0": 53.946285247802734, |
|
"multicode_k": 1, |
|
"output_norm": 22.106029316584255, |
|
"output_norm/layer0": 22.106029316584255, |
|
"step": 2750 |
|
}, |
|
{ |
|
"MSE": 527.2996965026854, |
|
"MSE/layer0": 527.2996965026854, |
|
"dead_code_fraction": 0.2844, |
|
"dead_code_fraction/layer0": 0.2844, |
|
"epoch": 0.28, |
|
"input_norm": 31.9980613454183, |
|
"input_norm/layer0": 31.9980613454183, |
|
"learning_rate": 0.005, |
|
"loss": 2.1676, |
|
"max_norm": 152.27590942382812, |
|
"max_norm/layer0": 152.27590942382812, |
|
"mean_norm": 54.16430473327637, |
|
"mean_norm/layer0": 54.16430473327637, |
|
"multicode_k": 1, |
|
"output_norm": 22.141783040364587, |
|
"output_norm/layer0": 22.141783040364587, |
|
"step": 2800 |
|
}, |
|
{ |
|
"MSE": 527.5191156514486, |
|
"MSE/layer0": 527.5191156514486, |
|
"dead_code_fraction": 0.28045, |
|
"dead_code_fraction/layer0": 0.28045, |
|
"epoch": 0.28, |
|
"input_norm": 31.998067801793418, |
|
"input_norm/layer0": 31.998067801793418, |
|
"learning_rate": 0.005, |
|
"loss": 2.1076, |
|
"max_norm": 153.54779052734375, |
|
"max_norm/layer0": 153.54779052734375, |
|
"mean_norm": 54.38737678527832, |
|
"mean_norm/layer0": 54.38737678527832, |
|
"multicode_k": 1, |
|
"output_norm": 22.13956375757853, |
|
"output_norm/layer0": 22.13956375757853, |
|
"step": 2850 |
|
}, |
|
{ |
|
"MSE": 527.3752633666991, |
|
"MSE/layer0": 527.3752633666991, |
|
"dead_code_fraction": 0.28165, |
|
"dead_code_fraction/layer0": 0.28165, |
|
"epoch": 0.29, |
|
"input_norm": 31.998070557912186, |
|
"input_norm/layer0": 31.998070557912186, |
|
"learning_rate": 0.005, |
|
"loss": 2.1379, |
|
"max_norm": 155.25857543945312, |
|
"max_norm/layer0": 155.25857543945312, |
|
"mean_norm": 54.598867416381836, |
|
"mean_norm/layer0": 54.598867416381836, |
|
"multicode_k": 1, |
|
"output_norm": 22.1554997475942, |
|
"output_norm/layer0": 22.1554997475942, |
|
"step": 2900 |
|
}, |
|
{ |
|
"MSE": 525.2142114257812, |
|
"MSE/layer0": 525.2142114257812, |
|
"dead_code_fraction": 0.2841, |
|
"dead_code_fraction/layer0": 0.2841, |
|
"epoch": 0.29, |
|
"input_norm": 31.998104591369632, |
|
"input_norm/layer0": 31.998104591369632, |
|
"learning_rate": 0.005, |
|
"loss": 2.1887, |
|
"max_norm": 157.656494140625, |
|
"max_norm/layer0": 157.656494140625, |
|
"mean_norm": 54.80296516418457, |
|
"mean_norm/layer0": 54.80296516418457, |
|
"multicode_k": 1, |
|
"output_norm": 22.194608500798537, |
|
"output_norm/layer0": 22.194608500798537, |
|
"step": 2950 |
|
}, |
|
{ |
|
"MSE": 525.7639581807456, |
|
"MSE/layer0": 525.7639581807456, |
|
"dead_code_fraction": 0.28035, |
|
"dead_code_fraction/layer0": 0.28035, |
|
"epoch": 0.3, |
|
"input_norm": 31.998085311253874, |
|
"input_norm/layer0": 31.998085311253874, |
|
"learning_rate": 0.005, |
|
"loss": 2.1108, |
|
"max_norm": 159.0706787109375, |
|
"max_norm/layer0": 159.0706787109375, |
|
"mean_norm": 55.01374816894531, |
|
"mean_norm/layer0": 55.01374816894531, |
|
"multicode_k": 1, |
|
"output_norm": 22.19143549601236, |
|
"output_norm/layer0": 22.19143549601236, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_MSE/layer0": 524.9529532255765, |
|
"eval_accuracy": 0.5051228197488481, |
|
"eval_dead_code_fraction/layer0": 0.2809, |
|
"eval_input_norm/layer0": 31.998092802783354, |
|
"eval_loss": 2.126948595046997, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 22.207113418645122, |
|
"eval_runtime": 157.5523, |
|
"eval_samples_per_second": 29.343, |
|
"eval_steps_per_second": 1.834, |
|
"step": 3000 |
|
}, |
|
{ |
|
"MSE": 525.49979405721, |
|
"MSE/layer0": 525.49979405721, |
|
"dead_code_fraction": 0.28015, |
|
"dead_code_fraction/layer0": 0.28015, |
|
"epoch": 0.3, |
|
"input_norm": 31.998098812103272, |
|
"input_norm/layer0": 31.998098812103272, |
|
"learning_rate": 0.005, |
|
"loss": 2.1814, |
|
"max_norm": 160.52183532714844, |
|
"max_norm/layer0": 160.52183532714844, |
|
"mean_norm": 55.21175575256348, |
|
"mean_norm/layer0": 55.21175575256348, |
|
"multicode_k": 1, |
|
"output_norm": 22.205291633605963, |
|
"output_norm/layer0": 22.205291633605963, |
|
"step": 3050 |
|
}, |
|
{ |
|
"MSE": 525.1535092671712, |
|
"MSE/layer0": 525.1535092671712, |
|
"dead_code_fraction": 0.27915, |
|
"dead_code_fraction/layer0": 0.27915, |
|
"epoch": 0.31, |
|
"input_norm": 31.998094654083246, |
|
"input_norm/layer0": 31.998094654083246, |
|
"learning_rate": 0.005, |
|
"loss": 2.1228, |
|
"max_norm": 161.857666015625, |
|
"max_norm/layer0": 161.857666015625, |
|
"mean_norm": 55.42117881774902, |
|
"mean_norm/layer0": 55.42117881774902, |
|
"multicode_k": 1, |
|
"output_norm": 22.20783314704896, |
|
"output_norm/layer0": 22.20783314704896, |
|
"step": 3100 |
|
}, |
|
{ |
|
"MSE": 524.7367662556965, |
|
"MSE/layer0": 524.7367662556965, |
|
"dead_code_fraction": 0.27865, |
|
"dead_code_fraction/layer0": 0.27865, |
|
"epoch": 0.32, |
|
"input_norm": 31.99810951550802, |
|
"input_norm/layer0": 31.99810951550802, |
|
"learning_rate": 0.005, |
|
"loss": 2.1582, |
|
"max_norm": 163.2421417236328, |
|
"max_norm/layer0": 163.2421417236328, |
|
"mean_norm": 55.61536979675293, |
|
"mean_norm/layer0": 55.61536979675293, |
|
"multicode_k": 1, |
|
"output_norm": 22.229626963933313, |
|
"output_norm/layer0": 22.229626963933313, |
|
"step": 3150 |
|
}, |
|
{ |
|
"MSE": 523.2996738688151, |
|
"MSE/layer0": 523.2996738688151, |
|
"dead_code_fraction": 0.27935, |
|
"dead_code_fraction/layer0": 0.27935, |
|
"epoch": 0.32, |
|
"input_norm": 31.998107938766474, |
|
"input_norm/layer0": 31.998107938766474, |
|
"learning_rate": 0.005, |
|
"loss": 2.0913, |
|
"max_norm": 164.34832763671875, |
|
"max_norm/layer0": 164.34832763671875, |
|
"mean_norm": 55.82136154174805, |
|
"mean_norm/layer0": 55.82136154174805, |
|
"multicode_k": 1, |
|
"output_norm": 22.247861604690552, |
|
"output_norm/layer0": 22.247861604690552, |
|
"step": 3200 |
|
}, |
|
{ |
|
"MSE": 524.0594484965007, |
|
"MSE/layer0": 524.0594484965007, |
|
"dead_code_fraction": 0.2756, |
|
"dead_code_fraction/layer0": 0.2756, |
|
"epoch": 0.33, |
|
"input_norm": 31.998120482762648, |
|
"input_norm/layer0": 31.998120482762648, |
|
"learning_rate": 0.005, |
|
"loss": 2.1073, |
|
"max_norm": 165.75938415527344, |
|
"max_norm/layer0": 165.75938415527344, |
|
"mean_norm": 56.010887145996094, |
|
"mean_norm/layer0": 56.010887145996094, |
|
"multicode_k": 1, |
|
"output_norm": 22.245831327438353, |
|
"output_norm/layer0": 22.245831327438353, |
|
"step": 3250 |
|
}, |
|
{ |
|
"MSE": 525.364818725586, |
|
"MSE/layer0": 525.364818725586, |
|
"dead_code_fraction": 0.2756, |
|
"dead_code_fraction/layer0": 0.2756, |
|
"epoch": 0.33, |
|
"input_norm": 31.998132244745904, |
|
"input_norm/layer0": 31.998132244745904, |
|
"learning_rate": 0.005, |
|
"loss": 2.0807, |
|
"max_norm": 166.85643005371094, |
|
"max_norm/layer0": 166.85643005371094, |
|
"mean_norm": 56.187782287597656, |
|
"mean_norm/layer0": 56.187782287597656, |
|
"multicode_k": 1, |
|
"output_norm": 22.242043924331664, |
|
"output_norm/layer0": 22.242043924331664, |
|
"step": 3300 |
|
}, |
|
{ |
|
"MSE": 523.8938673400878, |
|
"MSE/layer0": 523.8938673400878, |
|
"dead_code_fraction": 0.2733, |
|
"dead_code_fraction/layer0": 0.2733, |
|
"epoch": 0.34, |
|
"input_norm": 31.998154455820725, |
|
"input_norm/layer0": 31.998154455820725, |
|
"learning_rate": 0.005, |
|
"loss": 2.1234, |
|
"max_norm": 167.70089721679688, |
|
"max_norm/layer0": 167.70089721679688, |
|
"mean_norm": 56.36995506286621, |
|
"mean_norm/layer0": 56.36995506286621, |
|
"multicode_k": 1, |
|
"output_norm": 22.246343409220387, |
|
"output_norm/layer0": 22.246343409220387, |
|
"step": 3350 |
|
}, |
|
{ |
|
"MSE": 522.7465829976402, |
|
"MSE/layer0": 522.7465829976402, |
|
"dead_code_fraction": 0.2741, |
|
"dead_code_fraction/layer0": 0.2741, |
|
"epoch": 0.34, |
|
"input_norm": 31.998157631556197, |
|
"input_norm/layer0": 31.998157631556197, |
|
"learning_rate": 0.005, |
|
"loss": 2.1138, |
|
"max_norm": 168.70301818847656, |
|
"max_norm/layer0": 168.70301818847656, |
|
"mean_norm": 56.55203437805176, |
|
"mean_norm/layer0": 56.55203437805176, |
|
"multicode_k": 1, |
|
"output_norm": 22.282327626546234, |
|
"output_norm/layer0": 22.282327626546234, |
|
"step": 3400 |
|
}, |
|
{ |
|
"MSE": 522.0263201395671, |
|
"MSE/layer0": 522.0263201395671, |
|
"dead_code_fraction": 0.27335, |
|
"dead_code_fraction/layer0": 0.27335, |
|
"epoch": 0.34, |
|
"input_norm": 31.99815892855326, |
|
"input_norm/layer0": 31.99815892855326, |
|
"learning_rate": 0.005, |
|
"loss": 2.103, |
|
"max_norm": 169.3920135498047, |
|
"max_norm/layer0": 169.3920135498047, |
|
"mean_norm": 56.73575782775879, |
|
"mean_norm/layer0": 56.73575782775879, |
|
"multicode_k": 1, |
|
"output_norm": 22.29100898424786, |
|
"output_norm/layer0": 22.29100898424786, |
|
"step": 3450 |
|
}, |
|
{ |
|
"MSE": 521.5609470621745, |
|
"MSE/layer0": 521.5609470621745, |
|
"dead_code_fraction": 0.27265, |
|
"dead_code_fraction/layer0": 0.27265, |
|
"epoch": 0.35, |
|
"input_norm": 31.99817145665487, |
|
"input_norm/layer0": 31.99817145665487, |
|
"learning_rate": 0.005, |
|
"loss": 2.1045, |
|
"max_norm": 170.13829040527344, |
|
"max_norm/layer0": 170.13829040527344, |
|
"mean_norm": 56.91371726989746, |
|
"mean_norm/layer0": 56.91371726989746, |
|
"multicode_k": 1, |
|
"output_norm": 22.309985055923462, |
|
"output_norm/layer0": 22.309985055923462, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_MSE/layer0": 523.0844207110149, |
|
"eval_accuracy": 0.5078879054512807, |
|
"eval_dead_code_fraction/layer0": 0.27345, |
|
"eval_input_norm/layer0": 31.998171135689724, |
|
"eval_loss": 2.1130311489105225, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 22.351890057112634, |
|
"eval_runtime": 158.0171, |
|
"eval_samples_per_second": 29.256, |
|
"eval_steps_per_second": 1.829, |
|
"step": 3500 |
|
}, |
|
{ |
|
"MSE": 522.4261043294274, |
|
"MSE/layer0": 522.4261043294274, |
|
"dead_code_fraction": 0.27245, |
|
"dead_code_fraction/layer0": 0.27245, |
|
"epoch": 0.35, |
|
"input_norm": 31.998184868494675, |
|
"input_norm/layer0": 31.998184868494675, |
|
"learning_rate": 0.005, |
|
"loss": 2.1296, |
|
"max_norm": 171.21067810058594, |
|
"max_norm/layer0": 171.21067810058594, |
|
"mean_norm": 57.08243370056152, |
|
"mean_norm/layer0": 57.08243370056152, |
|
"multicode_k": 1, |
|
"output_norm": 22.307131767272942, |
|
"output_norm/layer0": 22.307131767272942, |
|
"step": 3550 |
|
}, |
|
{ |
|
"MSE": 520.5630591837569, |
|
"MSE/layer0": 520.5630591837569, |
|
"dead_code_fraction": 0.2715, |
|
"dead_code_fraction/layer0": 0.2715, |
|
"epoch": 0.36, |
|
"input_norm": 31.998175201416018, |
|
"input_norm/layer0": 31.998175201416018, |
|
"learning_rate": 0.005, |
|
"loss": 2.0606, |
|
"max_norm": 172.018798828125, |
|
"max_norm/layer0": 172.018798828125, |
|
"mean_norm": 57.259552001953125, |
|
"mean_norm/layer0": 57.259552001953125, |
|
"multicode_k": 1, |
|
"output_norm": 22.33381741523742, |
|
"output_norm/layer0": 22.33381741523742, |
|
"step": 3600 |
|
}, |
|
{ |
|
"MSE": 521.8190139770511, |
|
"MSE/layer0": 521.8190139770511, |
|
"dead_code_fraction": 0.26915, |
|
"dead_code_fraction/layer0": 0.26915, |
|
"epoch": 0.36, |
|
"input_norm": 31.998206920623783, |
|
"input_norm/layer0": 31.998206920623783, |
|
"learning_rate": 0.005, |
|
"loss": 2.1264, |
|
"max_norm": 173.08360290527344, |
|
"max_norm/layer0": 173.08360290527344, |
|
"mean_norm": 57.425479888916016, |
|
"mean_norm/layer0": 57.425479888916016, |
|
"multicode_k": 1, |
|
"output_norm": 22.321163501739488, |
|
"output_norm/layer0": 22.321163501739488, |
|
"step": 3650 |
|
}, |
|
{ |
|
"MSE": 520.2701113382976, |
|
"MSE/layer0": 520.2701113382976, |
|
"dead_code_fraction": 0.26935, |
|
"dead_code_fraction/layer0": 0.26935, |
|
"epoch": 0.37, |
|
"input_norm": 31.99821238517761, |
|
"input_norm/layer0": 31.99821238517761, |
|
"learning_rate": 0.005, |
|
"loss": 2.1028, |
|
"max_norm": 174.31561279296875, |
|
"max_norm/layer0": 174.31561279296875, |
|
"mean_norm": 57.58916091918945, |
|
"mean_norm/layer0": 57.58916091918945, |
|
"multicode_k": 1, |
|
"output_norm": 22.34191367149354, |
|
"output_norm/layer0": 22.34191367149354, |
|
"step": 3700 |
|
}, |
|
{ |
|
"MSE": 520.4189120992024, |
|
"MSE/layer0": 520.4189120992024, |
|
"dead_code_fraction": 0.26865, |
|
"dead_code_fraction/layer0": 0.26865, |
|
"epoch": 0.38, |
|
"input_norm": 31.99821661313375, |
|
"input_norm/layer0": 31.99821661313375, |
|
"learning_rate": 0.005, |
|
"loss": 2.106, |
|
"max_norm": 175.09739685058594, |
|
"max_norm/layer0": 175.09739685058594, |
|
"mean_norm": 57.75008010864258, |
|
"mean_norm/layer0": 57.75008010864258, |
|
"multicode_k": 1, |
|
"output_norm": 22.352550570170077, |
|
"output_norm/layer0": 22.352550570170077, |
|
"step": 3750 |
|
}, |
|
{ |
|
"MSE": 520.3332616170245, |
|
"MSE/layer0": 520.3332616170245, |
|
"dead_code_fraction": 0.2705, |
|
"dead_code_fraction/layer0": 0.2705, |
|
"epoch": 0.38, |
|
"input_norm": 31.998228356043505, |
|
"input_norm/layer0": 31.998228356043505, |
|
"learning_rate": 0.005, |
|
"loss": 2.1318, |
|
"max_norm": 175.85955810546875, |
|
"max_norm/layer0": 175.85955810546875, |
|
"mean_norm": 57.9084529876709, |
|
"mean_norm/layer0": 57.9084529876709, |
|
"multicode_k": 1, |
|
"output_norm": 22.355525690714526, |
|
"output_norm/layer0": 22.355525690714526, |
|
"step": 3800 |
|
}, |
|
{ |
|
"MSE": 519.1107161458334, |
|
"MSE/layer0": 519.1107161458334, |
|
"dead_code_fraction": 0.26585, |
|
"dead_code_fraction/layer0": 0.26585, |
|
"epoch": 0.39, |
|
"input_norm": 31.998228273391724, |
|
"input_norm/layer0": 31.998228273391724, |
|
"learning_rate": 0.005, |
|
"loss": 2.1063, |
|
"max_norm": 176.55845642089844, |
|
"max_norm/layer0": 176.55845642089844, |
|
"mean_norm": 58.0648193359375, |
|
"mean_norm/layer0": 58.0648193359375, |
|
"multicode_k": 1, |
|
"output_norm": 22.375479180018097, |
|
"output_norm/layer0": 22.375479180018097, |
|
"step": 3850 |
|
}, |
|
{ |
|
"MSE": 520.279450937907, |
|
"MSE/layer0": 520.279450937907, |
|
"dead_code_fraction": 0.26475, |
|
"dead_code_fraction/layer0": 0.26475, |
|
"epoch": 0.39, |
|
"input_norm": 31.998248408635455, |
|
"input_norm/layer0": 31.998248408635455, |
|
"learning_rate": 0.005, |
|
"loss": 2.1158, |
|
"max_norm": 177.40316772460938, |
|
"max_norm/layer0": 177.40316772460938, |
|
"mean_norm": 58.21473693847656, |
|
"mean_norm/layer0": 58.21473693847656, |
|
"multicode_k": 1, |
|
"output_norm": 22.37501454989114, |
|
"output_norm/layer0": 22.37501454989114, |
|
"step": 3900 |
|
}, |
|
{ |
|
"MSE": 520.3905441284179, |
|
"MSE/layer0": 520.3905441284179, |
|
"dead_code_fraction": 0.26645, |
|
"dead_code_fraction/layer0": 0.26645, |
|
"epoch": 0.4, |
|
"input_norm": 31.998255596160874, |
|
"input_norm/layer0": 31.998255596160874, |
|
"learning_rate": 0.005, |
|
"loss": 2.0919, |
|
"max_norm": 178.25682067871094, |
|
"max_norm/layer0": 178.25682067871094, |
|
"mean_norm": 58.36372947692871, |
|
"mean_norm/layer0": 58.36372947692871, |
|
"multicode_k": 1, |
|
"output_norm": 22.360030002593987, |
|
"output_norm/layer0": 22.360030002593987, |
|
"step": 3950 |
|
}, |
|
{ |
|
"MSE": 520.0447977193196, |
|
"MSE/layer0": 520.0447977193196, |
|
"dead_code_fraction": 0.2638, |
|
"dead_code_fraction/layer0": 0.2638, |
|
"epoch": 0.4, |
|
"input_norm": 31.998260914484668, |
|
"input_norm/layer0": 31.998260914484668, |
|
"learning_rate": 0.005, |
|
"loss": 2.0944, |
|
"max_norm": 178.8519287109375, |
|
"max_norm/layer0": 178.8519287109375, |
|
"mean_norm": 58.51635932922363, |
|
"mean_norm/layer0": 58.51635932922363, |
|
"multicode_k": 1, |
|
"output_norm": 22.37334650675455, |
|
"output_norm/layer0": 22.37334650675455, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_MSE/layer0": 519.885230389297, |
|
"eval_accuracy": 0.5089345655588774, |
|
"eval_dead_code_fraction/layer0": 0.2655, |
|
"eval_input_norm/layer0": 31.998263675723535, |
|
"eval_loss": 2.0995683670043945, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 22.39302826134025, |
|
"eval_runtime": 158.5492, |
|
"eval_samples_per_second": 29.158, |
|
"eval_steps_per_second": 1.823, |
|
"step": 4000 |
|
}, |
|
{ |
|
"MSE": 519.3041878255204, |
|
"MSE/layer0": 519.3041878255204, |
|
"dead_code_fraction": 0.2634, |
|
"dead_code_fraction/layer0": 0.2634, |
|
"epoch": 0.41, |
|
"input_norm": 31.998263047536216, |
|
"input_norm/layer0": 31.998263047536216, |
|
"learning_rate": 0.005, |
|
"loss": 2.0844, |
|
"max_norm": 179.35386657714844, |
|
"max_norm/layer0": 179.35386657714844, |
|
"mean_norm": 58.670223236083984, |
|
"mean_norm/layer0": 58.670223236083984, |
|
"multicode_k": 1, |
|
"output_norm": 22.375990848541264, |
|
"output_norm/layer0": 22.375990848541264, |
|
"step": 4050 |
|
}, |
|
{ |
|
"MSE": 520.2196419270836, |
|
"MSE/layer0": 520.2196419270836, |
|
"dead_code_fraction": 0.2615, |
|
"dead_code_fraction/layer0": 0.2615, |
|
"epoch": 0.41, |
|
"input_norm": 31.99826691627503, |
|
"input_norm/layer0": 31.99826691627503, |
|
"learning_rate": 0.005, |
|
"loss": 2.0983, |
|
"max_norm": 179.91224670410156, |
|
"max_norm/layer0": 179.91224670410156, |
|
"mean_norm": 58.826820373535156, |
|
"mean_norm/layer0": 58.826820373535156, |
|
"multicode_k": 1, |
|
"output_norm": 22.372630256017054, |
|
"output_norm/layer0": 22.372630256017054, |
|
"step": 4100 |
|
}, |
|
{ |
|
"MSE": 519.6039750162761, |
|
"MSE/layer0": 519.6039750162761, |
|
"dead_code_fraction": 0.26085, |
|
"dead_code_fraction/layer0": 0.26085, |
|
"epoch": 0.41, |
|
"input_norm": 31.998284708658858, |
|
"input_norm/layer0": 31.998284708658858, |
|
"learning_rate": 0.005, |
|
"loss": 2.0974, |
|
"max_norm": 180.4697265625, |
|
"max_norm/layer0": 180.4697265625, |
|
"mean_norm": 58.97820472717285, |
|
"mean_norm/layer0": 58.97820472717285, |
|
"multicode_k": 1, |
|
"output_norm": 22.377655258178706, |
|
"output_norm/layer0": 22.377655258178706, |
|
"step": 4150 |
|
}, |
|
{ |
|
"MSE": 518.396603800456, |
|
"MSE/layer0": 518.396603800456, |
|
"dead_code_fraction": 0.26145, |
|
"dead_code_fraction/layer0": 0.26145, |
|
"epoch": 0.42, |
|
"input_norm": 31.998291470209757, |
|
"input_norm/layer0": 31.998291470209757, |
|
"learning_rate": 0.005, |
|
"loss": 2.1155, |
|
"max_norm": 180.8781280517578, |
|
"max_norm/layer0": 180.8781280517578, |
|
"mean_norm": 59.12506866455078, |
|
"mean_norm/layer0": 59.12506866455078, |
|
"multicode_k": 1, |
|
"output_norm": 22.407741336822514, |
|
"output_norm/layer0": 22.407741336822514, |
|
"step": 4200 |
|
}, |
|
{ |
|
"MSE": 518.4843705240887, |
|
"MSE/layer0": 518.4843705240887, |
|
"dead_code_fraction": 0.2605, |
|
"dead_code_fraction/layer0": 0.2605, |
|
"epoch": 0.42, |
|
"input_norm": 31.99829554239909, |
|
"input_norm/layer0": 31.99829554239909, |
|
"learning_rate": 0.005, |
|
"loss": 2.1004, |
|
"max_norm": 181.51483154296875, |
|
"max_norm/layer0": 181.51483154296875, |
|
"mean_norm": 59.271942138671875, |
|
"mean_norm/layer0": 59.271942138671875, |
|
"multicode_k": 1, |
|
"output_norm": 22.40968936284383, |
|
"output_norm/layer0": 22.40968936284383, |
|
"step": 4250 |
|
}, |
|
{ |
|
"MSE": 518.0018126932782, |
|
"MSE/layer0": 518.0018126932782, |
|
"dead_code_fraction": 0.2586, |
|
"dead_code_fraction/layer0": 0.2586, |
|
"epoch": 0.43, |
|
"input_norm": 31.998309599558517, |
|
"input_norm/layer0": 31.998309599558517, |
|
"learning_rate": 0.005, |
|
"loss": 2.0848, |
|
"max_norm": 181.8904266357422, |
|
"max_norm/layer0": 181.8904266357422, |
|
"mean_norm": 59.40836715698242, |
|
"mean_norm/layer0": 59.40836715698242, |
|
"multicode_k": 1, |
|
"output_norm": 22.42666608492533, |
|
"output_norm/layer0": 22.42666608492533, |
|
"step": 4300 |
|
}, |
|
{ |
|
"MSE": 518.2576261901858, |
|
"MSE/layer0": 518.2576261901858, |
|
"dead_code_fraction": 0.25825, |
|
"dead_code_fraction/layer0": 0.25825, |
|
"epoch": 0.43, |
|
"input_norm": 31.99831358591716, |
|
"input_norm/layer0": 31.99831358591716, |
|
"learning_rate": 0.005, |
|
"loss": 2.0778, |
|
"max_norm": 182.52023315429688, |
|
"max_norm/layer0": 182.52023315429688, |
|
"mean_norm": 59.546592712402344, |
|
"mean_norm/layer0": 59.546592712402344, |
|
"multicode_k": 1, |
|
"output_norm": 22.415684442520128, |
|
"output_norm/layer0": 22.415684442520128, |
|
"step": 4350 |
|
}, |
|
{ |
|
"MSE": 517.176724141439, |
|
"MSE/layer0": 517.176724141439, |
|
"dead_code_fraction": 0.25845, |
|
"dead_code_fraction/layer0": 0.25845, |
|
"epoch": 0.44, |
|
"input_norm": 31.998328673044824, |
|
"input_norm/layer0": 31.998328673044824, |
|
"learning_rate": 0.005, |
|
"loss": 2.0989, |
|
"max_norm": 183.30308532714844, |
|
"max_norm/layer0": 183.30308532714844, |
|
"mean_norm": 59.680843353271484, |
|
"mean_norm/layer0": 59.680843353271484, |
|
"multicode_k": 1, |
|
"output_norm": 22.435629587173473, |
|
"output_norm/layer0": 22.435629587173473, |
|
"step": 4400 |
|
}, |
|
{ |
|
"MSE": 516.945845082601, |
|
"MSE/layer0": 516.945845082601, |
|
"dead_code_fraction": 0.2589, |
|
"dead_code_fraction/layer0": 0.2589, |
|
"epoch": 0.45, |
|
"input_norm": 31.998329006830847, |
|
"input_norm/layer0": 31.998329006830847, |
|
"learning_rate": 0.005, |
|
"loss": 2.087, |
|
"max_norm": 184.17068481445312, |
|
"max_norm/layer0": 184.17068481445312, |
|
"mean_norm": 59.81003379821777, |
|
"mean_norm/layer0": 59.81003379821777, |
|
"multicode_k": 1, |
|
"output_norm": 22.447185754775994, |
|
"output_norm/layer0": 22.447185754775994, |
|
"step": 4450 |
|
}, |
|
{ |
|
"MSE": 517.1110377502445, |
|
"MSE/layer0": 517.1110377502445, |
|
"dead_code_fraction": 0.25715, |
|
"dead_code_fraction/layer0": 0.25715, |
|
"epoch": 0.45, |
|
"input_norm": 31.998346713384, |
|
"input_norm/layer0": 31.998346713384, |
|
"learning_rate": 0.005, |
|
"loss": 2.1314, |
|
"max_norm": 185.53944396972656, |
|
"max_norm/layer0": 185.53944396972656, |
|
"mean_norm": 59.940223693847656, |
|
"mean_norm/layer0": 59.940223693847656, |
|
"multicode_k": 1, |
|
"output_norm": 22.444066270192472, |
|
"output_norm/layer0": 22.444066270192472, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_MSE/layer0": 517.038530914551, |
|
"eval_accuracy": 0.5114514130862962, |
|
"eval_dead_code_fraction/layer0": 0.25675, |
|
"eval_input_norm/layer0": 31.998349543131468, |
|
"eval_loss": 2.0859904289245605, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 22.471955899059722, |
|
"eval_runtime": 158.2677, |
|
"eval_samples_per_second": 29.21, |
|
"eval_steps_per_second": 1.826, |
|
"step": 4500 |
|
}, |
|
{ |
|
"MSE": 516.5108834838866, |
|
"MSE/layer0": 516.5108834838866, |
|
"dead_code_fraction": 0.2556, |
|
"dead_code_fraction/layer0": 0.2556, |
|
"epoch": 0.46, |
|
"input_norm": 31.99834162076315, |
|
"input_norm/layer0": 31.99834162076315, |
|
"learning_rate": 0.005, |
|
"loss": 2.0594, |
|
"max_norm": 186.7916259765625, |
|
"max_norm/layer0": 186.7916259765625, |
|
"mean_norm": 60.06948661804199, |
|
"mean_norm/layer0": 60.06948661804199, |
|
"multicode_k": 1, |
|
"output_norm": 22.45672461509705, |
|
"output_norm/layer0": 22.45672461509705, |
|
"step": 4550 |
|
}, |
|
{ |
|
"MSE": 517.5840914408367, |
|
"MSE/layer0": 517.5840914408367, |
|
"dead_code_fraction": 0.25275, |
|
"dead_code_fraction/layer0": 0.25275, |
|
"epoch": 0.46, |
|
"input_norm": 31.998361612955726, |
|
"input_norm/layer0": 31.998361612955726, |
|
"learning_rate": 0.005, |
|
"loss": 2.116, |
|
"max_norm": 187.44607543945312, |
|
"max_norm/layer0": 187.44607543945312, |
|
"mean_norm": 60.1932258605957, |
|
"mean_norm/layer0": 60.1932258605957, |
|
"multicode_k": 1, |
|
"output_norm": 22.445463349024458, |
|
"output_norm/layer0": 22.445463349024458, |
|
"step": 4600 |
|
}, |
|
{ |
|
"MSE": 515.9212077331542, |
|
"MSE/layer0": 515.9212077331542, |
|
"dead_code_fraction": 0.2549, |
|
"dead_code_fraction/layer0": 0.2549, |
|
"epoch": 0.47, |
|
"input_norm": 31.998358809153245, |
|
"input_norm/layer0": 31.998358809153245, |
|
"learning_rate": 0.005, |
|
"loss": 2.0851, |
|
"max_norm": 187.99063110351562, |
|
"max_norm/layer0": 187.99063110351562, |
|
"mean_norm": 60.3179931640625, |
|
"mean_norm/layer0": 60.3179931640625, |
|
"multicode_k": 1, |
|
"output_norm": 22.468881686528533, |
|
"output_norm/layer0": 22.468881686528533, |
|
"step": 4650 |
|
}, |
|
{ |
|
"MSE": 516.2712020365398, |
|
"MSE/layer0": 516.2712020365398, |
|
"dead_code_fraction": 0.2539, |
|
"dead_code_fraction/layer0": 0.2539, |
|
"epoch": 0.47, |
|
"input_norm": 31.99836943308513, |
|
"input_norm/layer0": 31.99836943308513, |
|
"learning_rate": 0.005, |
|
"loss": 2.0646, |
|
"max_norm": 188.7075653076172, |
|
"max_norm/layer0": 188.7075653076172, |
|
"mean_norm": 60.442317962646484, |
|
"mean_norm/layer0": 60.442317962646484, |
|
"multicode_k": 1, |
|
"output_norm": 22.458747002283737, |
|
"output_norm/layer0": 22.458747002283737, |
|
"step": 4700 |
|
}, |
|
{ |
|
"MSE": 515.2177518717448, |
|
"MSE/layer0": 515.2177518717448, |
|
"dead_code_fraction": 0.25225, |
|
"dead_code_fraction/layer0": 0.25225, |
|
"epoch": 0.47, |
|
"input_norm": 31.99837938944498, |
|
"input_norm/layer0": 31.99837938944498, |
|
"learning_rate": 0.005, |
|
"loss": 2.0508, |
|
"max_norm": 189.4132080078125, |
|
"max_norm/layer0": 189.4132080078125, |
|
"mean_norm": 60.56760787963867, |
|
"mean_norm/layer0": 60.56760787963867, |
|
"multicode_k": 1, |
|
"output_norm": 22.48432564417522, |
|
"output_norm/layer0": 22.48432564417522, |
|
"step": 4750 |
|
}, |
|
{ |
|
"MSE": 517.7876967760659, |
|
"MSE/layer0": 517.7876967760659, |
|
"dead_code_fraction": 0.2504, |
|
"dead_code_fraction/layer0": 0.2504, |
|
"epoch": 1.0, |
|
"input_norm": 31.998372135461928, |
|
"input_norm/layer0": 31.998372135461928, |
|
"learning_rate": 0.005, |
|
"loss": 2.0347, |
|
"max_norm": 189.93084716796875, |
|
"max_norm/layer0": 189.93084716796875, |
|
"mean_norm": 60.689674377441406, |
|
"mean_norm/layer0": 60.689674377441406, |
|
"multicode_k": 1, |
|
"output_norm": 22.4315491425679, |
|
"output_norm/layer0": 22.4315491425679, |
|
"step": 4800 |
|
}, |
|
{ |
|
"MSE": 515.4498620096845, |
|
"MSE/layer0": 515.4498620096845, |
|
"dead_code_fraction": 0.2505, |
|
"dead_code_fraction/layer0": 0.2505, |
|
"epoch": 1.01, |
|
"input_norm": 31.998399356206253, |
|
"input_norm/layer0": 31.998399356206253, |
|
"learning_rate": 0.005, |
|
"loss": 2.1351, |
|
"max_norm": 190.8528289794922, |
|
"max_norm/layer0": 190.8528289794922, |
|
"mean_norm": 60.80255126953125, |
|
"mean_norm/layer0": 60.80255126953125, |
|
"multicode_k": 1, |
|
"output_norm": 22.488870484034226, |
|
"output_norm/layer0": 22.488870484034226, |
|
"step": 4850 |
|
}, |
|
{ |
|
"MSE": 515.1998943074543, |
|
"MSE/layer0": 515.1998943074543, |
|
"dead_code_fraction": 0.24975, |
|
"dead_code_fraction/layer0": 0.24975, |
|
"epoch": 1.01, |
|
"input_norm": 31.998391094207765, |
|
"input_norm/layer0": 31.998391094207765, |
|
"learning_rate": 0.005, |
|
"loss": 2.0344, |
|
"max_norm": 191.88272094726562, |
|
"max_norm/layer0": 191.88272094726562, |
|
"mean_norm": 60.923635482788086, |
|
"mean_norm/layer0": 60.923635482788086, |
|
"multicode_k": 1, |
|
"output_norm": 22.493143533070885, |
|
"output_norm/layer0": 22.493143533070885, |
|
"step": 4900 |
|
}, |
|
{ |
|
"MSE": 516.1670984395346, |
|
"MSE/layer0": 516.1670984395346, |
|
"dead_code_fraction": 0.2478, |
|
"dead_code_fraction/layer0": 0.2478, |
|
"epoch": 1.02, |
|
"input_norm": 31.99841277122497, |
|
"input_norm/layer0": 31.99841277122497, |
|
"learning_rate": 0.005, |
|
"loss": 2.0591, |
|
"max_norm": 192.84405517578125, |
|
"max_norm/layer0": 192.84405517578125, |
|
"mean_norm": 61.04226303100586, |
|
"mean_norm/layer0": 61.04226303100586, |
|
"multicode_k": 1, |
|
"output_norm": 22.47217222531637, |
|
"output_norm/layer0": 22.47217222531637, |
|
"step": 4950 |
|
}, |
|
{ |
|
"MSE": 515.1936482747396, |
|
"MSE/layer0": 515.1936482747396, |
|
"dead_code_fraction": 0.2468, |
|
"dead_code_fraction/layer0": 0.2468, |
|
"epoch": 1.02, |
|
"input_norm": 31.998419497807816, |
|
"input_norm/layer0": 31.998419497807816, |
|
"learning_rate": 0.005, |
|
"loss": 2.0685, |
|
"max_norm": 193.819580078125, |
|
"max_norm/layer0": 193.819580078125, |
|
"mean_norm": 61.15685844421387, |
|
"mean_norm/layer0": 61.15685844421387, |
|
"multicode_k": 1, |
|
"output_norm": 22.499980732599887, |
|
"output_norm/layer0": 22.499980732599887, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_MSE/layer0": 514.3711726943474, |
|
"eval_accuracy": 0.5131406590660113, |
|
"eval_dead_code_fraction/layer0": 0.24975, |
|
"eval_input_norm/layer0": 31.998424857410036, |
|
"eval_loss": 2.076988458633423, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 22.494330299537523, |
|
"eval_runtime": 157.9418, |
|
"eval_samples_per_second": 29.27, |
|
"eval_steps_per_second": 1.83, |
|
"step": 5000 |
|
}, |
|
{ |
|
"MSE": 515.1962452189127, |
|
"MSE/layer0": 515.1962452189127, |
|
"dead_code_fraction": 0.24585, |
|
"dead_code_fraction/layer0": 0.24585, |
|
"epoch": 1.03, |
|
"input_norm": 31.998427387873342, |
|
"input_norm/layer0": 31.998427387873342, |
|
"learning_rate": 0.005, |
|
"loss": 2.07, |
|
"max_norm": 194.8584442138672, |
|
"max_norm/layer0": 194.8584442138672, |
|
"mean_norm": 61.2799015045166, |
|
"mean_norm/layer0": 61.2799015045166, |
|
"multicode_k": 1, |
|
"output_norm": 22.492868417104084, |
|
"output_norm/layer0": 22.492868417104084, |
|
"step": 5050 |
|
}, |
|
{ |
|
"MSE": 514.6688102722171, |
|
"MSE/layer0": 514.6688102722171, |
|
"dead_code_fraction": 0.24495, |
|
"dead_code_fraction/layer0": 0.24495, |
|
"epoch": 1.03, |
|
"input_norm": 31.99842824935913, |
|
"input_norm/layer0": 31.99842824935913, |
|
"learning_rate": 0.005, |
|
"loss": 2.0308, |
|
"max_norm": 195.33718872070312, |
|
"max_norm/layer0": 195.33718872070312, |
|
"mean_norm": 61.397443771362305, |
|
"mean_norm/layer0": 61.397443771362305, |
|
"multicode_k": 1, |
|
"output_norm": 22.502648471196487, |
|
"output_norm/layer0": 22.502648471196487, |
|
"step": 5100 |
|
}, |
|
{ |
|
"MSE": 515.46877843221, |
|
"MSE/layer0": 515.46877843221, |
|
"dead_code_fraction": 0.2426, |
|
"dead_code_fraction/layer0": 0.2426, |
|
"epoch": 1.04, |
|
"input_norm": 31.998430423736572, |
|
"input_norm/layer0": 31.998430423736572, |
|
"learning_rate": 0.005, |
|
"loss": 2.0427, |
|
"max_norm": 195.8143310546875, |
|
"max_norm/layer0": 195.8143310546875, |
|
"mean_norm": 61.51255416870117, |
|
"mean_norm/layer0": 61.51255416870117, |
|
"multicode_k": 1, |
|
"output_norm": 22.500031328201295, |
|
"output_norm/layer0": 22.500031328201295, |
|
"step": 5150 |
|
}, |
|
{ |
|
"MSE": 515.1060639953612, |
|
"MSE/layer0": 515.1060639953612, |
|
"dead_code_fraction": 0.2439, |
|
"dead_code_fraction/layer0": 0.2439, |
|
"epoch": 1.04, |
|
"input_norm": 31.998455877304075, |
|
"input_norm/layer0": 31.998455877304075, |
|
"learning_rate": 0.005, |
|
"loss": 2.1036, |
|
"max_norm": 196.40415954589844, |
|
"max_norm/layer0": 196.40415954589844, |
|
"mean_norm": 61.620216369628906, |
|
"mean_norm/layer0": 61.620216369628906, |
|
"multicode_k": 1, |
|
"output_norm": 22.504082736968975, |
|
"output_norm/layer0": 22.504082736968975, |
|
"step": 5200 |
|
}, |
|
{ |
|
"MSE": 514.64603418986, |
|
"MSE/layer0": 514.64603418986, |
|
"dead_code_fraction": 0.24415, |
|
"dead_code_fraction/layer0": 0.24415, |
|
"epoch": 1.05, |
|
"input_norm": 31.99846438090008, |
|
"input_norm/layer0": 31.99846438090008, |
|
"learning_rate": 0.005, |
|
"loss": 2.1032, |
|
"max_norm": 197.31690979003906, |
|
"max_norm/layer0": 197.31690979003906, |
|
"mean_norm": 61.73128890991211, |
|
"mean_norm/layer0": 61.73128890991211, |
|
"multicode_k": 1, |
|
"output_norm": 22.51759773572286, |
|
"output_norm/layer0": 22.51759773572286, |
|
"step": 5250 |
|
}, |
|
{ |
|
"MSE": 514.5095549011231, |
|
"MSE/layer0": 514.5095549011231, |
|
"dead_code_fraction": 0.24245, |
|
"dead_code_fraction/layer0": 0.24245, |
|
"epoch": 1.05, |
|
"input_norm": 31.998469727834063, |
|
"input_norm/layer0": 31.998469727834063, |
|
"learning_rate": 0.005, |
|
"loss": 2.0884, |
|
"max_norm": 198.30520629882812, |
|
"max_norm/layer0": 198.30520629882812, |
|
"mean_norm": 61.84503173828125, |
|
"mean_norm/layer0": 61.84503173828125, |
|
"multicode_k": 1, |
|
"output_norm": 22.52236960728964, |
|
"output_norm/layer0": 22.52236960728964, |
|
"step": 5300 |
|
}, |
|
{ |
|
"MSE": 514.2185153198242, |
|
"MSE/layer0": 514.2185153198242, |
|
"dead_code_fraction": 0.2423, |
|
"dead_code_fraction/layer0": 0.2423, |
|
"epoch": 1.06, |
|
"input_norm": 31.99846864700317, |
|
"input_norm/layer0": 31.99846864700317, |
|
"learning_rate": 0.005, |
|
"loss": 2.0541, |
|
"max_norm": 198.76315307617188, |
|
"max_norm/layer0": 198.76315307617188, |
|
"mean_norm": 61.954532623291016, |
|
"mean_norm/layer0": 61.954532623291016, |
|
"multicode_k": 1, |
|
"output_norm": 22.523964621225986, |
|
"output_norm/layer0": 22.523964621225986, |
|
"step": 5350 |
|
}, |
|
{ |
|
"MSE": 514.2201423136396, |
|
"MSE/layer0": 514.2201423136396, |
|
"dead_code_fraction": 0.24065, |
|
"dead_code_fraction/layer0": 0.24065, |
|
"epoch": 1.06, |
|
"input_norm": 31.99848121643067, |
|
"input_norm/layer0": 31.99848121643067, |
|
"learning_rate": 0.005, |
|
"loss": 2.0722, |
|
"max_norm": 199.5216522216797, |
|
"max_norm/layer0": 199.5216522216797, |
|
"mean_norm": 62.062015533447266, |
|
"mean_norm/layer0": 62.062015533447266, |
|
"multicode_k": 1, |
|
"output_norm": 22.529434289932254, |
|
"output_norm/layer0": 22.529434289932254, |
|
"step": 5400 |
|
}, |
|
{ |
|
"MSE": 513.0346335347496, |
|
"MSE/layer0": 513.0346335347496, |
|
"dead_code_fraction": 0.2396, |
|
"dead_code_fraction/layer0": 0.2396, |
|
"epoch": 1.07, |
|
"input_norm": 31.998482402165727, |
|
"input_norm/layer0": 31.998482402165727, |
|
"learning_rate": 0.005, |
|
"loss": 2.0839, |
|
"max_norm": 199.89144897460938, |
|
"max_norm/layer0": 199.89144897460938, |
|
"mean_norm": 62.16894721984863, |
|
"mean_norm/layer0": 62.16894721984863, |
|
"multicode_k": 1, |
|
"output_norm": 22.549472332000725, |
|
"output_norm/layer0": 22.549472332000725, |
|
"step": 5450 |
|
}, |
|
{ |
|
"MSE": 512.9845250447588, |
|
"MSE/layer0": 512.9845250447588, |
|
"dead_code_fraction": 0.23995, |
|
"dead_code_fraction/layer0": 0.23995, |
|
"epoch": 1.07, |
|
"input_norm": 31.99848415692648, |
|
"input_norm/layer0": 31.99848415692648, |
|
"learning_rate": 0.005, |
|
"loss": 2.0496, |
|
"max_norm": 200.10585021972656, |
|
"max_norm/layer0": 200.10585021972656, |
|
"mean_norm": 62.28166961669922, |
|
"mean_norm/layer0": 62.28166961669922, |
|
"multicode_k": 1, |
|
"output_norm": 22.535003283818554, |
|
"output_norm/layer0": 22.535003283818554, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_MSE/layer0": 513.7822700020247, |
|
"eval_accuracy": 0.5137449731240944, |
|
"eval_dead_code_fraction/layer0": 0.23805, |
|
"eval_input_norm/layer0": 31.998499035448475, |
|
"eval_loss": 2.0730204582214355, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 22.562518398921107, |
|
"eval_runtime": 158.2513, |
|
"eval_samples_per_second": 29.213, |
|
"eval_steps_per_second": 1.826, |
|
"step": 5500 |
|
}, |
|
{ |
|
"MSE": 513.4142807515464, |
|
"MSE/layer0": 513.4142807515464, |
|
"dead_code_fraction": 0.2388, |
|
"dead_code_fraction/layer0": 0.2388, |
|
"epoch": 1.08, |
|
"input_norm": 31.99849408785501, |
|
"input_norm/layer0": 31.99849408785501, |
|
"learning_rate": 0.005, |
|
"loss": 2.0918, |
|
"max_norm": 200.3399200439453, |
|
"max_norm/layer0": 200.3399200439453, |
|
"mean_norm": 62.38692855834961, |
|
"mean_norm/layer0": 62.38692855834961, |
|
"multicode_k": 1, |
|
"output_norm": 22.54020097732544, |
|
"output_norm/layer0": 22.54020097732544, |
|
"step": 5550 |
|
}, |
|
{ |
|
"MSE": 512.2161093648273, |
|
"MSE/layer0": 512.2161093648273, |
|
"dead_code_fraction": 0.23455, |
|
"dead_code_fraction/layer0": 0.23455, |
|
"epoch": 1.08, |
|
"input_norm": 31.998499333063755, |
|
"input_norm/layer0": 31.998499333063755, |
|
"learning_rate": 0.005, |
|
"loss": 2.0812, |
|
"max_norm": 200.90451049804688, |
|
"max_norm/layer0": 200.90451049804688, |
|
"mean_norm": 62.49030685424805, |
|
"mean_norm/layer0": 62.49030685424805, |
|
"multicode_k": 1, |
|
"output_norm": 22.562892615000422, |
|
"output_norm/layer0": 22.562892615000422, |
|
"step": 5600 |
|
}, |
|
{ |
|
"MSE": 513.2079597473146, |
|
"MSE/layer0": 513.2079597473146, |
|
"dead_code_fraction": 0.2364, |
|
"dead_code_fraction/layer0": 0.2364, |
|
"epoch": 1.09, |
|
"input_norm": 31.998499097824094, |
|
"input_norm/layer0": 31.998499097824094, |
|
"learning_rate": 0.005, |
|
"loss": 2.0405, |
|
"max_norm": 201.2469940185547, |
|
"max_norm/layer0": 201.2469940185547, |
|
"mean_norm": 62.587249755859375, |
|
"mean_norm/layer0": 62.587249755859375, |
|
"multicode_k": 1, |
|
"output_norm": 22.551958309809354, |
|
"output_norm/layer0": 22.551958309809354, |
|
"step": 5650 |
|
}, |
|
{ |
|
"MSE": 512.3663133748375, |
|
"MSE/layer0": 512.3663133748375, |
|
"dead_code_fraction": 0.2359, |
|
"dead_code_fraction/layer0": 0.2359, |
|
"epoch": 1.09, |
|
"input_norm": 31.998522087732937, |
|
"input_norm/layer0": 31.998522087732937, |
|
"learning_rate": 0.005, |
|
"loss": 2.0976, |
|
"max_norm": 202.06686401367188, |
|
"max_norm/layer0": 202.06686401367188, |
|
"mean_norm": 62.68406677246094, |
|
"mean_norm/layer0": 62.68406677246094, |
|
"multicode_k": 1, |
|
"output_norm": 22.56861629168192, |
|
"output_norm/layer0": 22.56861629168192, |
|
"step": 5700 |
|
}, |
|
{ |
|
"MSE": 513.556918741862, |
|
"MSE/layer0": 513.556918741862, |
|
"dead_code_fraction": 0.2325, |
|
"dead_code_fraction/layer0": 0.2325, |
|
"epoch": 1.1, |
|
"input_norm": 31.99852702458699, |
|
"input_norm/layer0": 31.99852702458699, |
|
"learning_rate": 0.005, |
|
"loss": 2.0531, |
|
"max_norm": 202.5853729248047, |
|
"max_norm/layer0": 202.5853729248047, |
|
"mean_norm": 62.78022766113281, |
|
"mean_norm/layer0": 62.78022766113281, |
|
"multicode_k": 1, |
|
"output_norm": 22.55354828198752, |
|
"output_norm/layer0": 22.55354828198752, |
|
"step": 5750 |
|
}, |
|
{ |
|
"MSE": 514.1225356547038, |
|
"MSE/layer0": 514.1225356547038, |
|
"dead_code_fraction": 0.23125, |
|
"dead_code_fraction/layer0": 0.23125, |
|
"epoch": 1.1, |
|
"input_norm": 31.998530540466305, |
|
"input_norm/layer0": 31.998530540466305, |
|
"learning_rate": 0.005, |
|
"loss": 2.0333, |
|
"max_norm": 202.8258514404297, |
|
"max_norm/layer0": 202.8258514404297, |
|
"mean_norm": 62.881099700927734, |
|
"mean_norm/layer0": 62.881099700927734, |
|
"multicode_k": 1, |
|
"output_norm": 22.538857170740776, |
|
"output_norm/layer0": 22.538857170740776, |
|
"step": 5800 |
|
}, |
|
{ |
|
"MSE": 512.891567026774, |
|
"MSE/layer0": 512.891567026774, |
|
"dead_code_fraction": 0.23305, |
|
"dead_code_fraction/layer0": 0.23305, |
|
"epoch": 1.11, |
|
"input_norm": 31.998542674382527, |
|
"input_norm/layer0": 31.998542674382527, |
|
"learning_rate": 0.005, |
|
"loss": 2.0894, |
|
"max_norm": 203.2826385498047, |
|
"max_norm/layer0": 203.2826385498047, |
|
"mean_norm": 62.98002815246582, |
|
"mean_norm/layer0": 62.98002815246582, |
|
"multicode_k": 1, |
|
"output_norm": 22.556459398269645, |
|
"output_norm/layer0": 22.556459398269645, |
|
"step": 5850 |
|
}, |
|
{ |
|
"MSE": 512.6300255839031, |
|
"MSE/layer0": 512.6300255839031, |
|
"dead_code_fraction": 0.23175, |
|
"dead_code_fraction/layer0": 0.23175, |
|
"epoch": 1.11, |
|
"input_norm": 31.998538637161257, |
|
"input_norm/layer0": 31.998538637161257, |
|
"learning_rate": 0.005, |
|
"loss": 2.0371, |
|
"max_norm": 203.56114196777344, |
|
"max_norm/layer0": 203.56114196777344, |
|
"mean_norm": 63.085018157958984, |
|
"mean_norm/layer0": 63.085018157958984, |
|
"multicode_k": 1, |
|
"output_norm": 22.55499767621359, |
|
"output_norm/layer0": 22.55499767621359, |
|
"step": 5900 |
|
}, |
|
{ |
|
"MSE": 512.6470455423993, |
|
"MSE/layer0": 512.6470455423993, |
|
"dead_code_fraction": 0.22945, |
|
"dead_code_fraction/layer0": 0.22945, |
|
"epoch": 1.12, |
|
"input_norm": 31.998542264302582, |
|
"input_norm/layer0": 31.998542264302582, |
|
"learning_rate": 0.005, |
|
"loss": 2.0693, |
|
"max_norm": 204.18482971191406, |
|
"max_norm/layer0": 204.18482971191406, |
|
"mean_norm": 63.186561584472656, |
|
"mean_norm/layer0": 63.186561584472656, |
|
"multicode_k": 1, |
|
"output_norm": 22.56271686236063, |
|
"output_norm/layer0": 22.56271686236063, |
|
"step": 5950 |
|
}, |
|
{ |
|
"MSE": 512.2647941589354, |
|
"MSE/layer0": 512.2647941589354, |
|
"dead_code_fraction": 0.23005, |
|
"dead_code_fraction/layer0": 0.23005, |
|
"epoch": 1.12, |
|
"input_norm": 31.99855575561523, |
|
"input_norm/layer0": 31.99855575561523, |
|
"learning_rate": 0.005, |
|
"loss": 2.1002, |
|
"max_norm": 204.59375, |
|
"max_norm/layer0": 204.59375, |
|
"mean_norm": 63.287431716918945, |
|
"mean_norm/layer0": 63.287431716918945, |
|
"multicode_k": 1, |
|
"output_norm": 22.56941809654236, |
|
"output_norm/layer0": 22.56941809654236, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_MSE/layer0": 510.787595085063, |
|
"eval_accuracy": 0.5144414778502405, |
|
"eval_dead_code_fraction/layer0": 0.2305, |
|
"eval_input_norm/layer0": 31.998558920130655, |
|
"eval_loss": 2.0667405128479004, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 22.588203073708286, |
|
"eval_runtime": 158.1457, |
|
"eval_samples_per_second": 29.233, |
|
"eval_steps_per_second": 1.827, |
|
"step": 6000 |
|
}, |
|
{ |
|
"MSE": 512.2951668802899, |
|
"MSE/layer0": 512.2951668802899, |
|
"dead_code_fraction": 0.2284, |
|
"dead_code_fraction/layer0": 0.2284, |
|
"epoch": 1.13, |
|
"input_norm": 31.998558203379314, |
|
"input_norm/layer0": 31.998558203379314, |
|
"learning_rate": 0.005, |
|
"loss": 2.0645, |
|
"max_norm": 205.16860961914062, |
|
"max_norm/layer0": 205.16860961914062, |
|
"mean_norm": 63.38333702087402, |
|
"mean_norm/layer0": 63.38333702087402, |
|
"multicode_k": 1, |
|
"output_norm": 22.570796213150032, |
|
"output_norm/layer0": 22.570796213150032, |
|
"step": 6050 |
|
}, |
|
{ |
|
"MSE": 512.1381341044107, |
|
"MSE/layer0": 512.1381341044107, |
|
"dead_code_fraction": 0.22855, |
|
"dead_code_fraction/layer0": 0.22855, |
|
"epoch": 1.13, |
|
"input_norm": 31.99856172879537, |
|
"input_norm/layer0": 31.99856172879537, |
|
"learning_rate": 0.005, |
|
"loss": 2.0856, |
|
"max_norm": 205.37376403808594, |
|
"max_norm/layer0": 205.37376403808594, |
|
"mean_norm": 63.47422790527344, |
|
"mean_norm/layer0": 63.47422790527344, |
|
"multicode_k": 1, |
|
"output_norm": 22.58201634724934, |
|
"output_norm/layer0": 22.58201634724934, |
|
"step": 6100 |
|
}, |
|
{ |
|
"MSE": 512.7539996846516, |
|
"MSE/layer0": 512.7539996846516, |
|
"dead_code_fraction": 0.22755, |
|
"dead_code_fraction/layer0": 0.22755, |
|
"epoch": 1.14, |
|
"input_norm": 31.99856161753337, |
|
"input_norm/layer0": 31.99856161753337, |
|
"learning_rate": 0.005, |
|
"loss": 2.0468, |
|
"max_norm": 205.76866149902344, |
|
"max_norm/layer0": 205.76866149902344, |
|
"mean_norm": 63.56420707702637, |
|
"mean_norm/layer0": 63.56420707702637, |
|
"multicode_k": 1, |
|
"output_norm": 22.575629208882646, |
|
"output_norm/layer0": 22.575629208882646, |
|
"step": 6150 |
|
}, |
|
{ |
|
"MSE": 512.1215149434411, |
|
"MSE/layer0": 512.1215149434411, |
|
"dead_code_fraction": 0.2268, |
|
"dead_code_fraction/layer0": 0.2268, |
|
"epoch": 1.14, |
|
"input_norm": 31.998577674229928, |
|
"input_norm/layer0": 31.998577674229928, |
|
"learning_rate": 0.005, |
|
"loss": 2.0663, |
|
"max_norm": 206.54251098632812, |
|
"max_norm/layer0": 206.54251098632812, |
|
"mean_norm": 63.64880561828613, |
|
"mean_norm/layer0": 63.64880561828613, |
|
"multicode_k": 1, |
|
"output_norm": 22.58266611417133, |
|
"output_norm/layer0": 22.58266611417133, |
|
"step": 6200 |
|
}, |
|
{ |
|
"MSE": 513.3685421752932, |
|
"MSE/layer0": 513.3685421752932, |
|
"dead_code_fraction": 0.22515, |
|
"dead_code_fraction/layer0": 0.22515, |
|
"epoch": 1.15, |
|
"input_norm": 31.998585720062266, |
|
"input_norm/layer0": 31.998585720062266, |
|
"learning_rate": 0.005, |
|
"loss": 2.0807, |
|
"max_norm": 207.23460388183594, |
|
"max_norm/layer0": 207.23460388183594, |
|
"mean_norm": 63.73150444030762, |
|
"mean_norm/layer0": 63.73150444030762, |
|
"multicode_k": 1, |
|
"output_norm": 22.574931882222508, |
|
"output_norm/layer0": 22.574931882222508, |
|
"step": 6250 |
|
}, |
|
{ |
|
"MSE": 512.1649493916829, |
|
"MSE/layer0": 512.1649493916829, |
|
"dead_code_fraction": 0.2243, |
|
"dead_code_fraction/layer0": 0.2243, |
|
"epoch": 1.15, |
|
"input_norm": 31.99859083811442, |
|
"input_norm/layer0": 31.99859083811442, |
|
"learning_rate": 0.005, |
|
"loss": 1.9994, |
|
"max_norm": 207.4078826904297, |
|
"max_norm/layer0": 207.4078826904297, |
|
"mean_norm": 63.8239631652832, |
|
"mean_norm/layer0": 63.8239631652832, |
|
"multicode_k": 1, |
|
"output_norm": 22.573653513590493, |
|
"output_norm/layer0": 22.573653513590493, |
|
"step": 6300 |
|
}, |
|
{ |
|
"MSE": 512.4084614054359, |
|
"MSE/layer0": 512.4084614054359, |
|
"dead_code_fraction": 0.22405, |
|
"dead_code_fraction/layer0": 0.22405, |
|
"epoch": 1.16, |
|
"input_norm": 31.998589369455978, |
|
"input_norm/layer0": 31.998589369455978, |
|
"learning_rate": 0.005, |
|
"loss": 2.0383, |
|
"max_norm": 207.421875, |
|
"max_norm/layer0": 207.421875, |
|
"mean_norm": 63.91918754577637, |
|
"mean_norm/layer0": 63.91918754577637, |
|
"multicode_k": 1, |
|
"output_norm": 22.570101757049564, |
|
"output_norm/layer0": 22.570101757049564, |
|
"step": 6350 |
|
}, |
|
{ |
|
"MSE": 511.3037980651857, |
|
"MSE/layer0": 511.3037980651857, |
|
"dead_code_fraction": 0.22325, |
|
"dead_code_fraction/layer0": 0.22325, |
|
"epoch": 1.16, |
|
"input_norm": 31.9986056105296, |
|
"input_norm/layer0": 31.9986056105296, |
|
"learning_rate": 0.005, |
|
"loss": 2.0836, |
|
"max_norm": 207.90211486816406, |
|
"max_norm/layer0": 207.90211486816406, |
|
"mean_norm": 64.0091323852539, |
|
"mean_norm/layer0": 64.0091323852539, |
|
"multicode_k": 1, |
|
"output_norm": 22.591040735244757, |
|
"output_norm/layer0": 22.591040735244757, |
|
"step": 6400 |
|
}, |
|
{ |
|
"MSE": 511.63349212646506, |
|
"MSE/layer0": 511.63349212646506, |
|
"dead_code_fraction": 0.2231, |
|
"dead_code_fraction/layer0": 0.2231, |
|
"epoch": 1.17, |
|
"input_norm": 31.998600152333573, |
|
"input_norm/layer0": 31.998600152333573, |
|
"learning_rate": 0.005, |
|
"loss": 2.049, |
|
"max_norm": 208.1908416748047, |
|
"max_norm/layer0": 208.1908416748047, |
|
"mean_norm": 64.09888458251953, |
|
"mean_norm/layer0": 64.09888458251953, |
|
"multicode_k": 1, |
|
"output_norm": 22.598680645624796, |
|
"output_norm/layer0": 22.598680645624796, |
|
"step": 6450 |
|
}, |
|
{ |
|
"MSE": 510.1135516866045, |
|
"MSE/layer0": 510.1135516866045, |
|
"dead_code_fraction": 0.2198, |
|
"dead_code_fraction/layer0": 0.2198, |
|
"epoch": 1.17, |
|
"input_norm": 31.99861148198446, |
|
"input_norm/layer0": 31.99861148198446, |
|
"learning_rate": 0.005, |
|
"loss": 2.0723, |
|
"max_norm": 208.76829528808594, |
|
"max_norm/layer0": 208.76829528808594, |
|
"mean_norm": 64.1937198638916, |
|
"mean_norm/layer0": 64.1937198638916, |
|
"multicode_k": 1, |
|
"output_norm": 22.610935223897293, |
|
"output_norm/layer0": 22.610935223897293, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_MSE/layer0": 510.5624312578848, |
|
"eval_accuracy": 0.5148121435408701, |
|
"eval_dead_code_fraction/layer0": 0.2206, |
|
"eval_input_norm/layer0": 31.99861497196212, |
|
"eval_loss": 2.0631778240203857, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 22.613337997850753, |
|
"eval_runtime": 157.9752, |
|
"eval_samples_per_second": 29.264, |
|
"eval_steps_per_second": 1.829, |
|
"step": 6500 |
|
}, |
|
{ |
|
"MSE": 511.1518494669597, |
|
"MSE/layer0": 511.1518494669597, |
|
"dead_code_fraction": 0.2202, |
|
"dead_code_fraction/layer0": 0.2202, |
|
"epoch": 1.18, |
|
"input_norm": 31.998620487848918, |
|
"input_norm/layer0": 31.998620487848918, |
|
"learning_rate": 0.005, |
|
"loss": 2.0713, |
|
"max_norm": 209.1894989013672, |
|
"max_norm/layer0": 209.1894989013672, |
|
"mean_norm": 64.28516006469727, |
|
"mean_norm/layer0": 64.28516006469727, |
|
"multicode_k": 1, |
|
"output_norm": 22.59137951215108, |
|
"output_norm/layer0": 22.59137951215108, |
|
"step": 6550 |
|
}, |
|
{ |
|
"MSE": 511.4045928446453, |
|
"MSE/layer0": 511.4045928446453, |
|
"dead_code_fraction": 0.2177, |
|
"dead_code_fraction/layer0": 0.2177, |
|
"epoch": 1.18, |
|
"input_norm": 31.99861013412476, |
|
"input_norm/layer0": 31.99861013412476, |
|
"learning_rate": 0.005, |
|
"loss": 2.0067, |
|
"max_norm": 209.52085876464844, |
|
"max_norm/layer0": 209.52085876464844, |
|
"mean_norm": 64.37364196777344, |
|
"mean_norm/layer0": 64.37364196777344, |
|
"multicode_k": 1, |
|
"output_norm": 22.59921900431315, |
|
"output_norm/layer0": 22.59921900431315, |
|
"step": 6600 |
|
}, |
|
{ |
|
"MSE": 510.8443921915694, |
|
"MSE/layer0": 510.8443921915694, |
|
"dead_code_fraction": 0.21885, |
|
"dead_code_fraction/layer0": 0.21885, |
|
"epoch": 1.19, |
|
"input_norm": 31.998613767623894, |
|
"input_norm/layer0": 31.998613767623894, |
|
"learning_rate": 0.005, |
|
"loss": 2.013, |
|
"max_norm": 209.98431396484375, |
|
"max_norm/layer0": 209.98431396484375, |
|
"mean_norm": 64.46432113647461, |
|
"mean_norm/layer0": 64.46432113647461, |
|
"multicode_k": 1, |
|
"output_norm": 22.60319686889649, |
|
"output_norm/layer0": 22.60319686889649, |
|
"step": 6650 |
|
}, |
|
{ |
|
"MSE": 510.0460713704424, |
|
"MSE/layer0": 510.0460713704424, |
|
"dead_code_fraction": 0.2183, |
|
"dead_code_fraction/layer0": 0.2183, |
|
"epoch": 1.19, |
|
"input_norm": 31.998641831080132, |
|
"input_norm/layer0": 31.998641831080132, |
|
"learning_rate": 0.005, |
|
"loss": 2.1151, |
|
"max_norm": 210.53810119628906, |
|
"max_norm/layer0": 210.53810119628906, |
|
"mean_norm": 64.55224609375, |
|
"mean_norm/layer0": 64.55224609375, |
|
"multicode_k": 1, |
|
"output_norm": 22.621459808349613, |
|
"output_norm/layer0": 22.621459808349613, |
|
"step": 6700 |
|
}, |
|
{ |
|
"MSE": 509.41305414835614, |
|
"MSE/layer0": 509.41305414835614, |
|
"dead_code_fraction": 0.2172, |
|
"dead_code_fraction/layer0": 0.2172, |
|
"epoch": 1.2, |
|
"input_norm": 31.998632535934448, |
|
"input_norm/layer0": 31.998632535934448, |
|
"learning_rate": 0.005, |
|
"loss": 2.0318, |
|
"max_norm": 210.88394165039062, |
|
"max_norm/layer0": 210.88394165039062, |
|
"mean_norm": 64.64096069335938, |
|
"mean_norm/layer0": 64.64096069335938, |
|
"multicode_k": 1, |
|
"output_norm": 22.626508464813227, |
|
"output_norm/layer0": 22.626508464813227, |
|
"step": 6750 |
|
}, |
|
{ |
|
"MSE": 510.3878801981608, |
|
"MSE/layer0": 510.3878801981608, |
|
"dead_code_fraction": 0.21645, |
|
"dead_code_fraction/layer0": 0.21645, |
|
"epoch": 1.2, |
|
"input_norm": 31.998641300201413, |
|
"input_norm/layer0": 31.998641300201413, |
|
"learning_rate": 0.005, |
|
"loss": 2.0492, |
|
"max_norm": 211.13937377929688, |
|
"max_norm/layer0": 211.13937377929688, |
|
"mean_norm": 64.7227783203125, |
|
"mean_norm/layer0": 64.7227783203125, |
|
"multicode_k": 1, |
|
"output_norm": 22.63286488850911, |
|
"output_norm/layer0": 22.63286488850911, |
|
"step": 6800 |
|
}, |
|
{ |
|
"MSE": 509.17419825236027, |
|
"MSE/layer0": 509.17419825236027, |
|
"dead_code_fraction": 0.2138, |
|
"dead_code_fraction/layer0": 0.2138, |
|
"epoch": 1.21, |
|
"input_norm": 31.998648115793856, |
|
"input_norm/layer0": 31.998648115793856, |
|
"learning_rate": 0.005, |
|
"loss": 2.0467, |
|
"max_norm": 211.3644256591797, |
|
"max_norm/layer0": 211.3644256591797, |
|
"mean_norm": 64.80514907836914, |
|
"mean_norm/layer0": 64.80514907836914, |
|
"multicode_k": 1, |
|
"output_norm": 22.63950007438659, |
|
"output_norm/layer0": 22.63950007438659, |
|
"step": 6850 |
|
}, |
|
{ |
|
"MSE": 509.3450110371906, |
|
"MSE/layer0": 509.3450110371906, |
|
"dead_code_fraction": 0.2144, |
|
"dead_code_fraction/layer0": 0.2144, |
|
"epoch": 1.21, |
|
"input_norm": 31.998654368718455, |
|
"input_norm/layer0": 31.998654368718455, |
|
"learning_rate": 0.005, |
|
"loss": 2.0327, |
|
"max_norm": 211.51609802246094, |
|
"max_norm/layer0": 211.51609802246094, |
|
"mean_norm": 64.885498046875, |
|
"mean_norm/layer0": 64.885498046875, |
|
"multicode_k": 1, |
|
"output_norm": 22.636532586415615, |
|
"output_norm/layer0": 22.636532586415615, |
|
"step": 6900 |
|
}, |
|
{ |
|
"MSE": 509.711417948405, |
|
"MSE/layer0": 509.711417948405, |
|
"dead_code_fraction": 0.2121, |
|
"dead_code_fraction/layer0": 0.2121, |
|
"epoch": 1.22, |
|
"input_norm": 31.998653659820555, |
|
"input_norm/layer0": 31.998653659820555, |
|
"learning_rate": 0.005, |
|
"loss": 2.0344, |
|
"max_norm": 211.93910217285156, |
|
"max_norm/layer0": 211.93910217285156, |
|
"mean_norm": 64.96215629577637, |
|
"mean_norm/layer0": 64.96215629577637, |
|
"multicode_k": 1, |
|
"output_norm": 22.642279275258375, |
|
"output_norm/layer0": 22.642279275258375, |
|
"step": 6950 |
|
}, |
|
{ |
|
"MSE": 509.53209904988614, |
|
"MSE/layer0": 509.53209904988614, |
|
"dead_code_fraction": 0.2112, |
|
"dead_code_fraction/layer0": 0.2112, |
|
"epoch": 1.22, |
|
"input_norm": 31.99865920702616, |
|
"input_norm/layer0": 31.99865920702616, |
|
"learning_rate": 0.005, |
|
"loss": 2.023, |
|
"max_norm": 212.15188598632812, |
|
"max_norm/layer0": 212.15188598632812, |
|
"mean_norm": 65.03938484191895, |
|
"mean_norm/layer0": 65.03938484191895, |
|
"multicode_k": 1, |
|
"output_norm": 22.641168931325275, |
|
"output_norm/layer0": 22.641168931325275, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_MSE/layer0": 509.9877618207523, |
|
"eval_accuracy": 0.5156894350128739, |
|
"eval_dead_code_fraction/layer0": 0.21105, |
|
"eval_input_norm/layer0": 31.998664335077162, |
|
"eval_loss": 2.0573580265045166, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 22.65440880063548, |
|
"eval_runtime": 158.8251, |
|
"eval_samples_per_second": 29.107, |
|
"eval_steps_per_second": 1.82, |
|
"step": 7000 |
|
}, |
|
{ |
|
"MSE": 509.9403240458172, |
|
"MSE/layer0": 509.9403240458172, |
|
"dead_code_fraction": 0.2101, |
|
"dead_code_fraction/layer0": 0.2101, |
|
"epoch": 1.23, |
|
"input_norm": 31.998663558959954, |
|
"input_norm/layer0": 31.998663558959954, |
|
"learning_rate": 0.005, |
|
"loss": 2.0391, |
|
"max_norm": 212.45599365234375, |
|
"max_norm/layer0": 212.45599365234375, |
|
"mean_norm": 65.11711883544922, |
|
"mean_norm/layer0": 65.11711883544922, |
|
"multicode_k": 1, |
|
"output_norm": 22.636152718861904, |
|
"output_norm/layer0": 22.636152718861904, |
|
"step": 7050 |
|
}, |
|
{ |
|
"MSE": 509.17088083903013, |
|
"MSE/layer0": 509.17088083903013, |
|
"dead_code_fraction": 0.2099, |
|
"dead_code_fraction/layer0": 0.2099, |
|
"epoch": 1.23, |
|
"input_norm": 31.998674535751356, |
|
"input_norm/layer0": 31.998674535751356, |
|
"learning_rate": 0.005, |
|
"loss": 2.0413, |
|
"max_norm": 212.8926544189453, |
|
"max_norm/layer0": 212.8926544189453, |
|
"mean_norm": 65.19314002990723, |
|
"mean_norm/layer0": 65.19314002990723, |
|
"multicode_k": 1, |
|
"output_norm": 22.652867739995315, |
|
"output_norm/layer0": 22.652867739995315, |
|
"step": 7100 |
|
}, |
|
{ |
|
"MSE": 509.09580220540397, |
|
"MSE/layer0": 509.09580220540397, |
|
"dead_code_fraction": 0.209, |
|
"dead_code_fraction/layer0": 0.209, |
|
"epoch": 1.24, |
|
"input_norm": 31.99867464383444, |
|
"input_norm/layer0": 31.99867464383444, |
|
"learning_rate": 0.005, |
|
"loss": 2.0495, |
|
"max_norm": 213.29238891601562, |
|
"max_norm/layer0": 213.29238891601562, |
|
"mean_norm": 65.27325248718262, |
|
"mean_norm/layer0": 65.27325248718262, |
|
"multicode_k": 1, |
|
"output_norm": 22.646062428156533, |
|
"output_norm/layer0": 22.646062428156533, |
|
"step": 7150 |
|
}, |
|
{ |
|
"MSE": 509.9214274597167, |
|
"MSE/layer0": 509.9214274597167, |
|
"dead_code_fraction": 0.20905, |
|
"dead_code_fraction/layer0": 0.20905, |
|
"epoch": 1.24, |
|
"input_norm": 31.998673133850097, |
|
"input_norm/layer0": 31.998673133850097, |
|
"learning_rate": 0.005, |
|
"loss": 2.0462, |
|
"max_norm": 213.58729553222656, |
|
"max_norm/layer0": 213.58729553222656, |
|
"mean_norm": 65.35407447814941, |
|
"mean_norm/layer0": 65.35407447814941, |
|
"multicode_k": 1, |
|
"output_norm": 22.63937306404113, |
|
"output_norm/layer0": 22.63937306404113, |
|
"step": 7200 |
|
}, |
|
{ |
|
"MSE": 508.71533091227207, |
|
"MSE/layer0": 508.71533091227207, |
|
"dead_code_fraction": 0.2082, |
|
"dead_code_fraction/layer0": 0.2082, |
|
"epoch": 1.25, |
|
"input_norm": 31.99868763287862, |
|
"input_norm/layer0": 31.99868763287862, |
|
"learning_rate": 0.005, |
|
"loss": 2.0582, |
|
"max_norm": 213.80873107910156, |
|
"max_norm/layer0": 213.80873107910156, |
|
"mean_norm": 65.43496131896973, |
|
"mean_norm/layer0": 65.43496131896973, |
|
"multicode_k": 1, |
|
"output_norm": 22.648734455108645, |
|
"output_norm/layer0": 22.648734455108645, |
|
"step": 7250 |
|
}, |
|
{ |
|
"MSE": 507.686293182373, |
|
"MSE/layer0": 507.686293182373, |
|
"dead_code_fraction": 0.2066, |
|
"dead_code_fraction/layer0": 0.2066, |
|
"epoch": 1.25, |
|
"input_norm": 31.998690617879234, |
|
"input_norm/layer0": 31.998690617879234, |
|
"learning_rate": 0.005, |
|
"loss": 2.0485, |
|
"max_norm": 214.17088317871094, |
|
"max_norm/layer0": 214.17088317871094, |
|
"mean_norm": 65.51487731933594, |
|
"mean_norm/layer0": 65.51487731933594, |
|
"multicode_k": 1, |
|
"output_norm": 22.669575303395582, |
|
"output_norm/layer0": 22.669575303395582, |
|
"step": 7300 |
|
}, |
|
{ |
|
"MSE": 507.97169540405275, |
|
"MSE/layer0": 507.97169540405275, |
|
"dead_code_fraction": 0.20445, |
|
"dead_code_fraction/layer0": 0.20445, |
|
"epoch": 1.26, |
|
"input_norm": 31.99869660695392, |
|
"input_norm/layer0": 31.99869660695392, |
|
"learning_rate": 0.005, |
|
"loss": 2.0534, |
|
"max_norm": 214.52955627441406, |
|
"max_norm/layer0": 214.52955627441406, |
|
"mean_norm": 65.59026718139648, |
|
"mean_norm/layer0": 65.59026718139648, |
|
"multicode_k": 1, |
|
"output_norm": 22.678728303909296, |
|
"output_norm/layer0": 22.678728303909296, |
|
"step": 7350 |
|
}, |
|
{ |
|
"MSE": 507.6675502522787, |
|
"MSE/layer0": 507.6675502522787, |
|
"dead_code_fraction": 0.20485, |
|
"dead_code_fraction/layer0": 0.20485, |
|
"epoch": 1.26, |
|
"input_norm": 31.998699353535965, |
|
"input_norm/layer0": 31.998699353535965, |
|
"learning_rate": 0.005, |
|
"loss": 2.0638, |
|
"max_norm": 214.7173614501953, |
|
"max_norm/layer0": 214.7173614501953, |
|
"mean_norm": 65.67013740539551, |
|
"mean_norm/layer0": 65.67013740539551, |
|
"multicode_k": 1, |
|
"output_norm": 22.67898440043131, |
|
"output_norm/layer0": 22.67898440043131, |
|
"step": 7400 |
|
}, |
|
{ |
|
"MSE": 507.85135843912786, |
|
"MSE/layer0": 507.85135843912786, |
|
"dead_code_fraction": 0.2049, |
|
"dead_code_fraction/layer0": 0.2049, |
|
"epoch": 1.27, |
|
"input_norm": 31.998699776331584, |
|
"input_norm/layer0": 31.998699776331584, |
|
"learning_rate": 0.005, |
|
"loss": 2.0353, |
|
"max_norm": 215.19158935546875, |
|
"max_norm/layer0": 215.19158935546875, |
|
"mean_norm": 65.75178337097168, |
|
"mean_norm/layer0": 65.75178337097168, |
|
"multicode_k": 1, |
|
"output_norm": 22.680205952326446, |
|
"output_norm/layer0": 22.680205952326446, |
|
"step": 7450 |
|
}, |
|
{ |
|
"MSE": 507.253986562093, |
|
"MSE/layer0": 507.253986562093, |
|
"dead_code_fraction": 0.20435, |
|
"dead_code_fraction/layer0": 0.20435, |
|
"epoch": 1.27, |
|
"input_norm": 31.99870971679686, |
|
"input_norm/layer0": 31.99870971679686, |
|
"learning_rate": 0.005, |
|
"loss": 2.0791, |
|
"max_norm": 215.7554931640625, |
|
"max_norm/layer0": 215.7554931640625, |
|
"mean_norm": 65.82438659667969, |
|
"mean_norm/layer0": 65.82438659667969, |
|
"multicode_k": 1, |
|
"output_norm": 22.691158383687345, |
|
"output_norm/layer0": 22.691158383687345, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"eval_MSE/layer0": 507.1513778155122, |
|
"eval_accuracy": 0.5167855735982843, |
|
"eval_dead_code_fraction/layer0": 0.2033, |
|
"eval_input_norm/layer0": 31.998707461867696, |
|
"eval_loss": 2.0513455867767334, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 22.70183411032355, |
|
"eval_runtime": 158.8151, |
|
"eval_samples_per_second": 29.109, |
|
"eval_steps_per_second": 1.82, |
|
"step": 7500 |
|
}, |
|
{ |
|
"MSE": 508.0465566507977, |
|
"MSE/layer0": 508.0465566507977, |
|
"dead_code_fraction": 0.20265, |
|
"dead_code_fraction/layer0": 0.20265, |
|
"epoch": 1.28, |
|
"input_norm": 31.9987080860138, |
|
"input_norm/layer0": 31.9987080860138, |
|
"learning_rate": 0.005, |
|
"loss": 2.0357, |
|
"max_norm": 216.1879119873047, |
|
"max_norm/layer0": 216.1879119873047, |
|
"mean_norm": 65.89747428894043, |
|
"mean_norm/layer0": 65.89747428894043, |
|
"multicode_k": 1, |
|
"output_norm": 22.691229712168372, |
|
"output_norm/layer0": 22.691229712168372, |
|
"step": 7550 |
|
}, |
|
{ |
|
"MSE": 506.86150853474936, |
|
"MSE/layer0": 506.86150853474936, |
|
"dead_code_fraction": 0.20235, |
|
"dead_code_fraction/layer0": 0.20235, |
|
"epoch": 1.28, |
|
"input_norm": 31.998724161783855, |
|
"input_norm/layer0": 31.998724161783855, |
|
"learning_rate": 0.005, |
|
"loss": 2.0643, |
|
"max_norm": 216.84507751464844, |
|
"max_norm/layer0": 216.84507751464844, |
|
"mean_norm": 65.96598243713379, |
|
"mean_norm/layer0": 65.96598243713379, |
|
"multicode_k": 1, |
|
"output_norm": 22.70548650105794, |
|
"output_norm/layer0": 22.70548650105794, |
|
"step": 7600 |
|
}, |
|
{ |
|
"MSE": 508.52483596801756, |
|
"MSE/layer0": 508.52483596801756, |
|
"dead_code_fraction": 0.20115, |
|
"dead_code_fraction/layer0": 0.20115, |
|
"epoch": 1.29, |
|
"input_norm": 31.998720836639407, |
|
"input_norm/layer0": 31.998720836639407, |
|
"learning_rate": 0.005, |
|
"loss": 2.0331, |
|
"max_norm": 217.07077026367188, |
|
"max_norm/layer0": 217.07077026367188, |
|
"mean_norm": 66.04256629943848, |
|
"mean_norm/layer0": 66.04256629943848, |
|
"multicode_k": 1, |
|
"output_norm": 22.671403992970788, |
|
"output_norm/layer0": 22.671403992970788, |
|
"step": 7650 |
|
}, |
|
{ |
|
"MSE": 506.7901182556151, |
|
"MSE/layer0": 506.7901182556151, |
|
"dead_code_fraction": 0.20025, |
|
"dead_code_fraction/layer0": 0.20025, |
|
"epoch": 1.29, |
|
"input_norm": 31.998723080952953, |
|
"input_norm/layer0": 31.998723080952953, |
|
"learning_rate": 0.005, |
|
"loss": 2.0643, |
|
"max_norm": 217.60621643066406, |
|
"max_norm/layer0": 217.60621643066406, |
|
"mean_norm": 66.1141586303711, |
|
"mean_norm/layer0": 66.1141586303711, |
|
"multicode_k": 1, |
|
"output_norm": 22.711970895131433, |
|
"output_norm/layer0": 22.711970895131433, |
|
"step": 7700 |
|
}, |
|
{ |
|
"MSE": 506.4805715942383, |
|
"MSE/layer0": 506.4805715942383, |
|
"dead_code_fraction": 0.19955, |
|
"dead_code_fraction/layer0": 0.19955, |
|
"epoch": 1.3, |
|
"input_norm": 31.998739531834943, |
|
"input_norm/layer0": 31.998739531834943, |
|
"learning_rate": 0.005, |
|
"loss": 2.0999, |
|
"max_norm": 218.18724060058594, |
|
"max_norm/layer0": 218.18724060058594, |
|
"mean_norm": 66.18310356140137, |
|
"mean_norm/layer0": 66.18310356140137, |
|
"multicode_k": 1, |
|
"output_norm": 22.715899858474735, |
|
"output_norm/layer0": 22.715899858474735, |
|
"step": 7750 |
|
}, |
|
{ |
|
"MSE": 507.79560877482083, |
|
"MSE/layer0": 507.79560877482083, |
|
"dead_code_fraction": 0.1983, |
|
"dead_code_fraction/layer0": 0.1983, |
|
"epoch": 1.3, |
|
"input_norm": 31.9987256272634, |
|
"input_norm/layer0": 31.9987256272634, |
|
"learning_rate": 0.005, |
|
"loss": 2.0143, |
|
"max_norm": 218.3722686767578, |
|
"max_norm/layer0": 218.3722686767578, |
|
"mean_norm": 66.25444984436035, |
|
"mean_norm/layer0": 66.25444984436035, |
|
"multicode_k": 1, |
|
"output_norm": 22.692439622879014, |
|
"output_norm/layer0": 22.692439622879014, |
|
"step": 7800 |
|
}, |
|
{ |
|
"MSE": 507.2388439432779, |
|
"MSE/layer0": 507.2388439432779, |
|
"dead_code_fraction": 0.198, |
|
"dead_code_fraction/layer0": 0.198, |
|
"epoch": 1.31, |
|
"input_norm": 31.998735243479416, |
|
"input_norm/layer0": 31.998735243479416, |
|
"learning_rate": 0.005, |
|
"loss": 2.069, |
|
"max_norm": 218.93580627441406, |
|
"max_norm/layer0": 218.93580627441406, |
|
"mean_norm": 66.32441329956055, |
|
"mean_norm/layer0": 66.32441329956055, |
|
"multicode_k": 1, |
|
"output_norm": 22.703038584391276, |
|
"output_norm/layer0": 22.703038584391276, |
|
"step": 7850 |
|
}, |
|
{ |
|
"MSE": 508.13961395263664, |
|
"MSE/layer0": 508.13961395263664, |
|
"dead_code_fraction": 0.19705, |
|
"dead_code_fraction/layer0": 0.19705, |
|
"epoch": 1.31, |
|
"input_norm": 31.99873922983806, |
|
"input_norm/layer0": 31.99873922983806, |
|
"learning_rate": 0.005, |
|
"loss": 2.0712, |
|
"max_norm": 219.51759338378906, |
|
"max_norm/layer0": 219.51759338378906, |
|
"mean_norm": 66.39589881896973, |
|
"mean_norm/layer0": 66.39589881896973, |
|
"multicode_k": 1, |
|
"output_norm": 22.68491499900817, |
|
"output_norm/layer0": 22.68491499900817, |
|
"step": 7900 |
|
}, |
|
{ |
|
"MSE": 506.5046355692546, |
|
"MSE/layer0": 506.5046355692546, |
|
"dead_code_fraction": 0.1958, |
|
"dead_code_fraction/layer0": 0.1958, |
|
"epoch": 1.32, |
|
"input_norm": 31.998745075861606, |
|
"input_norm/layer0": 31.998745075861606, |
|
"learning_rate": 0.005, |
|
"loss": 2.0623, |
|
"max_norm": 220.1356658935547, |
|
"max_norm/layer0": 220.1356658935547, |
|
"mean_norm": 66.46616172790527, |
|
"mean_norm/layer0": 66.46616172790527, |
|
"multicode_k": 1, |
|
"output_norm": 22.709094810485844, |
|
"output_norm/layer0": 22.709094810485844, |
|
"step": 7950 |
|
}, |
|
{ |
|
"MSE": 506.24584472656227, |
|
"MSE/layer0": 506.24584472656227, |
|
"dead_code_fraction": 0.1962, |
|
"dead_code_fraction/layer0": 0.1962, |
|
"epoch": 1.32, |
|
"input_norm": 31.998744071324662, |
|
"input_norm/layer0": 31.998744071324662, |
|
"learning_rate": 0.005, |
|
"loss": 2.0252, |
|
"max_norm": 220.52029418945312, |
|
"max_norm/layer0": 220.52029418945312, |
|
"mean_norm": 66.54170417785645, |
|
"mean_norm/layer0": 66.54170417785645, |
|
"multicode_k": 1, |
|
"output_norm": 22.71004734039306, |
|
"output_norm/layer0": 22.71004734039306, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_MSE/layer0": 505.2722684186489, |
|
"eval_accuracy": 0.5173414664109856, |
|
"eval_dead_code_fraction/layer0": 0.19525, |
|
"eval_input_norm/layer0": 31.998757950702117, |
|
"eval_loss": 2.046276569366455, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 22.71078164304668, |
|
"eval_runtime": 158.1298, |
|
"eval_samples_per_second": 29.235, |
|
"eval_steps_per_second": 1.828, |
|
"step": 8000 |
|
}, |
|
{ |
|
"MSE": 507.15304565429676, |
|
"MSE/layer0": 507.15304565429676, |
|
"dead_code_fraction": 0.1941, |
|
"dead_code_fraction/layer0": 0.1941, |
|
"epoch": 1.33, |
|
"input_norm": 31.998751821517956, |
|
"input_norm/layer0": 31.998751821517956, |
|
"learning_rate": 0.005, |
|
"loss": 2.0231, |
|
"max_norm": 221.12425231933594, |
|
"max_norm/layer0": 221.12425231933594, |
|
"mean_norm": 66.61260223388672, |
|
"mean_norm/layer0": 66.61260223388672, |
|
"multicode_k": 1, |
|
"output_norm": 22.70729770024618, |
|
"output_norm/layer0": 22.70729770024618, |
|
"step": 8050 |
|
}, |
|
{ |
|
"MSE": 508.0300794474282, |
|
"MSE/layer0": 508.0300794474282, |
|
"dead_code_fraction": 0.19355, |
|
"dead_code_fraction/layer0": 0.19355, |
|
"epoch": 1.33, |
|
"input_norm": 31.998755750656134, |
|
"input_norm/layer0": 31.998755750656134, |
|
"learning_rate": 0.005, |
|
"loss": 2.0065, |
|
"max_norm": 221.41090393066406, |
|
"max_norm/layer0": 221.41090393066406, |
|
"mean_norm": 66.68024253845215, |
|
"mean_norm/layer0": 66.68024253845215, |
|
"multicode_k": 1, |
|
"output_norm": 22.685567102432238, |
|
"output_norm/layer0": 22.685567102432238, |
|
"step": 8100 |
|
}, |
|
{ |
|
"MSE": 506.83792968750004, |
|
"MSE/layer0": 506.83792968750004, |
|
"dead_code_fraction": 0.194, |
|
"dead_code_fraction/layer0": 0.194, |
|
"epoch": 1.34, |
|
"input_norm": 31.998766530354814, |
|
"input_norm/layer0": 31.998766530354814, |
|
"learning_rate": 0.005, |
|
"loss": 2.0545, |
|
"max_norm": 221.77352905273438, |
|
"max_norm/layer0": 221.77352905273438, |
|
"mean_norm": 66.74850654602051, |
|
"mean_norm/layer0": 66.74850654602051, |
|
"multicode_k": 1, |
|
"output_norm": 22.711014649073284, |
|
"output_norm/layer0": 22.711014649073284, |
|
"step": 8150 |
|
}, |
|
{ |
|
"MSE": 506.1638347880046, |
|
"MSE/layer0": 506.1638347880046, |
|
"dead_code_fraction": 0.1922, |
|
"dead_code_fraction/layer0": 0.1922, |
|
"epoch": 1.34, |
|
"input_norm": 31.998765303293865, |
|
"input_norm/layer0": 31.998765303293865, |
|
"learning_rate": 0.005, |
|
"loss": 2.0291, |
|
"max_norm": 222.23851013183594, |
|
"max_norm/layer0": 222.23851013183594, |
|
"mean_norm": 66.81972694396973, |
|
"mean_norm/layer0": 66.81972694396973, |
|
"multicode_k": 1, |
|
"output_norm": 22.712359495162957, |
|
"output_norm/layer0": 22.712359495162957, |
|
"step": 8200 |
|
}, |
|
{ |
|
"MSE": 505.4201058959959, |
|
"MSE/layer0": 505.4201058959959, |
|
"dead_code_fraction": 0.19165, |
|
"dead_code_fraction/layer0": 0.19165, |
|
"epoch": 1.35, |
|
"input_norm": 31.998765595753984, |
|
"input_norm/layer0": 31.998765595753984, |
|
"learning_rate": 0.005, |
|
"loss": 2.0255, |
|
"max_norm": 222.60708618164062, |
|
"max_norm/layer0": 222.60708618164062, |
|
"mean_norm": 66.89296340942383, |
|
"mean_norm/layer0": 66.89296340942383, |
|
"multicode_k": 1, |
|
"output_norm": 22.733057559331257, |
|
"output_norm/layer0": 22.733057559331257, |
|
"step": 8250 |
|
}, |
|
{ |
|
"MSE": 506.6631129964193, |
|
"MSE/layer0": 506.6631129964193, |
|
"dead_code_fraction": 0.18985, |
|
"dead_code_fraction/layer0": 0.18985, |
|
"epoch": 1.35, |
|
"input_norm": 31.998774194717406, |
|
"input_norm/layer0": 31.998774194717406, |
|
"learning_rate": 0.005, |
|
"loss": 2.0543, |
|
"max_norm": 222.95948791503906, |
|
"max_norm/layer0": 222.95948791503906, |
|
"mean_norm": 66.95783233642578, |
|
"mean_norm/layer0": 66.95783233642578, |
|
"multicode_k": 1, |
|
"output_norm": 22.715471951166787, |
|
"output_norm/layer0": 22.715471951166787, |
|
"step": 8300 |
|
}, |
|
{ |
|
"MSE": 505.8098661804198, |
|
"MSE/layer0": 505.8098661804198, |
|
"dead_code_fraction": 0.1901, |
|
"dead_code_fraction/layer0": 0.1901, |
|
"epoch": 1.36, |
|
"input_norm": 31.998776054382326, |
|
"input_norm/layer0": 31.998776054382326, |
|
"learning_rate": 0.005, |
|
"loss": 2.0361, |
|
"max_norm": 222.99290466308594, |
|
"max_norm/layer0": 222.99290466308594, |
|
"mean_norm": 67.03095436096191, |
|
"mean_norm/layer0": 67.03095436096191, |
|
"multicode_k": 1, |
|
"output_norm": 22.720023854573576, |
|
"output_norm/layer0": 22.720023854573576, |
|
"step": 8350 |
|
}, |
|
{ |
|
"MSE": 504.6476872253421, |
|
"MSE/layer0": 504.6476872253421, |
|
"dead_code_fraction": 0.18865, |
|
"dead_code_fraction/layer0": 0.18865, |
|
"epoch": 1.36, |
|
"input_norm": 31.99877415021262, |
|
"input_norm/layer0": 31.99877415021262, |
|
"learning_rate": 0.005, |
|
"loss": 2.018, |
|
"max_norm": 222.99652099609375, |
|
"max_norm/layer0": 222.99652099609375, |
|
"mean_norm": 67.10310173034668, |
|
"mean_norm/layer0": 67.10310173034668, |
|
"multicode_k": 1, |
|
"output_norm": 22.743260552088422, |
|
"output_norm/layer0": 22.743260552088422, |
|
"step": 8400 |
|
}, |
|
{ |
|
"MSE": 505.1742755126953, |
|
"MSE/layer0": 505.1742755126953, |
|
"dead_code_fraction": 0.18805, |
|
"dead_code_fraction/layer0": 0.18805, |
|
"epoch": 1.37, |
|
"input_norm": 31.998781833648685, |
|
"input_norm/layer0": 31.998781833648685, |
|
"learning_rate": 0.005, |
|
"loss": 2.0373, |
|
"max_norm": 223.39710998535156, |
|
"max_norm/layer0": 223.39710998535156, |
|
"mean_norm": 67.17368698120117, |
|
"mean_norm/layer0": 67.17368698120117, |
|
"multicode_k": 1, |
|
"output_norm": 22.74353121121724, |
|
"output_norm/layer0": 22.74353121121724, |
|
"step": 8450 |
|
}, |
|
{ |
|
"MSE": 505.00153442382805, |
|
"MSE/layer0": 505.00153442382805, |
|
"dead_code_fraction": 0.1875, |
|
"dead_code_fraction/layer0": 0.1875, |
|
"epoch": 1.37, |
|
"input_norm": 31.998789456685383, |
|
"input_norm/layer0": 31.998789456685383, |
|
"learning_rate": 0.005, |
|
"loss": 2.0432, |
|
"max_norm": 223.86239624023438, |
|
"max_norm/layer0": 223.86239624023438, |
|
"mean_norm": 67.2455825805664, |
|
"mean_norm/layer0": 67.2455825805664, |
|
"multicode_k": 1, |
|
"output_norm": 22.747594401041667, |
|
"output_norm/layer0": 22.747594401041667, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_MSE/layer0": 502.9394664067146, |
|
"eval_accuracy": 0.5183496432580605, |
|
"eval_dead_code_fraction/layer0": 0.18745, |
|
"eval_input_norm/layer0": 31.998788164622738, |
|
"eval_loss": 2.042330265045166, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 22.756197618711013, |
|
"eval_runtime": 159.3738, |
|
"eval_samples_per_second": 29.007, |
|
"eval_steps_per_second": 1.813, |
|
"step": 8500 |
|
}, |
|
{ |
|
"MSE": 504.47000788370775, |
|
"MSE/layer0": 504.47000788370775, |
|
"dead_code_fraction": 0.1867, |
|
"dead_code_fraction/layer0": 0.1867, |
|
"epoch": 1.38, |
|
"input_norm": 31.998792708714795, |
|
"input_norm/layer0": 31.998792708714795, |
|
"learning_rate": 0.005, |
|
"loss": 2.0483, |
|
"max_norm": 224.50177001953125, |
|
"max_norm/layer0": 224.50177001953125, |
|
"mean_norm": 67.31682586669922, |
|
"mean_norm/layer0": 67.31682586669922, |
|
"multicode_k": 1, |
|
"output_norm": 22.753840500513725, |
|
"output_norm/layer0": 22.753840500513725, |
|
"step": 8550 |
|
}, |
|
{ |
|
"MSE": 504.7471296691896, |
|
"MSE/layer0": 504.7471296691896, |
|
"dead_code_fraction": 0.1861, |
|
"dead_code_fraction/layer0": 0.1861, |
|
"epoch": 1.38, |
|
"input_norm": 31.99879879315695, |
|
"input_norm/layer0": 31.99879879315695, |
|
"learning_rate": 0.005, |
|
"loss": 2.0626, |
|
"max_norm": 224.77008056640625, |
|
"max_norm/layer0": 224.77008056640625, |
|
"mean_norm": 67.38501358032227, |
|
"mean_norm/layer0": 67.38501358032227, |
|
"multicode_k": 1, |
|
"output_norm": 22.75965905507406, |
|
"output_norm/layer0": 22.75965905507406, |
|
"step": 8600 |
|
}, |
|
{ |
|
"MSE": 504.43309575398786, |
|
"MSE/layer0": 504.43309575398786, |
|
"dead_code_fraction": 0.18575, |
|
"dead_code_fraction/layer0": 0.18575, |
|
"epoch": 1.39, |
|
"input_norm": 31.9987975247701, |
|
"input_norm/layer0": 31.9987975247701, |
|
"learning_rate": 0.005, |
|
"loss": 2.0528, |
|
"max_norm": 224.8895263671875, |
|
"max_norm/layer0": 224.8895263671875, |
|
"mean_norm": 67.45294189453125, |
|
"mean_norm/layer0": 67.45294189453125, |
|
"multicode_k": 1, |
|
"output_norm": 22.765578152338662, |
|
"output_norm/layer0": 22.765578152338662, |
|
"step": 8650 |
|
}, |
|
{ |
|
"MSE": 504.8997240193688, |
|
"MSE/layer0": 504.8997240193688, |
|
"dead_code_fraction": 0.1849, |
|
"dead_code_fraction/layer0": 0.1849, |
|
"epoch": 1.39, |
|
"input_norm": 31.998805205027267, |
|
"input_norm/layer0": 31.998805205027267, |
|
"learning_rate": 0.005, |
|
"loss": 2.0355, |
|
"max_norm": 225.1109619140625, |
|
"max_norm/layer0": 225.1109619140625, |
|
"mean_norm": 67.51644897460938, |
|
"mean_norm/layer0": 67.51644897460938, |
|
"multicode_k": 1, |
|
"output_norm": 22.76556049982706, |
|
"output_norm/layer0": 22.76556049982706, |
|
"step": 8700 |
|
}, |
|
{ |
|
"MSE": 504.98007812499975, |
|
"MSE/layer0": 504.98007812499975, |
|
"dead_code_fraction": 0.1841, |
|
"dead_code_fraction/layer0": 0.1841, |
|
"epoch": 1.4, |
|
"input_norm": 31.998811095555627, |
|
"input_norm/layer0": 31.998811095555627, |
|
"learning_rate": 0.005, |
|
"loss": 2.048, |
|
"max_norm": 225.3004608154297, |
|
"max_norm/layer0": 225.3004608154297, |
|
"mean_norm": 67.58170700073242, |
|
"mean_norm/layer0": 67.58170700073242, |
|
"multicode_k": 1, |
|
"output_norm": 22.734453417460124, |
|
"output_norm/layer0": 22.734453417460124, |
|
"step": 8750 |
|
}, |
|
{ |
|
"MSE": 505.8172926839193, |
|
"MSE/layer0": 505.8172926839193, |
|
"dead_code_fraction": 0.1825, |
|
"dead_code_fraction/layer0": 0.1825, |
|
"epoch": 1.4, |
|
"input_norm": 31.998811902999876, |
|
"input_norm/layer0": 31.998811902999876, |
|
"learning_rate": 0.005, |
|
"loss": 2.0314, |
|
"max_norm": 225.43496704101562, |
|
"max_norm/layer0": 225.43496704101562, |
|
"mean_norm": 67.64213943481445, |
|
"mean_norm/layer0": 67.64213943481445, |
|
"multicode_k": 1, |
|
"output_norm": 22.746523040135706, |
|
"output_norm/layer0": 22.746523040135706, |
|
"step": 8800 |
|
}, |
|
{ |
|
"MSE": 505.15463668823276, |
|
"MSE/layer0": 505.15463668823276, |
|
"dead_code_fraction": 0.1834, |
|
"dead_code_fraction/layer0": 0.1834, |
|
"epoch": 1.41, |
|
"input_norm": 31.99881089528401, |
|
"input_norm/layer0": 31.99881089528401, |
|
"learning_rate": 0.005, |
|
"loss": 2.0019, |
|
"max_norm": 225.2454376220703, |
|
"max_norm/layer0": 225.2454376220703, |
|
"mean_norm": 67.70701217651367, |
|
"mean_norm/layer0": 67.70701217651367, |
|
"multicode_k": 1, |
|
"output_norm": 22.74102473258972, |
|
"output_norm/layer0": 22.74102473258972, |
|
"step": 8850 |
|
}, |
|
{ |
|
"MSE": 505.15305394490576, |
|
"MSE/layer0": 505.15305394490576, |
|
"dead_code_fraction": 0.18105, |
|
"dead_code_fraction/layer0": 0.18105, |
|
"epoch": 1.41, |
|
"input_norm": 31.99882117907206, |
|
"input_norm/layer0": 31.99882117907206, |
|
"learning_rate": 0.005, |
|
"loss": 2.0614, |
|
"max_norm": 224.98548889160156, |
|
"max_norm/layer0": 224.98548889160156, |
|
"mean_norm": 67.77053833007812, |
|
"mean_norm/layer0": 67.77053833007812, |
|
"multicode_k": 1, |
|
"output_norm": 22.750008074442544, |
|
"output_norm/layer0": 22.750008074442544, |
|
"step": 8900 |
|
}, |
|
{ |
|
"MSE": 505.46065561930345, |
|
"MSE/layer0": 505.46065561930345, |
|
"dead_code_fraction": 0.1809, |
|
"dead_code_fraction/layer0": 0.1809, |
|
"epoch": 1.42, |
|
"input_norm": 31.99882030487061, |
|
"input_norm/layer0": 31.99882030487061, |
|
"learning_rate": 0.005, |
|
"loss": 2.0259, |
|
"max_norm": 224.90966796875, |
|
"max_norm/layer0": 224.90966796875, |
|
"mean_norm": 67.83388900756836, |
|
"mean_norm/layer0": 67.83388900756836, |
|
"multicode_k": 1, |
|
"output_norm": 22.744747044245393, |
|
"output_norm/layer0": 22.744747044245393, |
|
"step": 8950 |
|
}, |
|
{ |
|
"MSE": 503.93126592000317, |
|
"MSE/layer0": 503.93126592000317, |
|
"dead_code_fraction": 0.1795, |
|
"dead_code_fraction/layer0": 0.1795, |
|
"epoch": 1.42, |
|
"input_norm": 31.99882487614949, |
|
"input_norm/layer0": 31.99882487614949, |
|
"learning_rate": 0.005, |
|
"loss": 2.0549, |
|
"max_norm": 224.75604248046875, |
|
"max_norm/layer0": 224.75604248046875, |
|
"mean_norm": 67.89757537841797, |
|
"mean_norm/layer0": 67.89757537841797, |
|
"multicode_k": 1, |
|
"output_norm": 22.767707106272383, |
|
"output_norm/layer0": 22.767707106272383, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_MSE/layer0": 502.90162357014304, |
|
"eval_accuracy": 0.518752237368134, |
|
"eval_dead_code_fraction/layer0": 0.1797, |
|
"eval_input_norm/layer0": 31.998819289515865, |
|
"eval_loss": 2.0394132137298584, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 22.772194602647794, |
|
"eval_runtime": 158.4408, |
|
"eval_samples_per_second": 29.178, |
|
"eval_steps_per_second": 1.824, |
|
"step": 9000 |
|
}, |
|
{ |
|
"MSE": 504.23655522664376, |
|
"MSE/layer0": 504.23655522664376, |
|
"dead_code_fraction": 0.17875, |
|
"dead_code_fraction/layer0": 0.17875, |
|
"epoch": 1.43, |
|
"input_norm": 31.998824621836345, |
|
"input_norm/layer0": 31.998824621836345, |
|
"learning_rate": 0.005, |
|
"loss": 2.0392, |
|
"max_norm": 224.60926818847656, |
|
"max_norm/layer0": 224.60926818847656, |
|
"mean_norm": 67.96440505981445, |
|
"mean_norm/layer0": 67.96440505981445, |
|
"multicode_k": 1, |
|
"output_norm": 22.772467877070113, |
|
"output_norm/layer0": 22.772467877070113, |
|
"step": 9050 |
|
}, |
|
{ |
|
"MSE": 503.93936604817725, |
|
"MSE/layer0": 503.93936604817725, |
|
"dead_code_fraction": 0.17785, |
|
"dead_code_fraction/layer0": 0.17785, |
|
"epoch": 1.43, |
|
"input_norm": 31.99882525444032, |
|
"input_norm/layer0": 31.99882525444032, |
|
"learning_rate": 0.005, |
|
"loss": 2.0329, |
|
"max_norm": 224.35545349121094, |
|
"max_norm/layer0": 224.35545349121094, |
|
"mean_norm": 68.02788925170898, |
|
"mean_norm/layer0": 68.02788925170898, |
|
"multicode_k": 1, |
|
"output_norm": 22.77433245976766, |
|
"output_norm/layer0": 22.77433245976766, |
|
"step": 9100 |
|
}, |
|
{ |
|
"MSE": 504.38566899617547, |
|
"MSE/layer0": 504.38566899617547, |
|
"dead_code_fraction": 0.1771, |
|
"dead_code_fraction/layer0": 0.1771, |
|
"epoch": 1.44, |
|
"input_norm": 31.998831052780154, |
|
"input_norm/layer0": 31.998831052780154, |
|
"learning_rate": 0.005, |
|
"loss": 2.0175, |
|
"max_norm": 224.176025390625, |
|
"max_norm/layer0": 224.176025390625, |
|
"mean_norm": 68.09515762329102, |
|
"mean_norm/layer0": 68.09515762329102, |
|
"multicode_k": 1, |
|
"output_norm": 22.758301575978592, |
|
"output_norm/layer0": 22.758301575978592, |
|
"step": 9150 |
|
}, |
|
{ |
|
"MSE": 503.9738773091634, |
|
"MSE/layer0": 503.9738773091634, |
|
"dead_code_fraction": 0.1763, |
|
"dead_code_fraction/layer0": 0.1763, |
|
"epoch": 1.44, |
|
"input_norm": 31.998839066823308, |
|
"input_norm/layer0": 31.998839066823308, |
|
"learning_rate": 0.005, |
|
"loss": 2.0462, |
|
"max_norm": 224.0868377685547, |
|
"max_norm/layer0": 224.0868377685547, |
|
"mean_norm": 68.16043853759766, |
|
"mean_norm/layer0": 68.16043853759766, |
|
"multicode_k": 1, |
|
"output_norm": 22.7744267431895, |
|
"output_norm/layer0": 22.7744267431895, |
|
"step": 9200 |
|
}, |
|
{ |
|
"MSE": 503.29069310506196, |
|
"MSE/layer0": 503.29069310506196, |
|
"dead_code_fraction": 0.17485, |
|
"dead_code_fraction/layer0": 0.17485, |
|
"epoch": 1.45, |
|
"input_norm": 31.99883868853251, |
|
"input_norm/layer0": 31.99883868853251, |
|
"learning_rate": 0.005, |
|
"loss": 2.0416, |
|
"max_norm": 223.89230346679688, |
|
"max_norm/layer0": 223.89230346679688, |
|
"mean_norm": 68.22885513305664, |
|
"mean_norm/layer0": 68.22885513305664, |
|
"multicode_k": 1, |
|
"output_norm": 22.78441795984904, |
|
"output_norm/layer0": 22.78441795984904, |
|
"step": 9250 |
|
}, |
|
{ |
|
"MSE": 504.4634376017252, |
|
"MSE/layer0": 504.4634376017252, |
|
"dead_code_fraction": 0.17465, |
|
"dead_code_fraction/layer0": 0.17465, |
|
"epoch": 1.45, |
|
"input_norm": 31.998847064971933, |
|
"input_norm/layer0": 31.998847064971933, |
|
"learning_rate": 0.005, |
|
"loss": 2.0822, |
|
"max_norm": 223.5952911376953, |
|
"max_norm/layer0": 223.5952911376953, |
|
"mean_norm": 68.2917366027832, |
|
"mean_norm/layer0": 68.2917366027832, |
|
"multicode_k": 1, |
|
"output_norm": 22.78019981384277, |
|
"output_norm/layer0": 22.78019981384277, |
|
"step": 9300 |
|
}, |
|
{ |
|
"MSE": 504.5819336954755, |
|
"MSE/layer0": 504.5819336954755, |
|
"dead_code_fraction": 0.1737, |
|
"dead_code_fraction/layer0": 0.1737, |
|
"epoch": 1.46, |
|
"input_norm": 31.998844486872358, |
|
"input_norm/layer0": 31.998844486872358, |
|
"learning_rate": 0.005, |
|
"loss": 2.0181, |
|
"max_norm": 223.33349609375, |
|
"max_norm/layer0": 223.33349609375, |
|
"mean_norm": 68.3541030883789, |
|
"mean_norm/layer0": 68.3541030883789, |
|
"multicode_k": 1, |
|
"output_norm": 22.770421886444097, |
|
"output_norm/layer0": 22.770421886444097, |
|
"step": 9350 |
|
}, |
|
{ |
|
"MSE": 504.08388671875014, |
|
"MSE/layer0": 504.08388671875014, |
|
"dead_code_fraction": 0.17315, |
|
"dead_code_fraction/layer0": 0.17315, |
|
"epoch": 1.46, |
|
"input_norm": 31.998852834701534, |
|
"input_norm/layer0": 31.998852834701534, |
|
"learning_rate": 0.005, |
|
"loss": 2.0332, |
|
"max_norm": 223.0471954345703, |
|
"max_norm/layer0": 223.0471954345703, |
|
"mean_norm": 68.41642379760742, |
|
"mean_norm/layer0": 68.41642379760742, |
|
"multicode_k": 1, |
|
"output_norm": 22.783455673853553, |
|
"output_norm/layer0": 22.783455673853553, |
|
"step": 9400 |
|
}, |
|
{ |
|
"MSE": 504.4143726603196, |
|
"MSE/layer0": 504.4143726603196, |
|
"dead_code_fraction": 0.17145, |
|
"dead_code_fraction/layer0": 0.17145, |
|
"epoch": 1.47, |
|
"input_norm": 31.998856865564967, |
|
"input_norm/layer0": 31.998856865564967, |
|
"learning_rate": 0.005, |
|
"loss": 2.0241, |
|
"max_norm": 222.83218383789062, |
|
"max_norm/layer0": 222.83218383789062, |
|
"mean_norm": 68.48007202148438, |
|
"mean_norm/layer0": 68.48007202148438, |
|
"multicode_k": 1, |
|
"output_norm": 22.767489954630527, |
|
"output_norm/layer0": 22.767489954630527, |
|
"step": 9450 |
|
}, |
|
{ |
|
"MSE": 503.2655168151856, |
|
"MSE/layer0": 503.2655168151856, |
|
"dead_code_fraction": 0.17245, |
|
"dead_code_fraction/layer0": 0.17245, |
|
"epoch": 1.47, |
|
"input_norm": 31.998857196172086, |
|
"input_norm/layer0": 31.998857196172086, |
|
"learning_rate": 0.005, |
|
"loss": 2.0087, |
|
"max_norm": 222.5254669189453, |
|
"max_norm/layer0": 222.5254669189453, |
|
"mean_norm": 68.54964065551758, |
|
"mean_norm/layer0": 68.54964065551758, |
|
"multicode_k": 1, |
|
"output_norm": 22.78383262634278, |
|
"output_norm/layer0": 22.78383262634278, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_MSE/layer0": 504.0087830256569, |
|
"eval_accuracy": 0.5192516739689711, |
|
"eval_dead_code_fraction/layer0": 0.1704, |
|
"eval_input_norm/layer0": 31.99886018302103, |
|
"eval_loss": 2.0364596843719482, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 22.799023320451585, |
|
"eval_runtime": 158.3046, |
|
"eval_samples_per_second": 29.203, |
|
"eval_steps_per_second": 1.826, |
|
"step": 9500 |
|
}, |
|
{ |
|
"MSE": 502.63093187018274, |
|
"MSE/layer0": 502.63093187018274, |
|
"dead_code_fraction": 0.1713, |
|
"dead_code_fraction/layer0": 0.1713, |
|
"epoch": 1.48, |
|
"input_norm": 31.998863240132852, |
|
"input_norm/layer0": 31.998863240132852, |
|
"learning_rate": 0.005, |
|
"loss": 2.0083, |
|
"max_norm": 222.2374725341797, |
|
"max_norm/layer0": 222.2374725341797, |
|
"mean_norm": 68.61249160766602, |
|
"mean_norm/layer0": 68.61249160766602, |
|
"multicode_k": 1, |
|
"output_norm": 22.79880291995348, |
|
"output_norm/layer0": 22.79880291995348, |
|
"step": 9550 |
|
}, |
|
{ |
|
"MSE": 505.36792836568793, |
|
"MSE/layer0": 505.36792836568793, |
|
"dead_code_fraction": 0.17135, |
|
"dead_code_fraction/layer0": 0.17135, |
|
"epoch": 2.0, |
|
"input_norm": 31.99885930271917, |
|
"input_norm/layer0": 31.99885930271917, |
|
"learning_rate": 0.005, |
|
"loss": 2.0034, |
|
"max_norm": 222.0310516357422, |
|
"max_norm/layer0": 222.0310516357422, |
|
"mean_norm": 68.67721176147461, |
|
"mean_norm/layer0": 68.67721176147461, |
|
"multicode_k": 1, |
|
"output_norm": 22.74409036474983, |
|
"output_norm/layer0": 22.74409036474983, |
|
"step": 9600 |
|
}, |
|
{ |
|
"MSE": 502.98986485799134, |
|
"MSE/layer0": 502.98986485799134, |
|
"dead_code_fraction": 0.1697, |
|
"dead_code_fraction/layer0": 0.1697, |
|
"epoch": 2.01, |
|
"input_norm": 31.998873513539642, |
|
"input_norm/layer0": 31.998873513539642, |
|
"learning_rate": 0.005, |
|
"loss": 2.072, |
|
"max_norm": 222.00772094726562, |
|
"max_norm/layer0": 222.00772094726562, |
|
"mean_norm": 68.73538589477539, |
|
"mean_norm/layer0": 68.73538589477539, |
|
"multicode_k": 1, |
|
"output_norm": 22.796976168950394, |
|
"output_norm/layer0": 22.796976168950394, |
|
"step": 9650 |
|
}, |
|
{ |
|
"MSE": 503.04733729044574, |
|
"MSE/layer0": 503.04733729044574, |
|
"dead_code_fraction": 0.16915, |
|
"dead_code_fraction/layer0": 0.16915, |
|
"epoch": 2.01, |
|
"input_norm": 31.998862508138025, |
|
"input_norm/layer0": 31.998862508138025, |
|
"learning_rate": 0.005, |
|
"loss": 1.9691, |
|
"max_norm": 221.80978393554688, |
|
"max_norm/layer0": 221.80978393554688, |
|
"mean_norm": 68.80109405517578, |
|
"mean_norm/layer0": 68.80109405517578, |
|
"multicode_k": 1, |
|
"output_norm": 22.794911410013835, |
|
"output_norm/layer0": 22.794911410013835, |
|
"step": 9700 |
|
}, |
|
{ |
|
"MSE": 503.3161979675292, |
|
"MSE/layer0": 503.3161979675292, |
|
"dead_code_fraction": 0.16895, |
|
"dead_code_fraction/layer0": 0.16895, |
|
"epoch": 2.02, |
|
"input_norm": 31.998878345489487, |
|
"input_norm/layer0": 31.998878345489487, |
|
"learning_rate": 0.005, |
|
"loss": 2.0368, |
|
"max_norm": 221.684814453125, |
|
"max_norm/layer0": 221.684814453125, |
|
"mean_norm": 68.86429214477539, |
|
"mean_norm/layer0": 68.86429214477539, |
|
"multicode_k": 1, |
|
"output_norm": 22.785858039855956, |
|
"output_norm/layer0": 22.785858039855956, |
|
"step": 9750 |
|
}, |
|
{ |
|
"MSE": 502.7885366821291, |
|
"MSE/layer0": 502.7885366821291, |
|
"dead_code_fraction": 0.16775, |
|
"dead_code_fraction/layer0": 0.16775, |
|
"epoch": 2.02, |
|
"input_norm": 31.998876323699957, |
|
"input_norm/layer0": 31.998876323699957, |
|
"learning_rate": 0.005, |
|
"loss": 2.0029, |
|
"max_norm": 221.55738830566406, |
|
"max_norm/layer0": 221.55738830566406, |
|
"mean_norm": 68.92353439331055, |
|
"mean_norm/layer0": 68.92353439331055, |
|
"multicode_k": 1, |
|
"output_norm": 22.80311137835186, |
|
"output_norm/layer0": 22.80311137835186, |
|
"step": 9800 |
|
}, |
|
{ |
|
"MSE": 503.08141484578465, |
|
"MSE/layer0": 503.08141484578465, |
|
"dead_code_fraction": 0.16675, |
|
"dead_code_fraction/layer0": 0.16675, |
|
"epoch": 2.03, |
|
"input_norm": 31.998882681528727, |
|
"input_norm/layer0": 31.998882681528727, |
|
"learning_rate": 0.005, |
|
"loss": 2.0058, |
|
"max_norm": 221.4176025390625, |
|
"max_norm/layer0": 221.4176025390625, |
|
"mean_norm": 68.97920608520508, |
|
"mean_norm/layer0": 68.97920608520508, |
|
"multicode_k": 1, |
|
"output_norm": 22.79436633110047, |
|
"output_norm/layer0": 22.79436633110047, |
|
"step": 9850 |
|
}, |
|
{ |
|
"MSE": 503.44391169230175, |
|
"MSE/layer0": 503.44391169230175, |
|
"dead_code_fraction": 0.16635, |
|
"dead_code_fraction/layer0": 0.16635, |
|
"epoch": 2.03, |
|
"input_norm": 31.998889300028488, |
|
"input_norm/layer0": 31.998889300028488, |
|
"learning_rate": 0.005, |
|
"loss": 2.0128, |
|
"max_norm": 220.8733673095703, |
|
"max_norm/layer0": 220.8733673095703, |
|
"mean_norm": 69.03522872924805, |
|
"mean_norm/layer0": 69.03522872924805, |
|
"multicode_k": 1, |
|
"output_norm": 22.793825833002728, |
|
"output_norm/layer0": 22.793825833002728, |
|
"step": 9900 |
|
}, |
|
{ |
|
"MSE": 503.14160481770807, |
|
"MSE/layer0": 503.14160481770807, |
|
"dead_code_fraction": 0.1655, |
|
"dead_code_fraction/layer0": 0.1655, |
|
"epoch": 2.04, |
|
"input_norm": 31.99888905207317, |
|
"input_norm/layer0": 31.99888905207317, |
|
"learning_rate": 0.005, |
|
"loss": 2.0053, |
|
"max_norm": 220.66598510742188, |
|
"max_norm/layer0": 220.66598510742188, |
|
"mean_norm": 69.08990859985352, |
|
"mean_norm/layer0": 69.08990859985352, |
|
"multicode_k": 1, |
|
"output_norm": 22.802439581553138, |
|
"output_norm/layer0": 22.802439581553138, |
|
"step": 9950 |
|
}, |
|
{ |
|
"MSE": 502.7584656778976, |
|
"MSE/layer0": 502.7584656778976, |
|
"dead_code_fraction": 0.16445, |
|
"dead_code_fraction/layer0": 0.16445, |
|
"epoch": 2.04, |
|
"input_norm": 31.998899453481037, |
|
"input_norm/layer0": 31.998899453481037, |
|
"learning_rate": 0.005, |
|
"loss": 2.0569, |
|
"max_norm": 220.5869903564453, |
|
"max_norm/layer0": 220.5869903564453, |
|
"mean_norm": 69.14492416381836, |
|
"mean_norm/layer0": 69.14492416381836, |
|
"multicode_k": 1, |
|
"output_norm": 22.808293444315584, |
|
"output_norm/layer0": 22.808293444315584, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_MSE/layer0": 501.8128262733759, |
|
"eval_accuracy": 0.5193506309245984, |
|
"eval_dead_code_fraction/layer0": 0.16395, |
|
"eval_input_norm/layer0": 31.998895487949337, |
|
"eval_loss": 2.0353407859802246, |
|
"eval_multicode_k": 1, |
|
"eval_output_norm/layer0": 22.80092038433711, |
|
"eval_runtime": 158.6027, |
|
"eval_samples_per_second": 29.148, |
|
"eval_steps_per_second": 1.822, |
|
"step": 10000 |
|
}, |
|
{ |
|
"MSE": 0.0, |
|
"MSE/layer0": 0.0, |
|
"dead_code_fraction": 1.0, |
|
"dead_code_fraction/layer0": 1.0, |
|
"epoch": 2.04, |
|
"input_norm": 0.0, |
|
"input_norm/layer0": 0.0, |
|
"max_norm": 220.5869903564453, |
|
"max_norm/layer0": 220.5869903564453, |
|
"mean_norm": 69.14492416381836, |
|
"mean_norm/layer0": 69.14492416381836, |
|
"multicode_k": 1, |
|
"output_norm": 0.0, |
|
"output_norm/layer0": 0.0, |
|
"step": 10000, |
|
"total_flos": 7.43098011353088e+16, |
|
"train_loss": 2.205516522693634, |
|
"train_runtime": 15654.0479, |
|
"train_samples_per_second": 61.326, |
|
"train_steps_per_second": 0.639 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 10000, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"total_flos": 7.43098011353088e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|