pretrain_utg4java_codet5_prueba / utg4java_pretrain_log_history.txt
eljavatar's picture
upload test mini model utg4java pretained on codet5
b3599bf verified
Log History:
{"loss": 1.6003, "grad_norm": 4.428943157196045, "learning_rate": 0.00019936921218628373, "epoch": 0.05, "step": 200}
{"loss": 1.0391, "grad_norm": 3.2689905166625977, "learning_rate": 0.00019668500872366127, "epoch": 0.11, "step": 400}
{"loss": 0.9586, "grad_norm": 3.4730663299560547, "learning_rate": 0.00019400080526103878, "epoch": 0.16, "step": 600}
{"loss": 0.9416, "grad_norm": 3.3503410816192627, "learning_rate": 0.00019131660179841633, "epoch": 0.21, "step": 800}
{"loss": 0.9352, "grad_norm": 3.9026317596435547, "learning_rate": 0.00018863239833579387, "epoch": 0.27, "step": 1000}
{"loss": 0.9018, "grad_norm": 3.4323599338531494, "learning_rate": 0.00018594819487317138, "epoch": 0.32, "step": 1200}
{"loss": 0.8959, "grad_norm": 2.90022349357605, "learning_rate": 0.00018326399141054892, "epoch": 0.37, "step": 1400}
{"loss": 0.8777, "grad_norm": 3.3339474201202393, "learning_rate": 0.00018057978794792646, "epoch": 0.43, "step": 1600}
{"loss": 0.8719, "grad_norm": 5.839554309844971, "learning_rate": 0.00017789558448530398, "epoch": 0.48, "step": 1800}
{"loss": 0.8763, "grad_norm": 3.7872190475463867, "learning_rate": 0.00017521138102268152, "epoch": 0.53, "step": 2000}
{"loss": 0.8601, "grad_norm": 3.5567479133605957, "learning_rate": 0.00017252717756005906, "epoch": 0.58, "step": 2200}
{"loss": 0.8522, "grad_norm": 2.728114604949951, "learning_rate": 0.0001698429740974366, "epoch": 0.64, "step": 2400}
{"loss": 0.852, "grad_norm": 2.7635385990142822, "learning_rate": 0.00016717219165212724, "epoch": 0.69, "step": 2600}
{"loss": 0.8622, "grad_norm": 2.332137107849121, "learning_rate": 0.00016448798818950478, "epoch": 0.74, "step": 2800}
{"loss": 0.8814, "grad_norm": 2.689222812652588, "learning_rate": 0.00016180378472688232, "epoch": 0.8, "step": 3000}
{"loss": 0.8223, "grad_norm": 3.2268168926239014, "learning_rate": 0.00015911958126425983, "epoch": 0.85, "step": 3200}
{"loss": 0.8538, "grad_norm": 2.7432162761688232, "learning_rate": 0.00015643537780163738, "epoch": 0.9, "step": 3400}
{"loss": 0.8118, "grad_norm": 3.417742967605591, "learning_rate": 0.00015375117433901492, "epoch": 0.96, "step": 3600}
{"eval_loss": 0.7114242315292358, "eval_runtime": 144.1269, "eval_samples_per_second": 13.051, "eval_steps_per_second": 1.637, "epoch": 1.0, "step": 3763}
{"loss": 0.7983, "grad_norm": 2.479877233505249, "learning_rate": 0.00015106697087639243, "epoch": 1.01, "step": 3800}
{"loss": 0.7892, "grad_norm": 2.73120379447937, "learning_rate": 0.00014838276741376997, "epoch": 1.06, "step": 4000}
{"loss": 0.7747, "grad_norm": 2.924459934234619, "learning_rate": 0.00014569856395114749, "epoch": 1.12, "step": 4200}
{"loss": 0.7758, "grad_norm": 3.1980695724487305, "learning_rate": 0.00014301436048852503, "epoch": 1.17, "step": 4400}
{"loss": 0.7814, "grad_norm": 2.564530849456787, "learning_rate": 0.00014033015702590257, "epoch": 1.22, "step": 4600}
{"loss": 0.7501, "grad_norm": 2.7397029399871826, "learning_rate": 0.0001376593745805932, "epoch": 1.28, "step": 4800}
{"loss": 0.7712, "grad_norm": 2.7164580821990967, "learning_rate": 0.00013497517111797075, "epoch": 1.33, "step": 5000}
{"loss": 0.7999, "grad_norm": 2.4152510166168213, "learning_rate": 0.0001322909676553483, "epoch": 1.38, "step": 5200}
{"loss": 0.7516, "grad_norm": 2.9376044273376465, "learning_rate": 0.00012960676419272583, "epoch": 1.44, "step": 5400}
{"loss": 0.7663, "grad_norm": 3.0849993228912354, "learning_rate": 0.00012692256073010337, "epoch": 1.49, "step": 5600}
{"loss": 0.7725, "grad_norm": 2.0552046298980713, "learning_rate": 0.00012423835726748088, "epoch": 1.54, "step": 5800}
{"loss": 0.748, "grad_norm": 4.300168991088867, "learning_rate": 0.00012155415380485841, "epoch": 1.59, "step": 6000}
{"loss": 0.7219, "grad_norm": 2.172149658203125, "learning_rate": 0.00011886995034223594, "epoch": 1.65, "step": 6200}
{"loss": 0.7565, "grad_norm": 4.83528470993042, "learning_rate": 0.00011618574687961348, "epoch": 1.7, "step": 6400}
{"loss": 0.7433, "grad_norm": 2.4364535808563232, "learning_rate": 0.00011350154341699102, "epoch": 1.75, "step": 6600}
{"loss": 0.729, "grad_norm": 3.630072593688965, "learning_rate": 0.00011081733995436854, "epoch": 1.81, "step": 6800}
{"loss": 0.7178, "grad_norm": 3.485239267349243, "learning_rate": 0.00010813313649174608, "epoch": 1.86, "step": 7000}
{"loss": 0.7507, "grad_norm": 2.265608787536621, "learning_rate": 0.00010544893302912362, "epoch": 1.91, "step": 7200}
{"loss": 0.763, "grad_norm": 2.48197340965271, "learning_rate": 0.00010276472956650113, "epoch": 1.97, "step": 7400}
{"eval_loss": 0.6458454728126526, "eval_runtime": 138.1168, "eval_samples_per_second": 13.619, "eval_steps_per_second": 1.709, "epoch": 2.0, "step": 7526}
{"loss": 0.7165, "grad_norm": 3.2982826232910156, "learning_rate": 0.00010008052610387867, "epoch": 2.02, "step": 7600}
{"loss": 0.7093, "grad_norm": 2.9913341999053955, "learning_rate": 9.739632264125621e-05, "epoch": 2.07, "step": 7800}
{"loss": 0.7007, "grad_norm": 3.782381057739258, "learning_rate": 9.471211917863374e-05, "epoch": 2.13, "step": 8000}
{"loss": 0.6961, "grad_norm": 1.392814040184021, "learning_rate": 9.202791571601128e-05, "epoch": 2.18, "step": 8200}
{"loss": 0.7014, "grad_norm": 1.976282000541687, "learning_rate": 8.934371225338881e-05, "epoch": 2.23, "step": 8400}
{"loss": 0.6845, "grad_norm": 1.8080275058746338, "learning_rate": 8.665950879076634e-05, "epoch": 2.29, "step": 8600}
{"loss": 0.6766, "grad_norm": 2.2179641723632812, "learning_rate": 8.397530532814388e-05, "epoch": 2.34, "step": 8800}
{"loss": 0.6716, "grad_norm": 4.8770551681518555, "learning_rate": 8.129110186552141e-05, "epoch": 2.39, "step": 9000}
{"loss": 0.663, "grad_norm": 2.4720568656921387, "learning_rate": 7.860689840289895e-05, "epoch": 2.44, "step": 9200}
{"loss": 0.6629, "grad_norm": 2.546283006668091, "learning_rate": 7.592269494027648e-05, "epoch": 2.5, "step": 9400}
{"loss": 0.6677, "grad_norm": 2.2792224884033203, "learning_rate": 7.323849147765402e-05, "epoch": 2.55, "step": 9600}
{"loss": 0.6697, "grad_norm": 2.87917160987854, "learning_rate": 7.055428801503155e-05, "epoch": 2.6, "step": 9800}
{"loss": 0.661, "grad_norm": 4.642680644989014, "learning_rate": 6.787008455240907e-05, "epoch": 2.66, "step": 10000}
{"loss": 0.6447, "grad_norm": 2.9181325435638428, "learning_rate": 6.51858810897866e-05, "epoch": 2.71, "step": 10200}
{"loss": 0.6668, "grad_norm": 1.9983739852905273, "learning_rate": 6.250167762716414e-05, "epoch": 2.76, "step": 10400}
{"loss": 0.6494, "grad_norm": 3.3339133262634277, "learning_rate": 5.9817474164541676e-05, "epoch": 2.82, "step": 10600}
{"loss": 0.6471, "grad_norm": 2.7515461444854736, "learning_rate": 5.7133270701919204e-05, "epoch": 2.87, "step": 10800}
{"loss": 0.6513, "grad_norm": 2.502366781234741, "learning_rate": 5.4462488256609854e-05, "epoch": 2.92, "step": 11000}
{"loss": 0.6701, "grad_norm": 2.680753707885742, "learning_rate": 5.177828479398739e-05, "epoch": 2.98, "step": 11200}
{"eval_loss": 0.5908769369125366, "eval_runtime": 267.1069, "eval_samples_per_second": 7.042, "eval_steps_per_second": 0.884, "epoch": 3.0, "step": 11289}
{"loss": 0.6571, "grad_norm": 1.714341640472412, "learning_rate": 4.909408133136492e-05, "epoch": 3.03, "step": 11400}
{"loss": 0.6627, "grad_norm": 2.996971607208252, "learning_rate": 4.640987786874245e-05, "epoch": 3.08, "step": 11600}
{"loss": 0.6486, "grad_norm": 2.6136629581451416, "learning_rate": 4.3725674406119985e-05, "epoch": 3.14, "step": 11800}
{"loss": 0.5942, "grad_norm": 1.8732506036758423, "learning_rate": 4.104147094349752e-05, "epoch": 3.19, "step": 12000}
{"loss": 0.6228, "grad_norm": 3.316340923309326, "learning_rate": 3.8357267480875054e-05, "epoch": 3.24, "step": 12200}
{"loss": 0.6313, "grad_norm": 2.4904277324676514, "learning_rate": 3.567306401825258e-05, "epoch": 3.3, "step": 12400}
{"loss": 0.6132, "grad_norm": 2.3936824798583984, "learning_rate": 3.2988860555630116e-05, "epoch": 3.35, "step": 12600}
{"loss": 0.6108, "grad_norm": 2.5523078441619873, "learning_rate": 3.030465709300765e-05, "epoch": 3.4, "step": 12800}
{"loss": 0.6133, "grad_norm": 2.3019254207611084, "learning_rate": 2.7620453630385185e-05, "epoch": 3.45, "step": 13000}
{"loss": 0.6241, "grad_norm": 2.492788791656494, "learning_rate": 2.493625016776272e-05, "epoch": 3.51, "step": 13200}
{"loss": 0.6023, "grad_norm": 2.1467068195343018, "learning_rate": 2.2265467722453363e-05, "epoch": 3.56, "step": 13400}
{"loss": 0.6059, "grad_norm": 1.78839910030365, "learning_rate": 1.9581264259830894e-05, "epoch": 3.61, "step": 13600}
{"loss": 0.5922, "grad_norm": 2.343613624572754, "learning_rate": 1.689706079720843e-05, "epoch": 3.67, "step": 13800}
{"loss": 0.6092, "grad_norm": 2.2175772190093994, "learning_rate": 1.4212857334585964e-05, "epoch": 3.72, "step": 14000}
{"loss": 0.6055, "grad_norm": 1.8630731105804443, "learning_rate": 1.1528653871963495e-05, "epoch": 3.77, "step": 14200}
{"loss": 0.6053, "grad_norm": 1.4924801588058472, "learning_rate": 8.84445040934103e-06, "epoch": 3.83, "step": 14400}
{"loss": 0.5846, "grad_norm": 3.014918088912964, "learning_rate": 6.1602469467185615e-06, "epoch": 3.88, "step": 14600}
{"loss": 0.5826, "grad_norm": 1.5459446907043457, "learning_rate": 3.4760434840960946e-06, "epoch": 3.93, "step": 14800}
{"loss": 0.608, "grad_norm": 3.936332941055298, "learning_rate": 7.918400214736277e-07, "epoch": 3.99, "step": 15000}
{"eval_loss": 0.5480290651321411, "eval_runtime": 221.6429, "eval_samples_per_second": 8.487, "eval_steps_per_second": 1.065, "epoch": 4.0, "step": 15052}
{"train_runtime": 7813.3494, "train_samples_per_second": 7.706, "train_steps_per_second": 1.926, "total_flos": 8148659183026176.0, "train_loss": 0.7414002821479045, "epoch": 4.0, "step": 15052}