full_deepseek-1.3-tc / trainer_log.jsonl
Yhhhhhhhhh's picture
Upload folder using huggingface_hub
4f17f74 verified
{"current_steps": 10, "total_steps": 1016, "loss": 0.8414, "learning_rate": 1.6129032258064516e-06, "epoch": 0.03929273084479371, "percentage": 0.98, "elapsed_time": "0:00:37", "remaining_time": "1:02:15"}
{"current_steps": 20, "total_steps": 1016, "loss": 0.8388, "learning_rate": 3.225806451612903e-06, "epoch": 0.07858546168958742, "percentage": 1.97, "elapsed_time": "0:01:10", "remaining_time": "0:58:34"}
{"current_steps": 30, "total_steps": 1016, "loss": 0.6738, "learning_rate": 4.838709677419355e-06, "epoch": 0.11787819253438114, "percentage": 2.95, "elapsed_time": "0:01:43", "remaining_time": "0:56:43"}
{"current_steps": 40, "total_steps": 1016, "loss": 0.5541, "learning_rate": 4.998970106077018e-06, "epoch": 0.15717092337917485, "percentage": 3.94, "elapsed_time": "0:02:15", "remaining_time": "0:55:04"}
{"current_steps": 50, "total_steps": 1016, "loss": 0.5261, "learning_rate": 4.99541106832608e-06, "epoch": 0.19646365422396855, "percentage": 4.92, "elapsed_time": "0:02:47", "remaining_time": "0:53:55"}
{"current_steps": 60, "total_steps": 1016, "loss": 0.6025, "learning_rate": 4.989313791265896e-06, "epoch": 0.2357563850687623, "percentage": 5.91, "elapsed_time": "0:03:17", "remaining_time": "0:52:32"}
{"current_steps": 70, "total_steps": 1016, "loss": 0.571, "learning_rate": 4.9806844768198724e-06, "epoch": 0.275049115913556, "percentage": 6.89, "elapsed_time": "0:03:49", "remaining_time": "0:51:40"}
{"current_steps": 80, "total_steps": 1016, "loss": 0.5795, "learning_rate": 4.969531902405652e-06, "epoch": 0.3143418467583497, "percentage": 7.87, "elapsed_time": "0:04:20", "remaining_time": "0:50:43"}
{"current_steps": 90, "total_steps": 1016, "loss": 0.5766, "learning_rate": 4.955867412007052e-06, "epoch": 0.35363457760314343, "percentage": 8.86, "elapsed_time": "0:04:49", "remaining_time": "0:49:39"}
{"current_steps": 100, "total_steps": 1016, "loss": 0.5421, "learning_rate": 4.939704904635388e-06, "epoch": 0.3929273084479371, "percentage": 9.84, "elapsed_time": "0:05:17", "remaining_time": "0:48:28"}
{"current_steps": 110, "total_steps": 1016, "loss": 0.5133, "learning_rate": 4.921060820191909e-06, "epoch": 0.43222003929273084, "percentage": 10.83, "elapsed_time": "0:05:44", "remaining_time": "0:47:19"}
{"current_steps": 120, "total_steps": 1016, "loss": 0.5643, "learning_rate": 4.8999541227457514e-06, "epoch": 0.4715127701375246, "percentage": 11.81, "elapsed_time": "0:06:12", "remaining_time": "0:46:20"}
{"current_steps": 130, "total_steps": 1016, "loss": 0.5117, "learning_rate": 4.8764062812443875e-06, "epoch": 0.5108055009823183, "percentage": 12.8, "elapsed_time": "0:06:43", "remaining_time": "0:45:48"}
{"current_steps": 140, "total_steps": 1016, "loss": 0.5394, "learning_rate": 4.8504412476762105e-06, "epoch": 0.550098231827112, "percentage": 13.78, "elapsed_time": "0:07:14", "remaining_time": "0:45:16"}
{"current_steps": 150, "total_steps": 1016, "loss": 0.5244, "learning_rate": 4.822085432707465e-06, "epoch": 0.5893909626719057, "percentage": 14.76, "elapsed_time": "0:07:42", "remaining_time": "0:44:28"}
{"current_steps": 160, "total_steps": 1016, "loss": 0.4974, "learning_rate": 4.791367678818299e-06, "epoch": 0.6286836935166994, "percentage": 15.75, "elapsed_time": "0:08:09", "remaining_time": "0:43:39"}
{"current_steps": 170, "total_steps": 1016, "loss": 0.51, "learning_rate": 4.758319230965267e-06, "epoch": 0.6679764243614931, "percentage": 16.73, "elapsed_time": "0:08:36", "remaining_time": "0:42:52"}
{"current_steps": 180, "total_steps": 1016, "loss": 0.4972, "learning_rate": 4.72297370480012e-06, "epoch": 0.7072691552062869, "percentage": 17.72, "elapsed_time": "0:09:04", "remaining_time": "0:42:09"}
{"current_steps": 190, "total_steps": 1016, "loss": 0.4932, "learning_rate": 4.685367052477218e-06, "epoch": 0.7465618860510805, "percentage": 18.7, "elapsed_time": "0:09:31", "remaining_time": "0:41:22"}
{"current_steps": 200, "total_steps": 1016, "loss": 0.472, "learning_rate": 4.645537526084331e-06, "epoch": 0.7858546168958742, "percentage": 19.69, "elapsed_time": "0:09:58", "remaining_time": "0:40:42"}
{"current_steps": 210, "total_steps": 1016, "loss": 0.5046, "learning_rate": 4.603525638734049e-06, "epoch": 0.825147347740668, "percentage": 20.67, "elapsed_time": "0:10:25", "remaining_time": "0:40:02"}
{"current_steps": 220, "total_steps": 1016, "loss": 0.5002, "learning_rate": 4.559374123355337e-06, "epoch": 0.8644400785854617, "percentage": 21.65, "elapsed_time": "0:10:53", "remaining_time": "0:39:26"}
{"current_steps": 230, "total_steps": 1016, "loss": 0.4411, "learning_rate": 4.5131278892272e-06, "epoch": 0.9037328094302554, "percentage": 22.64, "elapsed_time": "0:11:23", "remaining_time": "0:38:54"}
{"current_steps": 240, "total_steps": 1016, "loss": 0.4711, "learning_rate": 4.46483397629863e-06, "epoch": 0.9430255402750491, "percentage": 23.62, "elapsed_time": "0:11:48", "remaining_time": "0:38:11"}
{"current_steps": 250, "total_steps": 1016, "loss": 0.4635, "learning_rate": 4.414541507341323e-06, "epoch": 0.9823182711198428, "percentage": 24.61, "elapsed_time": "0:12:16", "remaining_time": "0:37:35"}
{"current_steps": 260, "total_steps": 1016, "loss": 0.4179, "learning_rate": 4.362301637983815e-06, "epoch": 1.0216110019646365, "percentage": 25.59, "elapsed_time": "0:12:42", "remaining_time": "0:36:57"}
{"current_steps": 270, "total_steps": 1016, "loss": 0.4041, "learning_rate": 4.308167504677893e-06, "epoch": 1.0609037328094302, "percentage": 26.57, "elapsed_time": "0:13:08", "remaining_time": "0:36:19"}
{"current_steps": 280, "total_steps": 1016, "loss": 0.3847, "learning_rate": 4.2521941706501625e-06, "epoch": 1.1001964636542239, "percentage": 27.56, "elapsed_time": "0:13:37", "remaining_time": "0:35:49"}
{"current_steps": 290, "total_steps": 1016, "loss": 0.3876, "learning_rate": 4.194438569893784e-06, "epoch": 1.1394891944990178, "percentage": 28.54, "elapsed_time": "0:14:05", "remaining_time": "0:35:17"}
{"current_steps": 300, "total_steps": 1016, "loss": 0.3743, "learning_rate": 4.134959449257335e-06, "epoch": 1.1787819253438114, "percentage": 29.53, "elapsed_time": "0:14:32", "remaining_time": "0:34:42"}
{"current_steps": 310, "total_steps": 1016, "loss": 0.3689, "learning_rate": 4.0738173086896995e-06, "epoch": 1.218074656188605, "percentage": 30.51, "elapsed_time": "0:14:59", "remaining_time": "0:34:09"}
{"current_steps": 320, "total_steps": 1016, "loss": 0.3813, "learning_rate": 4.011074339701772e-06, "epoch": 1.2573673870333988, "percentage": 31.5, "elapsed_time": "0:15:25", "remaining_time": "0:33:32"}
{"current_steps": 330, "total_steps": 1016, "loss": 0.3786, "learning_rate": 3.946794362107564e-06, "epoch": 1.2966601178781925, "percentage": 32.48, "elapsed_time": "0:15:51", "remaining_time": "0:32:57"}
{"current_steps": 340, "total_steps": 1016, "loss": 0.4117, "learning_rate": 3.8810427591090635e-06, "epoch": 1.3359528487229864, "percentage": 33.46, "elapsed_time": "0:16:17", "remaining_time": "0:32:23"}
{"current_steps": 350, "total_steps": 1016, "loss": 0.3861, "learning_rate": 3.813886410790879e-06, "epoch": 1.37524557956778, "percentage": 34.45, "elapsed_time": "0:16:42", "remaining_time": "0:31:48"}
{"current_steps": 360, "total_steps": 1016, "loss": 0.3636, "learning_rate": 3.7453936260922983e-06, "epoch": 1.4145383104125737, "percentage": 35.43, "elapsed_time": "0:17:09", "remaining_time": "0:31:15"}
{"current_steps": 370, "total_steps": 1016, "loss": 0.4099, "learning_rate": 3.675634073325981e-06, "epoch": 1.4538310412573674, "percentage": 36.42, "elapsed_time": "0:17:36", "remaining_time": "0:30:43"}
{"current_steps": 380, "total_steps": 1016, "loss": 0.381, "learning_rate": 3.6046787093139415e-06, "epoch": 1.493123772102161, "percentage": 37.4, "elapsed_time": "0:18:03", "remaining_time": "0:30:13"}
{"current_steps": 390, "total_steps": 1016, "loss": 0.3769, "learning_rate": 3.5325997072129066e-06, "epoch": 1.5324165029469548, "percentage": 38.39, "elapsed_time": "0:18:30", "remaining_time": "0:29:43"}
{"current_steps": 400, "total_steps": 1016, "loss": 0.3923, "learning_rate": 3.4594703831024723e-06, "epoch": 1.5717092337917484, "percentage": 39.37, "elapsed_time": "0:18:57", "remaining_time": "0:29:11"}
{"current_steps": 410, "total_steps": 1016, "loss": 0.3653, "learning_rate": 3.385365121410706e-06, "epoch": 1.611001964636542, "percentage": 40.35, "elapsed_time": "0:19:23", "remaining_time": "0:28:39"}
{"current_steps": 420, "total_steps": 1016, "loss": 0.3544, "learning_rate": 3.310359299253082e-06, "epoch": 1.650294695481336, "percentage": 41.34, "elapsed_time": "0:19:48", "remaining_time": "0:28:06"}
{"current_steps": 430, "total_steps": 1016, "loss": 0.3878, "learning_rate": 3.234529209761676e-06, "epoch": 1.6895874263261297, "percentage": 42.32, "elapsed_time": "0:20:15", "remaining_time": "0:27:36"}
{"current_steps": 440, "total_steps": 1016, "loss": 0.3846, "learning_rate": 3.157951984482635e-06, "epoch": 1.7288801571709234, "percentage": 43.31, "elapsed_time": "0:20:41", "remaining_time": "0:27:05"}
{"current_steps": 450, "total_steps": 1016, "loss": 0.4512, "learning_rate": 3.080705514920836e-06, "epoch": 1.768172888015717, "percentage": 44.29, "elapsed_time": "0:21:08", "remaining_time": "0:26:35"}
{"current_steps": 460, "total_steps": 1016, "loss": 0.3761, "learning_rate": 3.0028683733115417e-06, "epoch": 1.807465618860511, "percentage": 45.28, "elapsed_time": "0:21:34", "remaining_time": "0:26:04"}
{"current_steps": 470, "total_steps": 1016, "loss": 0.3673, "learning_rate": 2.9245197326996515e-06, "epoch": 1.8467583497053046, "percentage": 46.26, "elapsed_time": "0:22:02", "remaining_time": "0:25:36"}
{"current_steps": 480, "total_steps": 1016, "loss": 0.3874, "learning_rate": 2.845739286407821e-06, "epoch": 1.8860510805500983, "percentage": 47.24, "elapsed_time": "0:22:29", "remaining_time": "0:25:06"}
{"current_steps": 490, "total_steps": 1016, "loss": 0.3562, "learning_rate": 2.7666071669753807e-06, "epoch": 1.925343811394892, "percentage": 48.23, "elapsed_time": "0:22:56", "remaining_time": "0:24:37"}
{"current_steps": 500, "total_steps": 1016, "loss": 0.3739, "learning_rate": 2.687203864650497e-06, "epoch": 1.9646365422396856, "percentage": 49.21, "elapsed_time": "0:23:23", "remaining_time": "0:24:08"}
{"current_steps": 510, "total_steps": 1016, "loss": 0.3554, "learning_rate": 2.6076101455184867e-06, "epoch": 2.0039292730844793, "percentage": 50.2, "elapsed_time": "0:24:41", "remaining_time": "0:24:29"}
{"current_steps": 520, "total_steps": 1016, "loss": 0.323, "learning_rate": 2.527906969349559e-06, "epoch": 2.043222003929273, "percentage": 51.18, "elapsed_time": "0:25:08", "remaining_time": "0:23:58"}
{"current_steps": 530, "total_steps": 1016, "loss": 0.2903, "learning_rate": 2.44817540724955e-06, "epoch": 2.0825147347740667, "percentage": 52.17, "elapsed_time": "0:25:34", "remaining_time": "0:23:27"}
{"current_steps": 540, "total_steps": 1016, "loss": 0.306, "learning_rate": 2.3684965591974084e-06, "epoch": 2.1218074656188604, "percentage": 53.15, "elapsed_time": "0:25:59", "remaining_time": "0:22:55"}
{"current_steps": 550, "total_steps": 1016, "loss": 0.3889, "learning_rate": 2.288951471553316e-06, "epoch": 2.161100196463654, "percentage": 54.13, "elapsed_time": "0:26:25", "remaining_time": "0:22:23"}
{"current_steps": 560, "total_steps": 1016, "loss": 0.2868, "learning_rate": 2.2096210546213397e-06, "epoch": 2.2003929273084477, "percentage": 55.12, "elapsed_time": "0:26:49", "remaining_time": "0:21:50"}
{"current_steps": 570, "total_steps": 1016, "loss": 0.2873, "learning_rate": 2.130586000350486e-06, "epoch": 2.239685658153242, "percentage": 56.1, "elapsed_time": "0:27:16", "remaining_time": "0:21:20"}
{"current_steps": 580, "total_steps": 1016, "loss": 0.2845, "learning_rate": 2.0519267002578517e-06, "epoch": 2.2789783889980355, "percentage": 57.09, "elapsed_time": "0:27:43", "remaining_time": "0:20:50"}
{"current_steps": 590, "total_steps": 1016, "loss": 0.3272, "learning_rate": 1.9737231636573595e-06, "epoch": 2.318271119842829, "percentage": 58.07, "elapsed_time": "0:28:07", "remaining_time": "0:20:18"}