sedrickkeh committed on
Commit
8270454
1 Parent(s): 3f6d21d

Training in progress, epoch 1

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a2612a6ba69f922364da03f60f0f4bc65f7b6ee7ffae80b4189e512b305c468
3
  size 4903351912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11a543f3ef469e820b7f5c5e973ce740968f29e9da203c1902e03c0696d5c73f
3
  size 4903351912
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63aa89889f07bc6a196a3999e1e429c33edf12bea7227f3a9d65517ac707cfaa
3
  size 4947570872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7919a5eb31df82f7aa8d12b18d2f09cc6f260c4b302d13a07c91965573d67743
3
  size 4947570872
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ccc90d0b05c76d1e9b85b346406b17a428b30d39a37fd1a6c3836f30f65b85ec
3
  size 4962221464
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87a1edc48bd1c06e3e660aeb048d7b1a8b13f6a957d2edd711f5153c719ca1aa
3
  size 4962221464
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5fb396188827449e922def7eb5887ffc53e72f219f04ee64f8e339c5350b9d0
3
  size 3670322200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d86237a6c3db73d5abd99aa7404831f17b78a18b110f923cc146947320269328
3
  size 3670322200
trainer_log.jsonl CHANGED
@@ -43,3 +43,48 @@
43
  {"current_steps": 430, "total_steps": 1329, "loss": 0.5893, "learning_rate": 4.142191492540592e-06, "epoch": 0.9703808180535967, "percentage": 32.36, "elapsed_time": "10:32:36", "remaining_time": "22:02:35"}
44
  {"current_steps": 440, "total_steps": 1329, "loss": 0.5939, "learning_rate": 4.097763229899161e-06, "epoch": 0.9929478138222849, "percentage": 33.11, "elapsed_time": "10:47:18", "remaining_time": "21:47:51"}
45
  {"current_steps": 443, "total_steps": 1329, "eval_loss": 0.5920091271400452, "epoch": 0.9997179125528914, "percentage": 33.33, "elapsed_time": "11:03:15", "remaining_time": "22:06:30"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  {"current_steps": 430, "total_steps": 1329, "loss": 0.5893, "learning_rate": 4.142191492540592e-06, "epoch": 0.9703808180535967, "percentage": 32.36, "elapsed_time": "10:32:36", "remaining_time": "22:02:35"}
44
  {"current_steps": 440, "total_steps": 1329, "loss": 0.5939, "learning_rate": 4.097763229899161e-06, "epoch": 0.9929478138222849, "percentage": 33.11, "elapsed_time": "10:47:18", "remaining_time": "21:47:51"}
45
  {"current_steps": 443, "total_steps": 1329, "eval_loss": 0.5920091271400452, "epoch": 0.9997179125528914, "percentage": 33.33, "elapsed_time": "11:03:15", "remaining_time": "22:06:30"}
46
+ {"current_steps": 450, "total_steps": 1329, "loss": 0.6055, "learning_rate": 4.052499802400512e-06, "epoch": 1.0155148095909732, "percentage": 33.86, "elapsed_time": "11:14:43", "remaining_time": "21:57:57"}
47
+ {"current_steps": 460, "total_steps": 1329, "loss": 0.5342, "learning_rate": 4.006429258312416e-06, "epoch": 1.0380818053596614, "percentage": 34.61, "elapsed_time": "11:29:26", "remaining_time": "21:42:26"}
48
+ {"current_steps": 470, "total_steps": 1329, "loss": 0.549, "learning_rate": 3.959580146046436e-06, "epoch": 1.0606488011283497, "percentage": 35.36, "elapsed_time": "11:44:09", "remaining_time": "21:26:58"}
49
+ {"current_steps": 480, "total_steps": 1329, "loss": 0.5365, "learning_rate": 3.9119814964674036e-06, "epoch": 1.0832157968970382, "percentage": 36.12, "elapsed_time": "11:58:52", "remaining_time": "21:11:30"}
50
+ {"current_steps": 490, "total_steps": 1329, "loss": 0.5469, "learning_rate": 3.86366280490395e-06, "epoch": 1.1057827926657264, "percentage": 36.87, "elapsed_time": "12:13:34", "remaining_time": "20:56:04"}
51
+ {"current_steps": 500, "total_steps": 1329, "loss": 0.5451, "learning_rate": 3.814654012871206e-06, "epoch": 1.1283497884344147, "percentage": 37.62, "elapsed_time": "12:28:17", "remaining_time": "20:40:40"}
52
+ {"current_steps": 510, "total_steps": 1329, "loss": 0.5459, "learning_rate": 3.7649854895170252e-06, "epoch": 1.150916784203103, "percentage": 38.37, "elapsed_time": "12:43:00", "remaining_time": "20:25:18"}
53
+ {"current_steps": 520, "total_steps": 1329, "loss": 0.5383, "learning_rate": 3.7146880128032125e-06, "epoch": 1.1734837799717912, "percentage": 39.13, "elapsed_time": "12:57:43", "remaining_time": "20:09:58"}
54
+ {"current_steps": 530, "total_steps": 1329, "loss": 0.5439, "learning_rate": 3.663792750433435e-06, "epoch": 1.1960507757404795, "percentage": 39.88, "elapsed_time": "13:12:27", "remaining_time": "19:54:39"}
55
+ {"current_steps": 540, "total_steps": 1329, "loss": 0.5413, "learning_rate": 3.612331240539601e-06, "epoch": 1.2186177715091677, "percentage": 40.63, "elapsed_time": "13:27:10", "remaining_time": "19:39:22"}
56
+ {"current_steps": 550, "total_steps": 1329, "loss": 0.5462, "learning_rate": 3.5603353721387224e-06, "epoch": 1.2411847672778562, "percentage": 41.38, "elapsed_time": "13:41:53", "remaining_time": "19:24:05"}
57
+ {"current_steps": 560, "total_steps": 1329, "loss": 0.5475, "learning_rate": 3.5078373653723222e-06, "epoch": 1.2637517630465445, "percentage": 42.14, "elapsed_time": "13:56:36", "remaining_time": "19:08:51"}
58
+ {"current_steps": 570, "total_steps": 1329, "loss": 0.5444, "learning_rate": 3.454869751540672e-06, "epoch": 1.2863187588152327, "percentage": 42.89, "elapsed_time": "14:11:19", "remaining_time": "18:53:36"}
59
+ {"current_steps": 580, "total_steps": 1329, "loss": 0.5445, "learning_rate": 3.4014653529442025e-06, "epoch": 1.308885754583921, "percentage": 43.64, "elapsed_time": "14:26:03", "remaining_time": "18:38:24"}
60
+ {"current_steps": 590, "total_steps": 1329, "loss": 0.5442, "learning_rate": 3.3476572625446e-06, "epoch": 1.3314527503526092, "percentage": 44.39, "elapsed_time": "14:40:46", "remaining_time": "18:23:12"}
61
+ {"current_steps": 600, "total_steps": 1329, "loss": 0.5494, "learning_rate": 3.2934788234581773e-06, "epoch": 1.3540197461212977, "percentage": 45.15, "elapsed_time": "14:55:29", "remaining_time": "18:08:01"}
62
+ {"current_steps": 610, "total_steps": 1329, "loss": 0.5415, "learning_rate": 3.238963608294232e-06, "epoch": 1.376586741889986, "percentage": 45.9, "elapsed_time": "15:10:13", "remaining_time": "17:52:52"}
63
+ {"current_steps": 620, "total_steps": 1329, "loss": 0.5459, "learning_rate": 3.1841453983511984e-06, "epoch": 1.3991537376586742, "percentage": 46.65, "elapsed_time": "15:24:56", "remaining_time": "17:37:43"}
64
+ {"current_steps": 630, "total_steps": 1329, "loss": 0.5436, "learning_rate": 3.1290581626834814e-06, "epoch": 1.4217207334273625, "percentage": 47.4, "elapsed_time": "15:39:40", "remaining_time": "17:22:35"}
65
+ {"current_steps": 640, "total_steps": 1329, "loss": 0.543, "learning_rate": 3.0737360370519397e-06, "epoch": 1.4442877291960508, "percentage": 48.16, "elapsed_time": "15:54:23", "remaining_time": "17:07:28"}
66
+ {"current_steps": 650, "total_steps": 1329, "loss": 0.5439, "learning_rate": 3.0182133027710727e-06, "epoch": 1.466854724964739, "percentage": 48.91, "elapsed_time": "16:09:07", "remaining_time": "16:52:21"}
67
+ {"current_steps": 660, "total_steps": 1329, "loss": 0.5471, "learning_rate": 2.9625243654660064e-06, "epoch": 1.4894217207334273, "percentage": 49.66, "elapsed_time": "16:23:50", "remaining_time": "16:37:15"}
68
+ {"current_steps": 670, "total_steps": 1329, "loss": 0.5419, "learning_rate": 2.906703733752456e-06, "epoch": 1.5119887165021155, "percentage": 50.41, "elapsed_time": "16:38:33", "remaining_time": "16:22:09"}
69
+ {"current_steps": 680, "total_steps": 1329, "loss": 0.536, "learning_rate": 2.850785997852856e-06, "epoch": 1.5345557122708038, "percentage": 51.17, "elapsed_time": "16:53:15", "remaining_time": "16:07:04"}
70
+ {"current_steps": 690, "total_steps": 1329, "loss": 0.5377, "learning_rate": 2.7948058081619394e-06, "epoch": 1.5571227080394923, "percentage": 51.92, "elapsed_time": "17:07:58", "remaining_time": "15:51:59"}
71
+ {"current_steps": 700, "total_steps": 1329, "loss": 0.5418, "learning_rate": 2.7387978537750094e-06, "epoch": 1.5796897038081805, "percentage": 52.67, "elapsed_time": "17:22:41", "remaining_time": "15:36:55"}
72
+ {"current_steps": 710, "total_steps": 1329, "loss": 0.5444, "learning_rate": 2.6827968409922457e-06, "epoch": 1.6022566995768688, "percentage": 53.42, "elapsed_time": "17:37:24", "remaining_time": "15:21:52"}
73
+ {"current_steps": 720, "total_steps": 1329, "loss": 0.5522, "learning_rate": 2.6268374718123436e-06, "epoch": 1.6248236953455573, "percentage": 54.18, "elapsed_time": "17:52:06", "remaining_time": "15:06:49"}
74
+ {"current_steps": 730, "total_steps": 1329, "loss": 0.5354, "learning_rate": 2.5709544224288164e-06, "epoch": 1.6473906911142455, "percentage": 54.93, "elapsed_time": "18:06:49", "remaining_time": "14:51:47"}
75
+ {"current_steps": 740, "total_steps": 1329, "loss": 0.5507, "learning_rate": 2.5151823217422937e-06, "epoch": 1.6699576868829338, "percentage": 55.68, "elapsed_time": "18:21:32", "remaining_time": "14:36:46"}
76
+ {"current_steps": 750, "total_steps": 1329, "loss": 0.5422, "learning_rate": 2.4595557299021205e-06, "epoch": 1.692524682651622, "percentage": 56.43, "elapsed_time": "18:36:15", "remaining_time": "14:21:45"}
77
+ {"current_steps": 760, "total_steps": 1329, "loss": 0.5409, "learning_rate": 2.404109116890561e-06, "epoch": 1.7150916784203103, "percentage": 57.19, "elapsed_time": "18:50:58", "remaining_time": "14:06:44"}
78
+ {"current_steps": 770, "total_steps": 1329, "loss": 0.5414, "learning_rate": 2.3488768411628787e-06, "epoch": 1.7376586741889986, "percentage": 57.94, "elapsed_time": "19:05:42", "remaining_time": "13:51:44"}
79
+ {"current_steps": 780, "total_steps": 1329, "loss": 0.5372, "learning_rate": 2.29389312835652e-06, "epoch": 1.7602256699576868, "percentage": 58.69, "elapsed_time": "19:20:25", "remaining_time": "13:36:45"}
80
+ {"current_steps": 790, "total_steps": 1329, "loss": 0.5403, "learning_rate": 2.2391920500826093e-06, "epoch": 1.782792665726375, "percentage": 59.44, "elapsed_time": "19:35:08", "remaining_time": "13:21:46"}
81
+ {"current_steps": 800, "total_steps": 1329, "loss": 0.5352, "learning_rate": 2.184807502812879e-06, "epoch": 1.8053596614950633, "percentage": 60.2, "elapsed_time": "19:49:50", "remaining_time": "13:06:46"}
82
+ {"current_steps": 810, "total_steps": 1329, "loss": 0.5458, "learning_rate": 2.1307731868751387e-06, "epoch": 1.8279266572637518, "percentage": 60.95, "elapsed_time": "20:04:33", "remaining_time": "12:51:48"}
83
+ {"current_steps": 820, "total_steps": 1329, "loss": 0.5329, "learning_rate": 2.0771225855702777e-06, "epoch": 1.85049365303244, "percentage": 61.7, "elapsed_time": "20:19:17", "remaining_time": "12:36:50"}
84
+ {"current_steps": 830, "total_steps": 1329, "loss": 0.5447, "learning_rate": 2.02388894442376e-06, "epoch": 1.8730606488011283, "percentage": 62.45, "elapsed_time": "20:34:00", "remaining_time": "12:21:53"}
85
+ {"current_steps": 840, "total_steps": 1329, "loss": 0.5449, "learning_rate": 1.9711052505844555e-06, "epoch": 1.8956276445698168, "percentage": 63.21, "elapsed_time": "20:48:43", "remaining_time": "12:06:56"}
86
+ {"current_steps": 850, "total_steps": 1329, "loss": 0.5322, "learning_rate": 1.918804212383584e-06, "epoch": 1.918194640338505, "percentage": 63.96, "elapsed_time": "21:03:27", "remaining_time": "11:51:59"}
87
+ {"current_steps": 860, "total_steps": 1329, "loss": 0.5373, "learning_rate": 1.867018239066429e-06, "epoch": 1.9407616361071933, "percentage": 64.71, "elapsed_time": "21:18:10", "remaining_time": "11:37:02"}
88
+ {"current_steps": 870, "total_steps": 1329, "loss": 0.5438, "learning_rate": 1.815779420709386e-06, "epoch": 1.9633286318758816, "percentage": 65.46, "elapsed_time": "21:32:55", "remaining_time": "11:22:07"}
89
+ {"current_steps": 880, "total_steps": 1329, "loss": 0.5415, "learning_rate": 1.7651195083347916e-06, "epoch": 1.9858956276445698, "percentage": 66.22, "elapsed_time": "21:47:39", "remaining_time": "11:07:11"}
90
+ {"current_steps": 886, "total_steps": 1329, "eval_loss": 0.5835571885108948, "epoch": 1.9994358251057829, "percentage": 66.67, "elapsed_time": "22:08:15", "remaining_time": "11:04:07"}