sedrickkeh commited on
Commit
99c51a5
1 Parent(s): d699a7e

Training in progress, epoch 1

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70c62a477351dec2e626fa809106e4c23cfd180247253a7646bd0b9a6f8d581e
3
  size 4903351912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:485a64510d6bad28827a5b75051783e8ae16d2f021f1ed0ebab0ac726b01f8b0
3
  size 4903351912
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:404c60d2273306e892fc3137fe3bb48afe7e478d266ee6726550dc9aa7c9c6f4
3
  size 4947570872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9572e7caeed02b1ea96392522b82acbe4fbe29cd968f0e4de6a65f8eb6e99b3f
3
  size 4947570872
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76b25ab55fd90b8baeac17e9eab8dda73059a0d21a7613208b78f876a85e8d3f
3
  size 4962221464
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bf28aebe7fee46a5cd953565cbebbacbb048b86e8494282f11c5abd166b6bb8
3
  size 4962221464
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0740499b2b8c3448691a53cf86973351ec80399a89ac847bbbb44a89be97ee1c
3
  size 3670322200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fa1eac0d99340eff2a58495e2d0175f779a4b57c8825f02310f7cf47849ab09
3
  size 3670322200
trainer_log.jsonl CHANGED
@@ -43,3 +43,48 @@
43
  {"current_steps": 430, "total_steps": 1329, "loss": 0.5891, "learning_rate": 3.561806656101426e-06, "epoch": 0.9703808180535967, "percentage": 32.36, "elapsed_time": "10:38:36", "remaining_time": "22:15:08"}
44
  {"current_steps": 440, "total_steps": 1329, "loss": 0.5938, "learning_rate": 3.5221870047543584e-06, "epoch": 0.9929478138222849, "percentage": 33.11, "elapsed_time": "10:53:28", "remaining_time": "22:00:19"}
45
  {"current_steps": 443, "total_steps": 1329, "eval_loss": 0.591905951499939, "epoch": 0.9997179125528914, "percentage": 33.33, "elapsed_time": "11:09:32", "remaining_time": "22:19:05"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  {"current_steps": 430, "total_steps": 1329, "loss": 0.5891, "learning_rate": 3.561806656101426e-06, "epoch": 0.9703808180535967, "percentage": 32.36, "elapsed_time": "10:38:36", "remaining_time": "22:15:08"}
44
  {"current_steps": 440, "total_steps": 1329, "loss": 0.5938, "learning_rate": 3.5221870047543584e-06, "epoch": 0.9929478138222849, "percentage": 33.11, "elapsed_time": "10:53:28", "remaining_time": "22:00:19"}
45
  {"current_steps": 443, "total_steps": 1329, "eval_loss": 0.591905951499939, "epoch": 0.9997179125528914, "percentage": 33.33, "elapsed_time": "11:09:32", "remaining_time": "22:19:05"}
46
+ {"current_steps": 450, "total_steps": 1329, "loss": 0.6072, "learning_rate": 3.4825673534072902e-06, "epoch": 1.0155148095909732, "percentage": 33.86, "elapsed_time": "11:21:00", "remaining_time": "22:10:13"}
47
+ {"current_steps": 460, "total_steps": 1329, "loss": 0.5365, "learning_rate": 3.442947702060222e-06, "epoch": 1.0380818053596614, "percentage": 34.61, "elapsed_time": "11:35:52", "remaining_time": "21:54:36"}
48
+ {"current_steps": 470, "total_steps": 1329, "loss": 0.5512, "learning_rate": 3.403328050713154e-06, "epoch": 1.0606488011283497, "percentage": 35.36, "elapsed_time": "11:50:45", "remaining_time": "21:39:00"}
49
+ {"current_steps": 480, "total_steps": 1329, "loss": 0.5388, "learning_rate": 3.3637083993660857e-06, "epoch": 1.0832157968970382, "percentage": 36.12, "elapsed_time": "12:05:37", "remaining_time": "21:23:27"}
50
+ {"current_steps": 490, "total_steps": 1329, "loss": 0.5492, "learning_rate": 3.3240887480190175e-06, "epoch": 1.1057827926657264, "percentage": 36.87, "elapsed_time": "12:20:29", "remaining_time": "21:07:54"}
51
+ {"current_steps": 500, "total_steps": 1329, "loss": 0.5475, "learning_rate": 3.2844690966719493e-06, "epoch": 1.1283497884344147, "percentage": 37.62, "elapsed_time": "12:35:19", "remaining_time": "20:52:19"}
52
+ {"current_steps": 510, "total_steps": 1329, "loss": 0.5481, "learning_rate": 3.244849445324881e-06, "epoch": 1.150916784203103, "percentage": 38.37, "elapsed_time": "12:50:11", "remaining_time": "20:36:50"}
53
+ {"current_steps": 520, "total_steps": 1329, "loss": 0.5405, "learning_rate": 3.2052297939778134e-06, "epoch": 1.1734837799717912, "percentage": 39.13, "elapsed_time": "13:05:03", "remaining_time": "20:21:22"}
54
+ {"current_steps": 530, "total_steps": 1329, "loss": 0.546, "learning_rate": 3.165610142630745e-06, "epoch": 1.1960507757404795, "percentage": 39.88, "elapsed_time": "13:19:56", "remaining_time": "20:05:56"}
55
+ {"current_steps": 540, "total_steps": 1329, "loss": 0.5435, "learning_rate": 3.125990491283677e-06, "epoch": 1.2186177715091677, "percentage": 40.63, "elapsed_time": "13:34:45", "remaining_time": "19:50:27"}
56
+ {"current_steps": 550, "total_steps": 1329, "loss": 0.5483, "learning_rate": 3.086370839936609e-06, "epoch": 1.2411847672778562, "percentage": 41.38, "elapsed_time": "13:49:35", "remaining_time": "19:34:59"}
57
+ {"current_steps": 560, "total_steps": 1329, "loss": 0.5497, "learning_rate": 3.0467511885895406e-06, "epoch": 1.2637517630465445, "percentage": 42.14, "elapsed_time": "14:04:25", "remaining_time": "19:19:34"}
58
+ {"current_steps": 570, "total_steps": 1329, "loss": 0.5467, "learning_rate": 3.0071315372424724e-06, "epoch": 1.2863187588152327, "percentage": 42.89, "elapsed_time": "14:19:15", "remaining_time": "19:04:10"}
59
+ {"current_steps": 580, "total_steps": 1329, "loss": 0.5467, "learning_rate": 2.9675118858954042e-06, "epoch": 1.308885754583921, "percentage": 43.64, "elapsed_time": "14:34:08", "remaining_time": "18:48:50"}
60
+ {"current_steps": 590, "total_steps": 1329, "loss": 0.5464, "learning_rate": 2.9278922345483365e-06, "epoch": 1.3314527503526092, "percentage": 44.39, "elapsed_time": "14:48:57", "remaining_time": "18:33:27"}
61
+ {"current_steps": 600, "total_steps": 1329, "loss": 0.5518, "learning_rate": 2.8882725832012683e-06, "epoch": 1.3540197461212977, "percentage": 45.15, "elapsed_time": "15:03:48", "remaining_time": "18:18:07"}
62
+ {"current_steps": 610, "total_steps": 1329, "loss": 0.5439, "learning_rate": 2.8486529318542e-06, "epoch": 1.376586741889986, "percentage": 45.9, "elapsed_time": "15:18:39", "remaining_time": "18:02:49"}
63
+ {"current_steps": 620, "total_steps": 1329, "loss": 0.5482, "learning_rate": 2.809033280507132e-06, "epoch": 1.3991537376586742, "percentage": 46.65, "elapsed_time": "15:33:31", "remaining_time": "17:47:32"}
64
+ {"current_steps": 630, "total_steps": 1329, "loss": 0.5459, "learning_rate": 2.7694136291600637e-06, "epoch": 1.4217207334273625, "percentage": 47.4, "elapsed_time": "15:48:23", "remaining_time": "17:32:16"}
65
+ {"current_steps": 640, "total_steps": 1329, "loss": 0.5454, "learning_rate": 2.7297939778129955e-06, "epoch": 1.4442877291960508, "percentage": 48.16, "elapsed_time": "16:03:14", "remaining_time": "17:16:59"}
66
+ {"current_steps": 650, "total_steps": 1329, "loss": 0.5462, "learning_rate": 2.6901743264659273e-06, "epoch": 1.466854724964739, "percentage": 48.91, "elapsed_time": "16:18:05", "remaining_time": "17:01:43"}
67
+ {"current_steps": 660, "total_steps": 1329, "loss": 0.5496, "learning_rate": 2.650554675118859e-06, "epoch": 1.4894217207334273, "percentage": 49.66, "elapsed_time": "16:32:54", "remaining_time": "16:46:27"}
68
+ {"current_steps": 670, "total_steps": 1329, "loss": 0.5446, "learning_rate": 2.6109350237717914e-06, "epoch": 1.5119887165021155, "percentage": 50.41, "elapsed_time": "16:47:46", "remaining_time": "16:31:13"}
69
+ {"current_steps": 680, "total_steps": 1329, "loss": 0.5385, "learning_rate": 2.571315372424723e-06, "epoch": 1.5345557122708038, "percentage": 51.17, "elapsed_time": "17:02:38", "remaining_time": "16:16:01"}
70
+ {"current_steps": 690, "total_steps": 1329, "loss": 0.5403, "learning_rate": 2.531695721077655e-06, "epoch": 1.5571227080394923, "percentage": 51.92, "elapsed_time": "17:17:30", "remaining_time": "16:00:49"}
71
+ {"current_steps": 700, "total_steps": 1329, "loss": 0.5444, "learning_rate": 2.4920760697305864e-06, "epoch": 1.5796897038081805, "percentage": 52.67, "elapsed_time": "17:32:20", "remaining_time": "15:45:35"}
72
+ {"current_steps": 710, "total_steps": 1329, "loss": 0.547, "learning_rate": 2.452456418383518e-06, "epoch": 1.6022566995768688, "percentage": 53.42, "elapsed_time": "17:47:11", "remaining_time": "15:30:24"}
73
+ {"current_steps": 720, "total_steps": 1329, "loss": 0.5549, "learning_rate": 2.4128367670364504e-06, "epoch": 1.6248236953455573, "percentage": 54.18, "elapsed_time": "18:02:01", "remaining_time": "15:15:12"}
74
+ {"current_steps": 730, "total_steps": 1329, "loss": 0.538, "learning_rate": 2.3732171156893823e-06, "epoch": 1.6473906911142455, "percentage": 54.93, "elapsed_time": "18:16:53", "remaining_time": "15:00:02"}
75
+ {"current_steps": 740, "total_steps": 1329, "loss": 0.5536, "learning_rate": 2.333597464342314e-06, "epoch": 1.6699576868829338, "percentage": 55.68, "elapsed_time": "18:31:43", "remaining_time": "14:44:52"}
76
+ {"current_steps": 750, "total_steps": 1329, "loss": 0.5448, "learning_rate": 2.293977812995246e-06, "epoch": 1.692524682651622, "percentage": 56.43, "elapsed_time": "18:46:30", "remaining_time": "14:29:39"}
77
+ {"current_steps": 760, "total_steps": 1329, "loss": 0.5436, "learning_rate": 2.2543581616481777e-06, "epoch": 1.7150916784203103, "percentage": 57.19, "elapsed_time": "19:01:21", "remaining_time": "14:14:30"}
78
+ {"current_steps": 770, "total_steps": 1329, "loss": 0.5442, "learning_rate": 2.2147385103011095e-06, "epoch": 1.7376586741889986, "percentage": 57.94, "elapsed_time": "19:16:09", "remaining_time": "13:59:20"}
79
+ {"current_steps": 780, "total_steps": 1329, "loss": 0.5401, "learning_rate": 2.1751188589540413e-06, "epoch": 1.7602256699576868, "percentage": 58.69, "elapsed_time": "19:30:59", "remaining_time": "13:44:11"}
80
+ {"current_steps": 790, "total_steps": 1329, "loss": 0.5433, "learning_rate": 2.135499207606973e-06, "epoch": 1.782792665726375, "percentage": 59.44, "elapsed_time": "19:45:51", "remaining_time": "13:29:04"}
81
+ {"current_steps": 800, "total_steps": 1329, "loss": 0.5382, "learning_rate": 2.0958795562599054e-06, "epoch": 1.8053596614950633, "percentage": 60.2, "elapsed_time": "20:00:43", "remaining_time": "13:13:58"}
82
+ {"current_steps": 810, "total_steps": 1329, "loss": 0.5488, "learning_rate": 2.056259904912837e-06, "epoch": 1.8279266572637518, "percentage": 60.95, "elapsed_time": "20:15:34", "remaining_time": "12:58:52"}
83
+ {"current_steps": 820, "total_steps": 1329, "loss": 0.5359, "learning_rate": 2.016640253565769e-06, "epoch": 1.85049365303244, "percentage": 61.7, "elapsed_time": "20:30:23", "remaining_time": "12:43:44"}
84
+ {"current_steps": 830, "total_steps": 1329, "loss": 0.5479, "learning_rate": 1.9770206022187004e-06, "epoch": 1.8730606488011283, "percentage": 62.45, "elapsed_time": "20:45:14", "remaining_time": "12:28:38"}
85
+ {"current_steps": 840, "total_steps": 1329, "loss": 0.548, "learning_rate": 1.937400950871632e-06, "epoch": 1.8956276445698168, "percentage": 63.21, "elapsed_time": "21:00:06", "remaining_time": "12:13:33"}
86
+ {"current_steps": 850, "total_steps": 1329, "loss": 0.5353, "learning_rate": 1.8977812995245642e-06, "epoch": 1.918194640338505, "percentage": 63.96, "elapsed_time": "21:14:56", "remaining_time": "11:58:28"}
87
+ {"current_steps": 860, "total_steps": 1329, "loss": 0.5404, "learning_rate": 1.858161648177496e-06, "epoch": 1.9407616361071933, "percentage": 64.71, "elapsed_time": "21:29:48", "remaining_time": "11:43:23"}
88
+ {"current_steps": 870, "total_steps": 1329, "loss": 0.5469, "learning_rate": 1.818541996830428e-06, "epoch": 1.9633286318758816, "percentage": 65.46, "elapsed_time": "21:44:39", "remaining_time": "11:28:19"}
89
+ {"current_steps": 880, "total_steps": 1329, "loss": 0.5447, "learning_rate": 1.7789223454833599e-06, "epoch": 1.9858956276445698, "percentage": 66.22, "elapsed_time": "21:59:30", "remaining_time": "11:13:15"}
90
+ {"current_steps": 886, "total_steps": 1329, "eval_loss": 0.5841004252433777, "epoch": 1.9994358251057829, "percentage": 66.67, "elapsed_time": "22:20:13", "remaining_time": "11:10:06"}