ardaspear committed on
Commit
765e8ef
·
verified ·
1 Parent(s): 243b2fa

Training in progress, step 68, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:faf6a0c9f287d54ffdc5b5fd47ee47acb3962a794039f6c50471332b1bd0597a
3
  size 72396376
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35b03ad288e9be268229d4c9b8350b53a0832256682c787acaf0d0153bef542a
3
  size 72396376
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94232827281bc34d3d9aa3486a9436a9999c28af4953f5c8f57971a4e34dfd09
3
  size 37134420
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b33cb1418e7c3b78e10095e9e9845586d0cec7958bc2a6c4d162562a3e217d0
3
  size 37134420
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:161ccaa5741f73c3c75a0291963368f5281659d8a1026a6fd4b0de6bf799153e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f03bc1423c34b94dc31b9df725b107e8c7094161c242c029296a4dd797e9612
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:97178a71d5acd54714c38f03fc162b58c9ab83f0e2b9f2d42288a4a7b505c2c6
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57465ffa9dc280f2ea6034fe61064b0208bf36c7b5f569218c0e1296778ee099
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.009667330110889964,
5
  "eval_steps": 34,
6
- "global_step": 34,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -100,6 +100,91 @@
100
  "eval_samples_per_second": 35.316,
101
  "eval_steps_per_second": 4.417,
102
  "step": 34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  }
104
  ],
105
  "logging_steps": 3,
@@ -119,7 +204,7 @@
119
  "attributes": {}
120
  }
121
  },
122
- "total_flos": 1.0888000598704128e+16,
123
  "train_batch_size": 8,
124
  "trial_name": null,
125
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.019334660221779928,
5
  "eval_steps": 34,
6
+ "global_step": 68,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
100
  "eval_samples_per_second": 35.316,
101
  "eval_steps_per_second": 4.417,
102
  "step": 34
103
+ },
104
+ {
105
+ "epoch": 0.010235996588001138,
106
+ "grad_norm": 0.16702412068843842,
107
+ "learning_rate": 4.9453690018345144e-05,
108
+ "loss": 1.1981,
109
+ "step": 36
110
+ },
111
+ {
112
+ "epoch": 0.011088996303667898,
113
+ "grad_norm": 0.1968175172805786,
114
+ "learning_rate": 4.932095175695911e-05,
115
+ "loss": 1.1675,
116
+ "step": 39
117
+ },
118
+ {
119
+ "epoch": 0.01194199601933466,
120
+ "grad_norm": 0.18244469165802002,
121
+ "learning_rate": 4.917401074463441e-05,
122
+ "loss": 1.1584,
123
+ "step": 42
124
+ },
125
+ {
126
+ "epoch": 0.01279499573500142,
127
+ "grad_norm": 0.16749081015586853,
128
+ "learning_rate": 4.901295279078431e-05,
129
+ "loss": 1.1134,
130
+ "step": 45
131
+ },
132
+ {
133
+ "epoch": 0.013647995450668183,
134
+ "grad_norm": 0.17398597300052643,
135
+ "learning_rate": 4.883787194871841e-05,
136
+ "loss": 1.1139,
137
+ "step": 48
138
+ },
139
+ {
140
+ "epoch": 0.014500995166334945,
141
+ "grad_norm": 0.17164087295532227,
142
+ "learning_rate": 4.864887046071813e-05,
143
+ "loss": 1.079,
144
+ "step": 51
145
+ },
146
+ {
147
+ "epoch": 0.015353994882001705,
148
+ "grad_norm": 0.1644001007080078,
149
+ "learning_rate": 4.8446058698330115e-05,
150
+ "loss": 1.0646,
151
+ "step": 54
152
+ },
153
+ {
154
+ "epoch": 0.01620699459766847,
155
+ "grad_norm": 0.16490623354911804,
156
+ "learning_rate": 4.822955509791233e-05,
157
+ "loss": 1.0739,
158
+ "step": 57
159
+ },
160
+ {
161
+ "epoch": 0.017059994313335228,
162
+ "grad_norm": 0.17708458006381989,
163
+ "learning_rate": 4.799948609147061e-05,
164
+ "loss": 1.0897,
165
+ "step": 60
166
+ },
167
+ {
168
+ "epoch": 0.01791299402900199,
169
+ "grad_norm": 0.15597032010555267,
170
+ "learning_rate": 4.7755986032825864e-05,
171
+ "loss": 1.0566,
172
+ "step": 63
173
+ },
174
+ {
175
+ "epoch": 0.018765993744668752,
176
+ "grad_norm": 0.17728550732135773,
177
+ "learning_rate": 4.74991971191553e-05,
178
+ "loss": 1.0275,
179
+ "step": 66
180
+ },
181
+ {
182
+ "epoch": 0.019334660221779928,
183
+ "eval_loss": 1.0011852979660034,
184
+ "eval_runtime": 168.1174,
185
+ "eval_samples_per_second": 35.237,
186
+ "eval_steps_per_second": 4.408,
187
+ "step": 68
188
  }
189
  ],
190
  "logging_steps": 3,
 
204
  "attributes": {}
205
  }
206
  },
207
+ "total_flos": 2.1776001197408256e+16,
208
  "train_batch_size": 8,
209
  "trial_name": null,
210
  "trial_params": null