leixa commited on
Commit
fdb3eaa
1 Parent(s): beca71e

Training in progress, step 52, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:955ee48fa44fbd547a69a17e32f27b4906e3dbf00d84babd8e3d9ea83598276a
3
  size 639691872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bfb19cdffd8f2a70518ab31199f4b1827a2fbc7e048b596427974934df16daa
3
  size 639691872
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21c9361a80a2d599be762356ad296eb8f3d22e72b304e2c586b2e9f1a3633354
3
  size 325339796
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:215dde8de4d4b55ff881d6061b02018ce74f36f87bf60b5e2a9be7f84ff0140d
3
  size 325339796
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0bd48ab963ce214f47ac10c51eb718536f51b1a5f4605ab693a533222db0cdb5
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f2c3514f0eafec6c2b250a2a0a609ed59534ef31c7ba11da3fe225c079d1575
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b06606e40b3f01ec7202acbfd1d0145e551b0842ff237681a6bdde0b316ef9b5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:792ad29798cba378feffa861e2939c0c9205d97b89b46c0161cdef16a3d333a2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.010800332317917475,
5
  "eval_steps": 13,
6
- "global_step": 39,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -130,6 +130,42 @@
130
  "eval_samples_per_second": 13.195,
131
  "eval_steps_per_second": 1.651,
132
  "step": 39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  }
134
  ],
135
  "logging_steps": 3,
@@ -149,7 +185,7 @@
149
  "attributes": {}
150
  }
151
  },
152
- "total_flos": 6.220067744671334e+16,
153
  "train_batch_size": 8,
154
  "trial_name": null,
155
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.014400443090556632,
5
  "eval_steps": 13,
6
+ "global_step": 52,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
130
  "eval_samples_per_second": 13.195,
131
  "eval_steps_per_second": 1.651,
132
  "step": 39
133
+ },
134
+ {
135
+ "epoch": 0.011631127111603435,
136
+ "grad_norm": 2.9625065326690674,
137
+ "learning_rate": 8.765357330018056e-05,
138
+ "loss": 1.5417,
139
+ "step": 42
140
+ },
141
+ {
142
+ "epoch": 0.012461921905289393,
143
+ "grad_norm": 2.775251865386963,
144
+ "learning_rate": 8.535533905932738e-05,
145
+ "loss": 1.4174,
146
+ "step": 45
147
+ },
148
+ {
149
+ "epoch": 0.013292716698975353,
150
+ "grad_norm": 3.418978214263916,
151
+ "learning_rate": 8.289693629698564e-05,
152
+ "loss": 1.444,
153
+ "step": 48
154
+ },
155
+ {
156
+ "epoch": 0.014123511492661313,
157
+ "grad_norm": 2.627183437347412,
158
+ "learning_rate": 8.0289502192041e-05,
159
+ "loss": 1.3904,
160
+ "step": 51
161
+ },
162
+ {
163
+ "epoch": 0.014400443090556632,
164
+ "eval_loss": 0.3832974135875702,
165
+ "eval_runtime": 461.0753,
166
+ "eval_samples_per_second": 13.191,
167
+ "eval_steps_per_second": 1.65,
168
+ "step": 52
169
  }
170
  ],
171
  "logging_steps": 3,
 
185
  "attributes": {}
186
  }
187
  },
188
+ "total_flos": 8.249073800419738e+16,
189
  "train_batch_size": 8,
190
  "trial_name": null,
191
  "trial_params": null