VERSIL91 committed on
Commit
f8a4529
1 Parent(s): 8a4be28

Training in progress, step 18, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:efe715dfc3c6655975b4c0cb344e7c945494e1463e96c18bf9c77000e7066604
3
  size 27024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fa532b40d5c0681f2baf3e51922c8aea3f5f742de78d5404e2c7fb13b9d1384
3
  size 27024
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa28aca63d30e6a77fca80f9f9286ec5bb1020314276276e1f34a85650641b30
3
  size 63974
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c51b96be3eb3a7f526346cab91aa0e09347e5f1b0e2ea7802e6818d1e2a82d14
3
  size 63974
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af82c1f1d4ec999545a290f8875f59e0434314b9e98a3c2553c0c541daf4cdfa
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb04243bb25bf686a4dbec102fd9ca7613ad5c31a6a03dfc0541d4df84930100
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:541ccba4d24c3fd7a3a1bd1b7c1a8ef18368821282014bf91f536020daf3aafe
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f110d38baddc83b27dc701018da1279ccbbfd00f24dd48bfba755b5837c286c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.56973293768546,
5
  "eval_steps": 6,
6
- "global_step": 12,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -115,6 +115,56 @@
115
  "eval_samples_per_second": 497.765,
116
  "eval_steps_per_second": 248.882,
117
  "step": 12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  }
119
  ],
120
  "logging_steps": 1,
@@ -134,7 +184,7 @@
134
  "attributes": {}
135
  }
136
  },
137
- "total_flos": 966585384960.0,
138
  "train_batch_size": 2,
139
  "trial_name": null,
140
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.8545994065281899,
5
  "eval_steps": 6,
6
+ "global_step": 18,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
115
  "eval_samples_per_second": 497.765,
116
  "eval_steps_per_second": 248.882,
117
  "step": 12
118
+ },
119
+ {
120
+ "epoch": 0.6172106824925816,
121
+ "grad_norm": 0.10060179978609085,
122
+ "learning_rate": 8.535533905932738e-05,
123
+ "loss": 10.3803,
124
+ "step": 13
125
+ },
126
+ {
127
+ "epoch": 0.6646884272997032,
128
+ "grad_norm": 0.11765701323747635,
129
+ "learning_rate": 7.500000000000001e-05,
130
+ "loss": 10.3862,
131
+ "step": 14
132
+ },
133
+ {
134
+ "epoch": 0.712166172106825,
135
+ "grad_norm": 0.12245073914527893,
136
+ "learning_rate": 6.294095225512603e-05,
137
+ "loss": 10.3825,
138
+ "step": 15
139
+ },
140
+ {
141
+ "epoch": 0.7596439169139466,
142
+ "grad_norm": 0.10479110479354858,
143
+ "learning_rate": 5e-05,
144
+ "loss": 10.3832,
145
+ "step": 16
146
+ },
147
+ {
148
+ "epoch": 0.8071216617210683,
149
+ "grad_norm": 0.09735947102308273,
150
+ "learning_rate": 3.705904774487396e-05,
151
+ "loss": 10.3786,
152
+ "step": 17
153
+ },
154
+ {
155
+ "epoch": 0.8545994065281899,
156
+ "grad_norm": 0.11688768118619919,
157
+ "learning_rate": 2.500000000000001e-05,
158
+ "loss": 10.3835,
159
+ "step": 18
160
+ },
161
+ {
162
+ "epoch": 0.8545994065281899,
163
+ "eval_loss": 10.379833221435547,
164
+ "eval_runtime": 0.0775,
165
+ "eval_samples_per_second": 464.759,
166
+ "eval_steps_per_second": 232.379,
167
+ "step": 18
168
  }
169
  ],
170
  "logging_steps": 1,
 
184
  "attributes": {}
185
  }
186
  },
187
+ "total_flos": 1432098078720.0,
188
  "train_batch_size": 2,
189
  "trial_name": null,
190
  "trial_params": null