ardaspear committed on
Commit
f019105
·
verified ·
1 Parent(s): 7b5b3a6

Training in progress, step 100, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5407db02f2d5db8097ead4f9c2a8b0b31cb387a7431b1c6f38e1d993980400c9
3
  size 6804608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ba79555c3c3efdec97edd16076639700328263b318a1b74af3683d0725d022a
3
  size 6804608
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd64b8e862b1830e591121d1f7e3ffcaa2e260cabff3db6007ff8e2f5b846fef
3
  size 3633530
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4e708dc4a1735af4b8afbf7be39cec4a4d655c4d0822adab89c56c114b0e68a
3
  size 3633530
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:08c742876dbc7023e2848d7bb1b55279fe9f0c050c2c6db00f82a2c0bd875952
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2baafb56fe3a2619cfc0fecd20a068c58a057e4c42cd93b97063a3d1ac12cb5
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d359eb5d29e75fb2bbe5b7026981da69b95b8ad1fea469302d13cde104f7e8a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ddb9588ea654e56e83effcf81a2bc03480954babcf6415cb44d41d3bfb8039f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.012525050100200401,
5
  "eval_steps": 50,
6
- "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -93,6 +93,84 @@
93
  "eval_samples_per_second": 89.694,
94
  "eval_steps_per_second": 44.874,
95
  "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  }
97
  ],
98
  "logging_steps": 5,
@@ -112,7 +190,7 @@
112
  "attributes": {}
113
  }
114
  },
115
- "total_flos": 342926504755200.0,
116
  "train_batch_size": 2,
117
  "trial_name": null,
118
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.025050100200400802,
5
  "eval_steps": 50,
6
+ "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
93
  "eval_samples_per_second": 89.694,
94
  "eval_steps_per_second": 44.874,
95
  "step": 50
96
+ },
97
+ {
98
+ "epoch": 0.01377755511022044,
99
+ "grad_norm": 1.3579661846160889,
100
+ "learning_rate": 8.678619553365659e-05,
101
+ "loss": 2.561,
102
+ "step": 55
103
+ },
104
+ {
105
+ "epoch": 0.01503006012024048,
106
+ "grad_norm": 1.8097760677337646,
107
+ "learning_rate": 8.386407858128706e-05,
108
+ "loss": 2.5397,
109
+ "step": 60
110
+ },
111
+ {
112
+ "epoch": 0.01628256513026052,
113
+ "grad_norm": 1.3172852993011475,
114
+ "learning_rate": 8.07106356344834e-05,
115
+ "loss": 2.4455,
116
+ "step": 65
117
+ },
118
+ {
119
+ "epoch": 0.01753507014028056,
120
+ "grad_norm": 2.0352325439453125,
121
+ "learning_rate": 7.734740790612136e-05,
122
+ "loss": 2.4834,
123
+ "step": 70
124
+ },
125
+ {
126
+ "epoch": 0.018787575150300603,
127
+ "grad_norm": 1.3835731744766235,
128
+ "learning_rate": 7.379736965185368e-05,
129
+ "loss": 2.4561,
130
+ "step": 75
131
+ },
132
+ {
133
+ "epoch": 0.02004008016032064,
134
+ "grad_norm": 1.4761914014816284,
135
+ "learning_rate": 7.008477123264848e-05,
136
+ "loss": 2.3603,
137
+ "step": 80
138
+ },
139
+ {
140
+ "epoch": 0.021292585170340682,
141
+ "grad_norm": 1.3032515048980713,
142
+ "learning_rate": 6.623497346023418e-05,
143
+ "loss": 2.2767,
144
+ "step": 85
145
+ },
146
+ {
147
+ "epoch": 0.022545090180360723,
148
+ "grad_norm": 1.3516151905059814,
149
+ "learning_rate": 6.227427435703997e-05,
150
+ "loss": 2.3037,
151
+ "step": 90
152
+ },
153
+ {
154
+ "epoch": 0.02379759519038076,
155
+ "grad_norm": 1.2633843421936035,
156
+ "learning_rate": 5.8229729514036705e-05,
157
+ "loss": 2.3047,
158
+ "step": 95
159
+ },
160
+ {
161
+ "epoch": 0.025050100200400802,
162
+ "grad_norm": 2.345054864883423,
163
+ "learning_rate": 5.4128967273616625e-05,
164
+ "loss": 2.3067,
165
+ "step": 100
166
+ },
167
+ {
168
+ "epoch": 0.025050100200400802,
169
+ "eval_loss": 2.3011841773986816,
170
+ "eval_runtime": 18.2416,
171
+ "eval_samples_per_second": 92.152,
172
+ "eval_steps_per_second": 46.103,
173
+ "step": 100
174
  }
175
  ],
176
  "logging_steps": 5,
 
190
  "attributes": {}
191
  }
192
  },
193
+ "total_flos": 685853009510400.0,
194
  "train_batch_size": 2,
195
  "trial_name": null,
196
  "trial_params": null