kooff11 commited on
Commit
29abbe7
·
verified ·
1 Parent(s): 8cf2119

Training in progress, step 20, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:106aa573046fcbe0f8bbeaab055c5ad672839af3c9f297a56d34080b4f1756bf
3
  size 45118424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e25fc3038a8e3f4d67294734295c486c8bc2398c6811e5f692a270d3008d6eb
3
  size 45118424
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7934da1c86c6c18d31d87a9c7f8a94370b350894481d16aabda27d614f76c448
3
  size 23159290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1774f77f007af7b55a69980f0a6cc754386dc022cc57f072771b815409c819b
3
  size 23159290
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:917ad58882644007533c4abc141424ee42139f4dc0c5ccabb771f99d9f33b084
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd17b5df1022d14a6bc409d5199b678b4ef7ee120dd6b803365c640249735528
3
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c05c147f54d09793f94646e3c80d99a7f4fc46e672b3727c589118bb2c7a9f0e
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6003b9da5fb97bf7840c3b5bc091d085fbbef473002eef31535fde335f58e3e
3
  size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d45ccdc6e3c1cc0068abcc929ad0c964f2519a45cbca01b7ab8cd5851550cd1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2b018ac4fe63057a6f92b6fc2b40d2df6ba47ab96e6b212db49c719e05a03a8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.2620802620802621,
5
  "eval_steps": 10,
6
- "global_step": 10,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -93,6 +93,84 @@
93
  "eval_samples_per_second": 36.916,
94
  "eval_steps_per_second": 9.337,
95
  "step": 10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  }
97
  ],
98
  "logging_steps": 1,
@@ -112,7 +190,7 @@
112
  "attributes": {}
113
  }
114
  },
115
- "total_flos": 3.1209052633563136e+16,
116
  "train_batch_size": 2,
117
  "trial_name": null,
118
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5241605241605242,
5
  "eval_steps": 10,
6
+ "global_step": 20,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
93
  "eval_samples_per_second": 36.916,
94
  "eval_steps_per_second": 9.337,
95
  "step": 10
96
+ },
97
+ {
98
+ "epoch": 0.2882882882882883,
99
+ "grad_norm": 0.5569241642951965,
100
+ "learning_rate": 8.609780469772623e-05,
101
+ "loss": 1.6436,
102
+ "step": 11
103
+ },
104
+ {
105
+ "epoch": 0.3144963144963145,
106
+ "grad_norm": 0.5158886313438416,
107
+ "learning_rate": 8.303373616950408e-05,
108
+ "loss": 1.6523,
109
+ "step": 12
110
+ },
111
+ {
112
+ "epoch": 0.3407043407043407,
113
+ "grad_norm": 0.4968787729740143,
114
+ "learning_rate": 7.973165881521434e-05,
115
+ "loss": 1.6673,
116
+ "step": 13
117
+ },
118
+ {
119
+ "epoch": 0.3669123669123669,
120
+ "grad_norm": 0.49128782749176025,
121
+ "learning_rate": 7.621536417786159e-05,
122
+ "loss": 1.6519,
123
+ "step": 14
124
+ },
125
+ {
126
+ "epoch": 0.3931203931203931,
127
+ "grad_norm": 0.493300199508667,
128
+ "learning_rate": 7.251018724088367e-05,
129
+ "loss": 1.624,
130
+ "step": 15
131
+ },
132
+ {
133
+ "epoch": 0.41932841932841936,
134
+ "grad_norm": 0.5251982808113098,
135
+ "learning_rate": 6.864282388901544e-05,
136
+ "loss": 1.6108,
137
+ "step": 16
138
+ },
139
+ {
140
+ "epoch": 0.44553644553644556,
141
+ "grad_norm": 0.4884173274040222,
142
+ "learning_rate": 6.464113856382752e-05,
143
+ "loss": 1.6367,
144
+ "step": 17
145
+ },
146
+ {
147
+ "epoch": 0.47174447174447176,
148
+ "grad_norm": 0.5205239653587341,
149
+ "learning_rate": 6.0533963499786314e-05,
150
+ "loss": 1.6079,
151
+ "step": 18
152
+ },
153
+ {
154
+ "epoch": 0.49795249795249796,
155
+ "grad_norm": 0.5160295963287354,
156
+ "learning_rate": 5.6350890987343944e-05,
157
+ "loss": 1.6298,
158
+ "step": 19
159
+ },
160
+ {
161
+ "epoch": 0.5241605241605242,
162
+ "grad_norm": 0.46167778968811035,
163
+ "learning_rate": 5.212206015980742e-05,
164
+ "loss": 1.7101,
165
+ "step": 20
166
+ },
167
+ {
168
+ "epoch": 0.5241605241605242,
169
+ "eval_loss": 1.6339422464370728,
170
+ "eval_runtime": 6.9719,
171
+ "eval_samples_per_second": 36.862,
172
+ "eval_steps_per_second": 9.323,
173
+ "step": 20
174
  }
175
  ],
176
  "logging_steps": 1,
 
190
  "attributes": {}
191
  }
192
  },
193
+ "total_flos": 6.275680826713702e+16,
194
  "train_batch_size": 2,
195
  "trial_name": null,
196
  "trial_params": null