Jingxuan Tang committed on
Commit
3d9a836
·
1 Parent(s): c4e98a0

Upload 3 files

Browse files
Files changed (3) hide show
  1. adapter_config.json +20 -0
  2. adapter_model.bin +3 -0
  3. trainer_state.json +291 -0
adapter_config.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "indiejoseph/cantonese-llama-2-7b-oasst-v1",
4
+ "bias": "none",
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 32,
11
+ "lora_dropout": 0.05,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 16,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "down_proj"
18
+ ],
19
+ "task_type": "CAUSAL_LM"
20
+ }
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fa37fb870780f26aa1e9127f6dcfc5b66b835d1f9dc63cfa9c06ccc18d58d01
3
+ size 15476477
trainer_state.json ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8720703125,
3
+ "best_model_checkpoint": "./alma-7b-parallel-ft-lora-canto/checkpoint-589",
4
+ "epoch": 1.99673735725938,
5
+ "global_step": 612,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.1,
12
+ "learning_rate": 0.0006831300510639731,
13
+ "loss": 1.8545,
14
+ "step": 31
15
+ },
16
+ {
17
+ "epoch": 0.1,
18
+ "eval_loss": 1.275390625,
19
+ "eval_runtime": 161.389,
20
+ "eval_samples_per_second": 26.049,
21
+ "eval_steps_per_second": 1.63,
22
+ "step": 31
23
+ },
24
+ {
25
+ "epoch": 0.2,
26
+ "learning_rate": 0.0003944053188733077,
27
+ "loss": 1.2078,
28
+ "step": 62
29
+ },
30
+ {
31
+ "epoch": 0.2,
32
+ "eval_loss": 1.1513671875,
33
+ "eval_runtime": 161.4937,
34
+ "eval_samples_per_second": 26.032,
35
+ "eval_steps_per_second": 1.629,
36
+ "step": 62
37
+ },
38
+ {
39
+ "epoch": 0.3,
40
+ "learning_rate": 0.00030348848933344196,
41
+ "loss": 1.1077,
42
+ "step": 93
43
+ },
44
+ {
45
+ "epoch": 0.3,
46
+ "eval_loss": 1.0869140625,
47
+ "eval_runtime": 161.573,
48
+ "eval_samples_per_second": 26.019,
49
+ "eval_steps_per_second": 1.628,
50
+ "step": 93
51
+ },
52
+ {
53
+ "epoch": 0.4,
54
+ "learning_rate": 0.00025577443333465435,
55
+ "loss": 1.067,
56
+ "step": 124
57
+ },
58
+ {
59
+ "epoch": 0.4,
60
+ "eval_loss": 1.0439453125,
61
+ "eval_runtime": 161.4591,
62
+ "eval_samples_per_second": 26.038,
63
+ "eval_steps_per_second": 1.629,
64
+ "step": 124
65
+ },
66
+ {
67
+ "epoch": 0.51,
68
+ "learning_rate": 0.0002252213082307254,
69
+ "loss": 1.0179,
70
+ "step": 155
71
+ },
72
+ {
73
+ "epoch": 0.51,
74
+ "eval_loss": 1.015625,
75
+ "eval_runtime": 161.6031,
76
+ "eval_samples_per_second": 26.014,
77
+ "eval_steps_per_second": 1.627,
78
+ "step": 155
79
+ },
80
+ {
81
+ "epoch": 0.61,
82
+ "learning_rate": 0.00020351933162035313,
83
+ "loss": 0.9852,
84
+ "step": 186
85
+ },
86
+ {
87
+ "epoch": 0.61,
88
+ "eval_loss": 0.98876953125,
89
+ "eval_runtime": 161.723,
90
+ "eval_samples_per_second": 25.995,
91
+ "eval_steps_per_second": 1.626,
92
+ "step": 186
93
+ },
94
+ {
95
+ "epoch": 0.71,
96
+ "learning_rate": 0.0001870828693386971,
97
+ "loss": 0.9682,
98
+ "step": 217
99
+ },
100
+ {
101
+ "epoch": 0.71,
102
+ "eval_loss": 0.97314453125,
103
+ "eval_runtime": 161.5091,
104
+ "eval_samples_per_second": 26.029,
105
+ "eval_steps_per_second": 1.628,
106
+ "step": 217
107
+ },
108
+ {
109
+ "epoch": 0.81,
110
+ "learning_rate": 0.00017407765595569785,
111
+ "loss": 0.9565,
112
+ "step": 248
113
+ },
114
+ {
115
+ "epoch": 0.81,
116
+ "eval_loss": 0.95751953125,
117
+ "eval_runtime": 161.4684,
118
+ "eval_samples_per_second": 26.036,
119
+ "eval_steps_per_second": 1.629,
120
+ "step": 248
121
+ },
122
+ {
123
+ "epoch": 0.91,
124
+ "learning_rate": 0.00016345506187300654,
125
+ "loss": 0.9322,
126
+ "step": 279
127
+ },
128
+ {
129
+ "epoch": 0.91,
130
+ "eval_loss": 0.94091796875,
131
+ "eval_runtime": 161.6846,
132
+ "eval_samples_per_second": 26.001,
133
+ "eval_steps_per_second": 1.627,
134
+ "step": 279
135
+ },
136
+ {
137
+ "epoch": 1.01,
138
+ "learning_rate": 0.0001545664419689318,
139
+ "loss": 0.9079,
140
+ "step": 310
141
+ },
142
+ {
143
+ "epoch": 1.01,
144
+ "eval_loss": 0.93212890625,
145
+ "eval_runtime": 159.6077,
146
+ "eval_samples_per_second": 26.34,
147
+ "eval_steps_per_second": 1.648,
148
+ "step": 310
149
+ },
150
+ {
151
+ "epoch": 1.11,
152
+ "learning_rate": 0.00014698618394803282,
153
+ "loss": 0.8272,
154
+ "step": 341
155
+ },
156
+ {
157
+ "epoch": 1.11,
158
+ "eval_loss": 0.92333984375,
159
+ "eval_runtime": 159.6433,
160
+ "eval_samples_per_second": 26.334,
161
+ "eval_steps_per_second": 1.647,
162
+ "step": 341
163
+ },
164
+ {
165
+ "epoch": 1.21,
166
+ "learning_rate": 0.0001404218994998819,
167
+ "loss": 0.7966,
168
+ "step": 372
169
+ },
170
+ {
171
+ "epoch": 1.21,
172
+ "eval_loss": 0.92041015625,
173
+ "eval_runtime": 159.6168,
174
+ "eval_samples_per_second": 26.338,
175
+ "eval_steps_per_second": 1.648,
176
+ "step": 372
177
+ },
178
+ {
179
+ "epoch": 1.31,
180
+ "learning_rate": 0.00013466519604525415,
181
+ "loss": 0.7947,
182
+ "step": 403
183
+ },
184
+ {
185
+ "epoch": 1.31,
186
+ "eval_loss": 0.9150390625,
187
+ "eval_runtime": 159.659,
188
+ "eval_samples_per_second": 26.331,
189
+ "eval_steps_per_second": 1.647,
190
+ "step": 403
191
+ },
192
+ {
193
+ "epoch": 1.42,
194
+ "learning_rate": 0.00012956299912940142,
195
+ "loss": 0.8089,
196
+ "step": 434
197
+ },
198
+ {
199
+ "epoch": 1.42,
200
+ "eval_loss": 0.9052734375,
201
+ "eval_runtime": 159.599,
202
+ "eval_samples_per_second": 26.341,
203
+ "eval_steps_per_second": 1.648,
204
+ "step": 434
205
+ },
206
+ {
207
+ "epoch": 1.52,
208
+ "learning_rate": 0.000125,
209
+ "loss": 0.8113,
210
+ "step": 465
211
+ },
212
+ {
213
+ "epoch": 1.52,
214
+ "eval_loss": 0.89990234375,
215
+ "eval_runtime": 159.5797,
216
+ "eval_samples_per_second": 26.344,
217
+ "eval_steps_per_second": 1.648,
218
+ "step": 465
219
+ },
220
+ {
221
+ "epoch": 1.62,
222
+ "learning_rate": 0.00012088746297956931,
223
+ "loss": 0.7849,
224
+ "step": 496
225
+ },
226
+ {
227
+ "epoch": 1.62,
228
+ "eval_loss": 0.89111328125,
229
+ "eval_runtime": 159.5898,
230
+ "eval_samples_per_second": 26.343,
231
+ "eval_steps_per_second": 1.648,
232
+ "step": 496
233
+ },
234
+ {
235
+ "epoch": 1.72,
236
+ "learning_rate": 0.00011715583722580123,
237
+ "loss": 0.7796,
238
+ "step": 527
239
+ },
240
+ {
241
+ "epoch": 1.72,
242
+ "eval_loss": 0.8828125,
243
+ "eval_runtime": 159.6567,
244
+ "eval_samples_per_second": 26.331,
245
+ "eval_steps_per_second": 1.647,
246
+ "step": 527
247
+ },
248
+ {
249
+ "epoch": 1.82,
250
+ "learning_rate": 0.00011374973339937476,
251
+ "loss": 0.7768,
252
+ "step": 558
253
+ },
254
+ {
255
+ "epoch": 1.82,
256
+ "eval_loss": 0.87548828125,
257
+ "eval_runtime": 159.5528,
258
+ "eval_samples_per_second": 26.349,
259
+ "eval_steps_per_second": 1.648,
260
+ "step": 558
261
+ },
262
+ {
263
+ "epoch": 1.92,
264
+ "learning_rate": 0.00011062441971717747,
265
+ "loss": 0.7784,
266
+ "step": 589
267
+ },
268
+ {
269
+ "epoch": 1.92,
270
+ "eval_loss": 0.8720703125,
271
+ "eval_runtime": 159.542,
272
+ "eval_samples_per_second": 26.35,
273
+ "eval_steps_per_second": 1.648,
274
+ "step": 589
275
+ },
276
+ {
277
+ "epoch": 2.0,
278
+ "step": 612,
279
+ "total_flos": 7.958718731863982e+17,
280
+ "train_loss": 0.9485874550015319,
281
+ "train_runtime": 6936.4877,
282
+ "train_samples_per_second": 5.654,
283
+ "train_steps_per_second": 0.088
284
+ }
285
+ ],
286
+ "max_steps": 612,
287
+ "num_train_epochs": 2,
288
+ "total_flos": 7.958718731863982e+17,
289
+ "trial_name": null,
290
+ "trial_params": null
291
+ }