TrumpBiden committed on
Commit
ecea13e
1 Parent(s): 18b5f61

Model save

Browse files
README.md CHANGED
@@ -15,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 1.0052
19
 
20
  ## Model description
21
 
@@ -39,10 +39,8 @@ The following hyperparameters were used during training:
39
  - eval_batch_size: 8
40
  - seed: 42
41
  - distributed_type: multi-GPU
42
- - num_devices: 2
43
  - gradient_accumulation_steps: 128
44
- - total_train_batch_size: 1024
45
- - total_eval_batch_size: 16
46
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
47
  - lr_scheduler_type: cosine
48
  - num_epochs: 1
@@ -51,7 +49,7 @@ The following hyperparameters were used during training:
51
 
52
  | Training Loss | Epoch | Step | Validation Loss |
53
  |:-------------:|:-----:|:----:|:---------------:|
54
- | 0.9999 | 0.67 | 136 | 1.0053 |
55
 
56
 
57
  ### Framework versions
 
15
 
16
  This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 1.1352
19
 
20
  ## Model description
21
 
 
39
  - eval_batch_size: 8
40
  - seed: 42
41
  - distributed_type: multi-GPU
 
42
  - gradient_accumulation_steps: 128
43
+ - total_train_batch_size: 512
 
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: cosine
46
  - num_epochs: 1
 
49
 
50
  | Training Loss | Epoch | Step | Validation Loss |
51
  |:-------------:|:-----:|:----:|:---------------:|
52
+ | 1.1217 | 0.51 | 1 | 1.1352 |
53
 
54
 
55
  ### Framework versions
adapter_config.json CHANGED
@@ -16,10 +16,10 @@
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
- "o_proj",
20
  "v_proj",
 
21
  "k_proj",
22
- "q_proj"
23
  ],
24
  "task_type": "CAUSAL_LM"
25
  }
 
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
 
19
  "v_proj",
20
+ "q_proj",
21
  "k_proj",
22
+ "o_proj"
23
  ],
24
  "task_type": "CAUSAL_LM"
25
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4e7112cc3fd1d00c0dd964482b4fd6c8d9b256193941da9223bddebd8a13392
3
  size 218138576
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b5c2c912aabf3d090802e193c213f70f281e46ddc175ace8d15677bb0291396
3
  size 218138576
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 0.67,
3
- "eval_loss": 1.0052200555801392,
4
- "eval_runtime": 2917.7145,
5
  "eval_samples": 23110,
6
- "eval_samples_per_second": 7.921,
7
- "eval_steps_per_second": 0.495,
8
- "train_loss": 1.030409187078476,
9
- "train_runtime": 89444.4373,
10
- "train_samples": 207865,
11
- "train_samples_per_second": 2.324,
12
- "train_steps_per_second": 0.002
13
  }
 
1
  {
2
+ "epoch": 0.51,
3
+ "eval_loss": 1.1351821422576904,
4
+ "eval_runtime": 5760.9086,
5
  "eval_samples": 23110,
6
+ "eval_samples_per_second": 4.012,
7
+ "eval_steps_per_second": 0.501,
8
+ "train_loss": 1.1217185258865356,
9
+ "train_runtime": 6493.465,
10
+ "train_samples": 1000,
11
+ "train_samples_per_second": 0.154,
12
+ "train_steps_per_second": 0.0
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 0.67,
3
- "eval_loss": 1.0052200555801392,
4
- "eval_runtime": 2917.7145,
5
  "eval_samples": 23110,
6
- "eval_samples_per_second": 7.921,
7
- "eval_steps_per_second": 0.495
8
  }
 
1
  {
2
+ "epoch": 0.51,
3
+ "eval_loss": 1.1351821422576904,
4
+ "eval_runtime": 5760.9086,
5
  "eval_samples": 23110,
6
+ "eval_samples_per_second": 4.012,
7
+ "eval_steps_per_second": 0.501
8
  }
runs/Dec18_05-24-09_a40-2/events.out.tfevents.1702877116.a40-2.3341159.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fba86adbe408b6c5c4baa8a106f951efe3f50f369e947f1103fd5fdad9068b4
3
+ size 5021
runs/Dec18_05-24-09_a40-2/events.out.tfevents.1702889370.a40-2.3341159.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9145e0ca8173f2e1d010ce3f20fefea139126cffc4d7437ac479e02eea86d5b5
3
+ size 354
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 0.67,
3
- "train_loss": 1.030409187078476,
4
- "train_runtime": 89444.4373,
5
- "train_samples": 207865,
6
- "train_samples_per_second": 2.324,
7
- "train_steps_per_second": 0.002
8
  }
 
1
  {
2
+ "epoch": 0.51,
3
+ "train_loss": 1.1217185258865356,
4
+ "train_runtime": 6493.465,
5
+ "train_samples": 1000,
6
+ "train_samples_per_second": 0.154,
7
+ "train_steps_per_second": 0.0
8
  }
trainer_state.json CHANGED
@@ -1,204 +1,42 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.6699507389162561,
5
  "eval_steps": 500,
6
- "global_step": 136,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.0,
13
- "learning_rate": 1.9998802517966852e-05,
14
- "loss": 1.1244,
15
  "step": 1
16
  },
17
  {
18
- "epoch": 0.02,
19
- "learning_rate": 1.997007728639956e-05,
20
- "loss": 1.122,
21
- "step": 5
22
- },
23
- {
24
- "epoch": 0.05,
25
- "learning_rate": 1.9880488219356086e-05,
26
- "loss": 1.1164,
27
- "step": 10
28
- },
29
- {
30
- "epoch": 0.07,
31
- "learning_rate": 1.973176894846855e-05,
32
- "loss": 1.0903,
33
- "step": 15
34
- },
35
- {
36
- "epoch": 0.1,
37
- "learning_rate": 1.9524809490566878e-05,
38
- "loss": 1.0803,
39
- "step": 20
40
- },
41
- {
42
- "epoch": 0.12,
43
- "learning_rate": 1.926084840336821e-05,
44
- "loss": 1.0711,
45
- "step": 25
46
- },
47
- {
48
- "epoch": 0.15,
49
- "learning_rate": 1.894146537327533e-05,
50
- "loss": 1.0549,
51
- "step": 30
52
- },
53
- {
54
- "epoch": 0.17,
55
- "learning_rate": 1.8568571761675893e-05,
56
- "loss": 1.0363,
57
- "step": 35
58
- },
59
- {
60
- "epoch": 0.2,
61
- "learning_rate": 1.814439916631857e-05,
62
- "loss": 1.038,
63
- "step": 40
64
- },
65
- {
66
- "epoch": 0.22,
67
- "learning_rate": 1.7671486066220965e-05,
68
- "loss": 1.0281,
69
- "step": 45
70
- },
71
- {
72
- "epoch": 0.25,
73
- "learning_rate": 1.7152662630033506e-05,
74
- "loss": 1.0167,
75
- "step": 50
76
- },
77
- {
78
- "epoch": 0.27,
79
- "learning_rate": 1.659103377877423e-05,
80
- "loss": 1.0258,
81
- "step": 55
82
- },
83
- {
84
- "epoch": 0.3,
85
- "learning_rate": 1.598996060429634e-05,
86
- "loss": 1.0134,
87
- "step": 60
88
- },
89
- {
90
- "epoch": 0.32,
91
- "learning_rate": 1.5353040254690396e-05,
92
- "loss": 1.0172,
93
- "step": 65
94
- },
95
- {
96
- "epoch": 0.34,
97
- "learning_rate": 1.4684084406997903e-05,
98
- "loss": 1.0135,
99
- "step": 70
100
- },
101
- {
102
- "epoch": 0.37,
103
- "learning_rate": 1.3987096456067236e-05,
104
- "loss": 1.0109,
105
- "step": 75
106
- },
107
- {
108
- "epoch": 0.39,
109
- "learning_rate": 1.3266247556066122e-05,
110
- "loss": 0.9973,
111
- "step": 80
112
- },
113
- {
114
- "epoch": 0.42,
115
- "learning_rate": 1.252585165803135e-05,
116
- "loss": 1.0118,
117
- "step": 85
118
- },
119
- {
120
- "epoch": 0.44,
121
- "learning_rate": 1.1770339692844484e-05,
122
- "loss": 1.0013,
123
- "step": 90
124
- },
125
- {
126
- "epoch": 0.47,
127
- "learning_rate": 1.1004233054136726e-05,
128
- "loss": 1.0107,
129
- "step": 95
130
- },
131
- {
132
- "epoch": 0.49,
133
- "learning_rate": 1.0232116539815558e-05,
134
- "loss": 1.0039,
135
- "step": 100
136
- },
137
- {
138
- "epoch": 0.52,
139
- "learning_rate": 9.458610914145826e-06,
140
- "loss": 1.0032,
141
- "step": 105
142
- },
143
- {
144
- "epoch": 0.54,
145
- "learning_rate": 8.688345254588579e-06,
146
- "loss": 1.008,
147
- "step": 110
148
- },
149
- {
150
- "epoch": 0.57,
151
- "learning_rate": 7.92592924888925e-06,
152
- "loss": 1.0066,
153
- "step": 115
154
- },
155
- {
156
- "epoch": 0.59,
157
- "learning_rate": 7.175925608204428e-06,
158
- "loss": 1.006,
159
- "step": 120
160
- },
161
- {
162
- "epoch": 0.62,
163
- "learning_rate": 6.442822761362015e-06,
164
- "loss": 1.0059,
165
- "step": 125
166
- },
167
- {
168
- "epoch": 0.64,
169
- "learning_rate": 5.731007993667155e-06,
170
- "loss": 1.001,
171
- "step": 130
172
- },
173
- {
174
- "epoch": 0.67,
175
- "learning_rate": 5.044741191005908e-06,
176
- "loss": 0.9999,
177
- "step": 135
178
- },
179
- {
180
- "epoch": 0.67,
181
- "eval_loss": 1.005275845527649,
182
- "eval_runtime": 2913.4219,
183
- "eval_samples_per_second": 7.932,
184
- "eval_steps_per_second": 0.496,
185
- "step": 136
186
  },
187
  {
188
- "epoch": 0.67,
189
- "step": 136,
190
- "total_flos": 1.227587501862106e+19,
191
- "train_loss": 1.030409187078476,
192
- "train_runtime": 89444.4373,
193
- "train_samples_per_second": 2.324,
194
- "train_steps_per_second": 0.002
195
  }
196
  ],
197
  "logging_steps": 5,
198
- "max_steps": 203,
199
  "num_train_epochs": 1,
200
  "save_steps": 500,
201
- "total_flos": 1.227587501862106e+19,
202
  "trial_name": null,
203
  "trial_params": null
204
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.512,
5
  "eval_steps": 500,
6
+ "global_step": 1,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.51,
13
+ "learning_rate": 0.0,
14
+ "loss": 1.1217,
15
  "step": 1
16
  },
17
  {
18
+ "epoch": 0.51,
19
+ "eval_loss": 1.1351821422576904,
20
+ "eval_runtime": 5862.1633,
21
+ "eval_samples_per_second": 3.942,
22
+ "eval_steps_per_second": 0.493,
23
+ "step": 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  },
25
  {
26
+ "epoch": 0.51,
27
+ "step": 1,
28
+ "total_flos": 4.507945275280589e+16,
29
+ "train_loss": 1.1217185258865356,
30
+ "train_runtime": 6493.465,
31
+ "train_samples_per_second": 0.154,
32
+ "train_steps_per_second": 0.0
33
  }
34
  ],
35
  "logging_steps": 5,
36
+ "max_steps": 1,
37
  "num_train_epochs": 1,
38
  "save_steps": 500,
39
+ "total_flos": 4.507945275280589e+16,
40
  "trial_name": null,
41
  "trial_params": null
42
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4abbb7794a5869c706f86ff93250d606255d477d220e7a635acef423d64f2ec
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2bc14001a0c5f5939f44c7da712b8b242818373ff67764911429cf5317a6e4b
3
  size 4664