Isaak Carter Augustus committed
Commit 9281c64
1 Parent(s): ce6ec12

Update README.md

Files changed (1)
  1. README.md +388 -0
README.md CHANGED
@@ -17,6 +17,394 @@ JOSIE_Beta-4-7B-slerp is a merge of the following models using [LazyMergekit](ht
  * [Weyaxi/Einstein-v4-7B](https://huggingface.co/Weyaxi/Einstein-v4-7B)
  * [cognitivecomputations/dolphin-2.8-experiment26-7b](https://huggingface.co/cognitivecomputations/dolphin-2.8-experiment26-7b)

+ # IMPORTANT!!!
+
+ Looking at the eval results on the LLM Leaderboard, this is my second-best-performing model; the best one is Beta 3.
+
+ ```json
+ {
+     "all": {
+         "acc": 0.6395082113582112,
+         "acc_stderr": 0.0321581453772247,
+         "acc_norm": 0.6406802207782241,
+         "acc_norm_stderr": 0.032807499003593296,
+         "mc1": 0.379436964504284,
+         "mc1_stderr": 0.01698703926614299,
+         "mc2": 0.5593281467190233,
+         "mc2_stderr": 0.0156123862411416
+     },
+     "harness|arc:challenge|25": {
+         "acc": 0.6143344709897611,
+         "acc_stderr": 0.014224250973257182,
+         "acc_norm": 0.6356655290102389,
+         "acc_norm_stderr": 0.014063260279882419
+     },
+     "harness|hellaswag|10": {
+         "acc": 0.6618203545110536,
+         "acc_stderr": 0.004721231637092722,
+         "acc_norm": 0.8409679346743677,
+         "acc_norm_stderr": 0.0036495858528211842
+     },
+     "harness|hendrycksTest-abstract_algebra|5": {
+         "acc": 0.34,
+         "acc_stderr": 0.047609522856952365,
+         "acc_norm": 0.34,
+         "acc_norm_stderr": 0.047609522856952365
+     },
+     "harness|hendrycksTest-anatomy|5": {
+         "acc": 0.6,
+         "acc_stderr": 0.04232073695151589,
+         "acc_norm": 0.6,
+         "acc_norm_stderr": 0.04232073695151589
+     },
+     "harness|hendrycksTest-astronomy|5": {
+         "acc": 0.6973684210526315,
+         "acc_stderr": 0.037385206761196686,
+         "acc_norm": 0.6973684210526315,
+         "acc_norm_stderr": 0.037385206761196686
+     },
+     "harness|hendrycksTest-business_ethics|5": {
+         "acc": 0.6,
+         "acc_stderr": 0.04923659639173309,
+         "acc_norm": 0.6,
+         "acc_norm_stderr": 0.04923659639173309
+     },
+     "harness|hendrycksTest-clinical_knowledge|5": {
+         "acc": 0.6943396226415094,
+         "acc_stderr": 0.028353298073322666,
+         "acc_norm": 0.6943396226415094,
+         "acc_norm_stderr": 0.028353298073322666
+     },
+     "harness|hendrycksTest-college_biology|5": {
+         "acc": 0.7638888888888888,
+         "acc_stderr": 0.03551446610810826,
+         "acc_norm": 0.7638888888888888,
+         "acc_norm_stderr": 0.03551446610810826
+     },
+     "harness|hendrycksTest-college_chemistry|5": {
+         "acc": 0.46,
+         "acc_stderr": 0.05009082659620333,
+         "acc_norm": 0.46,
+         "acc_norm_stderr": 0.05009082659620333
+     },
+     "harness|hendrycksTest-college_computer_science|5": {
+         "acc": 0.52,
+         "acc_stderr": 0.050211673156867795,
+         "acc_norm": 0.52,
+         "acc_norm_stderr": 0.050211673156867795
+     },
+     "harness|hendrycksTest-college_mathematics|5": {
+         "acc": 0.32,
+         "acc_stderr": 0.04688261722621504,
+         "acc_norm": 0.32,
+         "acc_norm_stderr": 0.04688261722621504
+     },
+     "harness|hendrycksTest-college_medicine|5": {
+         "acc": 0.6705202312138728,
+         "acc_stderr": 0.03583901754736412,
+         "acc_norm": 0.6705202312138728,
+         "acc_norm_stderr": 0.03583901754736412
+     },
+     "harness|hendrycksTest-college_physics|5": {
+         "acc": 0.3627450980392157,
+         "acc_stderr": 0.04784060704105654,
+         "acc_norm": 0.3627450980392157,
+         "acc_norm_stderr": 0.04784060704105654
+     },
+     "harness|hendrycksTest-computer_security|5": {
+         "acc": 0.75,
+         "acc_stderr": 0.04351941398892446,
+         "acc_norm": 0.75,
+         "acc_norm_stderr": 0.04351941398892446
+     },
+     "harness|hendrycksTest-conceptual_physics|5": {
+         "acc": 0.5574468085106383,
+         "acc_stderr": 0.03246956919789958,
+         "acc_norm": 0.5574468085106383,
+         "acc_norm_stderr": 0.03246956919789958
+     },
+     "harness|hendrycksTest-econometrics|5": {
+         "acc": 0.4473684210526316,
+         "acc_stderr": 0.04677473004491199,
+         "acc_norm": 0.4473684210526316,
+         "acc_norm_stderr": 0.04677473004491199
+     },
+     "harness|hendrycksTest-electrical_engineering|5": {
+         "acc": 0.5586206896551724,
+         "acc_stderr": 0.04137931034482758,
+         "acc_norm": 0.5586206896551724,
+         "acc_norm_stderr": 0.04137931034482758
+     },
+     "harness|hendrycksTest-elementary_mathematics|5": {
+         "acc": 0.41005291005291006,
+         "acc_stderr": 0.025331202438944444,
+         "acc_norm": 0.41005291005291006,
+         "acc_norm_stderr": 0.025331202438944444
+     },
+     "harness|hendrycksTest-formal_logic|5": {
+         "acc": 0.42857142857142855,
+         "acc_stderr": 0.0442626668137991,
+         "acc_norm": 0.42857142857142855,
+         "acc_norm_stderr": 0.0442626668137991
+     },
+     "harness|hendrycksTest-global_facts|5": {
+         "acc": 0.34,
+         "acc_stderr": 0.04760952285695235,
+         "acc_norm": 0.34,
+         "acc_norm_stderr": 0.04760952285695235
+     },
+     "harness|hendrycksTest-high_school_biology|5": {
+         "acc": 0.7709677419354839,
+         "acc_stderr": 0.023904914311782648,
+         "acc_norm": 0.7709677419354839,
+         "acc_norm_stderr": 0.023904914311782648
+     },
+     "harness|hendrycksTest-high_school_chemistry|5": {
+         "acc": 0.5073891625615764,
+         "acc_stderr": 0.035176035403610105,
+         "acc_norm": 0.5073891625615764,
+         "acc_norm_stderr": 0.035176035403610105
+     },
+     "harness|hendrycksTest-high_school_computer_science|5": {
+         "acc": 0.69,
+         "acc_stderr": 0.04648231987117316,
+         "acc_norm": 0.69,
+         "acc_norm_stderr": 0.04648231987117316
+     },
+     "harness|hendrycksTest-high_school_european_history|5": {
+         "acc": 0.7818181818181819,
+         "acc_stderr": 0.03225078108306289,
+         "acc_norm": 0.7818181818181819,
+         "acc_norm_stderr": 0.03225078108306289
+     },
+     "harness|hendrycksTest-high_school_geography|5": {
+         "acc": 0.8080808080808081,
+         "acc_stderr": 0.028057791672989017,
+         "acc_norm": 0.8080808080808081,
+         "acc_norm_stderr": 0.028057791672989017
+     },
+     "harness|hendrycksTest-high_school_government_and_politics|5": {
+         "acc": 0.8963730569948186,
+         "acc_stderr": 0.02199531196364424,
+         "acc_norm": 0.8963730569948186,
+         "acc_norm_stderr": 0.02199531196364424
+     },
+     "harness|hendrycksTest-high_school_macroeconomics|5": {
+         "acc": 0.6435897435897436,
+         "acc_stderr": 0.02428314052946731,
+         "acc_norm": 0.6435897435897436,
+         "acc_norm_stderr": 0.02428314052946731
+     },
+     "harness|hendrycksTest-high_school_mathematics|5": {
+         "acc": 0.32592592592592595,
+         "acc_stderr": 0.028578348365473072,
+         "acc_norm": 0.32592592592592595,
+         "acc_norm_stderr": 0.028578348365473072
+     },
+     "harness|hendrycksTest-high_school_microeconomics|5": {
+         "acc": 0.6764705882352942,
+         "acc_stderr": 0.030388353551886797,
+         "acc_norm": 0.6764705882352942,
+         "acc_norm_stderr": 0.030388353551886797
+     },
+     "harness|hendrycksTest-high_school_physics|5": {
+         "acc": 0.3576158940397351,
+         "acc_stderr": 0.03913453431177258,
+         "acc_norm": 0.3576158940397351,
+         "acc_norm_stderr": 0.03913453431177258
+     },
+     "harness|hendrycksTest-high_school_psychology|5": {
+         "acc": 0.8440366972477065,
+         "acc_stderr": 0.015555802713590167,
+         "acc_norm": 0.8440366972477065,
+         "acc_norm_stderr": 0.015555802713590167
+     },
+     "harness|hendrycksTest-high_school_statistics|5": {
+         "acc": 0.5324074074074074,
+         "acc_stderr": 0.03402801581358966,
+         "acc_norm": 0.5324074074074074,
+         "acc_norm_stderr": 0.03402801581358966
+     },
+     "harness|hendrycksTest-high_school_us_history|5": {
+         "acc": 0.803921568627451,
+         "acc_stderr": 0.027865942286639318,
+         "acc_norm": 0.803921568627451,
+         "acc_norm_stderr": 0.027865942286639318
+     },
+     "harness|hendrycksTest-high_school_world_history|5": {
+         "acc": 0.7932489451476793,
+         "acc_stderr": 0.0263616516683891,
+         "acc_norm": 0.7932489451476793,
+         "acc_norm_stderr": 0.0263616516683891
+     },
+     "harness|hendrycksTest-human_aging|5": {
+         "acc": 0.6905829596412556,
+         "acc_stderr": 0.03102441174057221,
+         "acc_norm": 0.6905829596412556,
+         "acc_norm_stderr": 0.03102441174057221
+     },
+     "harness|hendrycksTest-human_sexuality|5": {
+         "acc": 0.7938931297709924,
+         "acc_stderr": 0.035477710041594654,
+         "acc_norm": 0.7938931297709924,
+         "acc_norm_stderr": 0.035477710041594654
+     },
+     "harness|hendrycksTest-international_law|5": {
+         "acc": 0.7851239669421488,
+         "acc_stderr": 0.03749492448709696,
+         "acc_norm": 0.7851239669421488,
+         "acc_norm_stderr": 0.03749492448709696
+     },
+     "harness|hendrycksTest-jurisprudence|5": {
+         "acc": 0.8240740740740741,
+         "acc_stderr": 0.036809181416738807,
+         "acc_norm": 0.8240740740740741,
+         "acc_norm_stderr": 0.036809181416738807
+     },
+     "harness|hendrycksTest-logical_fallacies|5": {
+         "acc": 0.7239263803680982,
+         "acc_stderr": 0.035123852837050475,
+         "acc_norm": 0.7239263803680982,
+         "acc_norm_stderr": 0.035123852837050475
+     },
+     "harness|hendrycksTest-machine_learning|5": {
+         "acc": 0.44642857142857145,
+         "acc_stderr": 0.04718471485219588,
+         "acc_norm": 0.44642857142857145,
+         "acc_norm_stderr": 0.04718471485219588
+     },
+     "harness|hendrycksTest-management|5": {
+         "acc": 0.7766990291262136,
+         "acc_stderr": 0.04123553189891431,
+         "acc_norm": 0.7766990291262136,
+         "acc_norm_stderr": 0.04123553189891431
+     },
+     "harness|hendrycksTest-marketing|5": {
+         "acc": 0.8760683760683761,
+         "acc_stderr": 0.021586494001281376,
+         "acc_norm": 0.8760683760683761,
+         "acc_norm_stderr": 0.021586494001281376
+     },
+     "harness|hendrycksTest-medical_genetics|5": {
+         "acc": 0.73,
+         "acc_stderr": 0.044619604333847394,
+         "acc_norm": 0.73,
+         "acc_norm_stderr": 0.044619604333847394
+     },
+     "harness|hendrycksTest-miscellaneous|5": {
+         "acc": 0.8212005108556832,
+         "acc_stderr": 0.01370264371536898,
+         "acc_norm": 0.8212005108556832,
+         "acc_norm_stderr": 0.01370264371536898
+     },
+     "harness|hendrycksTest-moral_disputes|5": {
+         "acc": 0.7167630057803468,
+         "acc_stderr": 0.024257901705323374,
+         "acc_norm": 0.7167630057803468,
+         "acc_norm_stderr": 0.024257901705323374
+     },
+     "harness|hendrycksTest-moral_scenarios|5": {
+         "acc": 0.2759776536312849,
+         "acc_stderr": 0.014950103002475356,
+         "acc_norm": 0.2759776536312849,
+         "acc_norm_stderr": 0.014950103002475356
+     },
+     "harness|hendrycksTest-nutrition|5": {
+         "acc": 0.7189542483660131,
+         "acc_stderr": 0.02573885479781873,
+         "acc_norm": 0.7189542483660131,
+         "acc_norm_stderr": 0.02573885479781873
+     },
+     "harness|hendrycksTest-philosophy|5": {
+         "acc": 0.7202572347266881,
+         "acc_stderr": 0.02549425935069491,
+         "acc_norm": 0.7202572347266881,
+         "acc_norm_stderr": 0.02549425935069491
+     },
+     "harness|hendrycksTest-prehistory|5": {
+         "acc": 0.7314814814814815,
+         "acc_stderr": 0.024659685185967284,
+         "acc_norm": 0.7314814814814815,
+         "acc_norm_stderr": 0.024659685185967284
+     },
+     "harness|hendrycksTest-professional_accounting|5": {
+         "acc": 0.475177304964539,
+         "acc_stderr": 0.02979071924382972,
+         "acc_norm": 0.475177304964539,
+         "acc_norm_stderr": 0.02979071924382972
+     },
+     "harness|hendrycksTest-professional_law|5": {
+         "acc": 0.4621903520208605,
+         "acc_stderr": 0.012733671880342504,
+         "acc_norm": 0.4621903520208605,
+         "acc_norm_stderr": 0.012733671880342504
+     },
+     "harness|hendrycksTest-professional_medicine|5": {
+         "acc": 0.6691176470588235,
+         "acc_stderr": 0.028582709753898445,
+         "acc_norm": 0.6691176470588235,
+         "acc_norm_stderr": 0.028582709753898445
+     },
+     "harness|hendrycksTest-professional_psychology|5": {
+         "acc": 0.6535947712418301,
+         "acc_stderr": 0.01924978569171721,
+         "acc_norm": 0.6535947712418301,
+         "acc_norm_stderr": 0.01924978569171721
+     },
+     "harness|hendrycksTest-public_relations|5": {
+         "acc": 0.6636363636363637,
+         "acc_stderr": 0.04525393596302506,
+         "acc_norm": 0.6636363636363637,
+         "acc_norm_stderr": 0.04525393596302506
+     },
+     "harness|hendrycksTest-security_studies|5": {
+         "acc": 0.7428571428571429,
+         "acc_stderr": 0.027979823538744546,
+         "acc_norm": 0.7428571428571429,
+         "acc_norm_stderr": 0.027979823538744546
+     },
+     "harness|hendrycksTest-sociology|5": {
+         "acc": 0.835820895522388,
+         "acc_stderr": 0.026193923544454125,
+         "acc_norm": 0.835820895522388,
+         "acc_norm_stderr": 0.026193923544454125
+     },
+     "harness|hendrycksTest-us_foreign_policy|5": {
+         "acc": 0.87,
+         "acc_stderr": 0.033799766898963086,
+         "acc_norm": 0.87,
+         "acc_norm_stderr": 0.033799766898963086
+     },
+     "harness|hendrycksTest-virology|5": {
+         "acc": 0.5481927710843374,
+         "acc_stderr": 0.03874371556587953,
+         "acc_norm": 0.5481927710843374,
+         "acc_norm_stderr": 0.03874371556587953
+     },
+     "harness|hendrycksTest-world_religions|5": {
+         "acc": 0.8187134502923976,
+         "acc_stderr": 0.029547741687640038,
+         "acc_norm": 0.8187134502923976,
+         "acc_norm_stderr": 0.029547741687640038
+     },
+     "harness|truthfulqa:mc|0": {
+         "mc1": 0.379436964504284,
+         "mc1_stderr": 0.01698703926614299,
+         "mc2": 0.5593281467190233,
+         "mc2_stderr": 0.0156123862411416
+     },
+     "harness|winogrande|5": {
+         "acc": 0.7932123125493291,
+         "acc_stderr": 0.011382566829235803
+     },
+     "harness|gsm8k|5": {
+         "acc": 0.6171341925701289,
+         "acc_stderr": 0.013389223491820474
+     }
+ }
+
+ ```
+
  ## 🧩 Configuration

  ```yaml
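
The headline scores quoted in the diff above can be sanity-checked directly from the pasted JSON. The sketch below is illustrative only and not part of the commit: it assumes the JSON block has been saved locally as `josie_beta4_results.json` (a hypothetical filename) and that the leaderboard average is the plain mean of six scores (ARC acc_norm, HellaSwag acc_norm, mean MMLU acc over the hendrycksTest subsets, TruthfulQA mc2, Winogrande acc, GSM8K acc), which follows the usual Open LLM Leaderboard convention rather than anything stated in this README.

```python
import json
from statistics import mean

# Hypothetical local copy of the eval JSON shown in the diff above.
with open("josie_beta4_results.json") as f:
    results = json.load(f)

# MMLU is reported per hendrycksTest subtask; take the unweighted mean of their accuracies.
mmlu = mean(v["acc"] for k, v in results.items() if k.startswith("harness|hendrycksTest"))

# Assumed leaderboard-style summary: six benchmarks, equal weight.
summary = {
    "ARC (25-shot, acc_norm)": results["harness|arc:challenge|25"]["acc_norm"],
    "HellaSwag (10-shot, acc_norm)": results["harness|hellaswag|10"]["acc_norm"],
    "MMLU (5-shot, mean acc)": mmlu,
    "TruthfulQA (0-shot, mc2)": results["harness|truthfulqa:mc|0"]["mc2"],
    "Winogrande (5-shot, acc)": results["harness|winogrande|5"]["acc"],
    "GSM8K (5-shot, acc)": results["harness|gsm8k|5"]["acc"],
}

for name, score in summary.items():
    print(f"{name:30s} {score:.4f}")
print(f"{'Average':30s} {mean(summary.values()):.4f}")
```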