luisrguerra committed on
Commit
3e2aedd
1 Parent(s): 2867ede

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +48 -0
index.html CHANGED
@@ -135,6 +135,7 @@
135
  truthfulqa: null,
136
  hellaswag:null,
137
  arc:null,
 
138
  parameters: 'Probably smaller than GPT-4',
139
  organization: 'OpenAI',
140
  license: 'Proprietary',
@@ -149,6 +150,7 @@
149
  truthfulqa: null,
150
  hellaswag:null,
151
  arc:null,
 
152
  parameters: '1T (questionable)',
153
  organization: 'OpenAI',
154
  license: 'Proprietary',
@@ -163,6 +165,7 @@
163
  truthfulqa: 59,
164
  hellaswag:95.4,
165
  arc:96.3,
 
166
  parameters: '1T (questionable)',
167
  organization: 'OpenAI',
168
  license: 'Proprietary',
@@ -177,6 +180,7 @@
177
  truthfulqa: null,
178
  hellaswag:null,
179
  arc:null,
 
180
  parameters: '20B - 175B (not confirmed)',
181
  organization: 'OpenAI',
182
  license: 'Proprietary',
@@ -191,6 +195,7 @@
191
  truthfulqa: 47,
192
  hellaswag:85.5,
193
  arc:85.2,
 
194
  parameters: '20B - 175B (not confirmed)',
195
  organization: 'OpenAI',
196
  license: 'Proprietary',
@@ -205,6 +210,7 @@
205
  truthfulqa: null,
206
  hellaswag:null,
207
  arc:null,
 
208
  parameters: '20B - 175B (not confirmed)',
209
  organization: 'OpenAI',
210
  license: 'Proprietary',
@@ -219,6 +225,7 @@
219
  truthfulqa: null,
220
  hellaswag:null,
221
  arc:null,
 
222
  parameters: '137B',
223
  organization: 'Anthropic',
224
  license: 'Proprietary',
@@ -233,6 +240,7 @@
233
  truthfulqa: 69,
234
  hellaswag:null,
235
  arc:91,
 
236
  parameters: '137B',
237
  organization: 'Anthropic',
238
  license: 'Proprietary',
@@ -247,6 +255,7 @@
247
  truthfulqa: null,
248
  hellaswag:null,
249
  arc:null,
 
250
  parameters: null,
251
  organization: 'Anthropic',
252
  license: 'Proprietary',
@@ -261,6 +270,7 @@
261
  truthfulqa: null,
262
  hellaswag:null,
263
  arc:null,
 
264
  parameters: null,
265
  organization: 'Anthropic',
266
  license: 'Proprietary',
@@ -275,6 +285,7 @@
275
  truthfulqa: null,
276
  hellaswag:87.8,
277
  arc:null,
 
278
  parameters: null,
279
  organization: 'Google',
280
  license: 'Proprietary',
@@ -289,6 +300,7 @@
289
  truthfulqa: null,
290
  hellaswag:84.7,
291
  arc:null,
 
292
  parameters: null,
293
  organization: 'Google',
294
  license: 'Proprietary',
@@ -303,6 +315,7 @@
303
  truthfulqa: null,
304
  hellaswag:null,
305
  arc:null,
 
306
  parameters: null,
307
  organization: 'Mistral',
308
  license: 'Proprietary',
@@ -317,6 +330,7 @@
317
  truthfulqa: 46.7,
318
  hellaswag:86.7,
319
  arc:70.14,
 
320
  parameters: '45B (MOE)',
321
  organization: 'Mistral',
322
  license: 'Apache 2.0',
@@ -331,6 +345,7 @@
331
  truthfulqa: null,
332
  hellaswag:null,
333
  arc:null,
 
334
  parameters: "33B",
335
  organization: 'xAI',
336
  license: 'Proprietary',
@@ -345,10 +360,41 @@
345
  truthfulqa: 56.23,
346
  hellaswag:85.69,
347
  arc:64.59,
 
348
  parameters: '34B',
349
  organization: '01 AI',
350
  license: 'Yi License',
351
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
352
  ]
353
 
354
  function setBenchmarkTable(data) {
@@ -363,6 +409,7 @@
363
  '<th>TruthfulQA</th>' +
364
  '<th>HellaSwag</th>' +
365
  '<th>ARC</th>' +
 
366
  '<th>Parameters</th>' +
367
  '<th>Organization</th>' +
368
  '<th>License</th>' +
@@ -379,6 +426,7 @@
379
  '<td>' + item.truthfulqa + '</td>' +
380
  '<td>' + item.hellaswag + '</td>' +
381
  '<td>' + item.arc + '</td>' +
 
382
  '<td>' + item.parameters + '</td>' +
383
  '<td>' + item.organization + '</td>' +
384
  '<td>' + item.license + '</td>' +
 
135
  truthfulqa: null,
136
  hellaswag:null,
137
  arc:null,
138
+ nothallucination: 97.0,
139
  parameters: 'Probably smaller than GPT-4',
140
  organization: 'OpenAI',
141
  license: 'Proprietary',
 
150
  truthfulqa: null,
151
  hellaswag:null,
152
  arc:null,
153
+ nothallucination: 97.0,
154
  parameters: '1T (questionable)',
155
  organization: 'OpenAI',
156
  license: 'Proprietary',
 
165
  truthfulqa: 59,
166
  hellaswag:95.4,
167
  arc:96.3,
168
+ nothallucination: 97.0,
169
  parameters: '1T (questionable)',
170
  organization: 'OpenAI',
171
  license: 'Proprietary',
 
180
  truthfulqa: null,
181
  hellaswag:null,
182
  arc:null,
183
+ nothallucination: 96.5,
184
  parameters: '20B - 175B (not confirmed)',
185
  organization: 'OpenAI',
186
  license: 'Proprietary',
 
195
  truthfulqa: 47,
196
  hellaswag:85.5,
197
  arc:85.2,
198
+ nothallucination: 96.5,
199
  parameters: '20B - 175B (not confirmed)',
200
  organization: 'OpenAI',
201
  license: 'Proprietary',
 
210
  truthfulqa: null,
211
  hellaswag:null,
212
  arc:null,
213
+ nothallucination: 96.5,
214
  parameters: '20B - 175B (not confirmed)',
215
  organization: 'OpenAI',
216
  license: 'Proprietary',
 
225
  truthfulqa: null,
226
  hellaswag:null,
227
  arc:null,
228
+ nothallucination: 91.5,
229
  parameters: '137B',
230
  organization: 'Anthropic',
231
  license: 'Proprietary',
 
240
  truthfulqa: 69,
241
  hellaswag:null,
242
  arc:91,
243
+ nothallucination: 91.5,
244
  parameters: '137B',
245
  organization: 'Anthropic',
246
  license: 'Proprietary',
 
255
  truthfulqa: null,
256
  hellaswag:null,
257
  arc:null,
258
+ nothallucination: null,
259
  parameters: null,
260
  organization: 'Anthropic',
261
  license: 'Proprietary',
 
270
  truthfulqa: null,
271
  hellaswag:null,
272
  arc:null,
273
+ nothallucination: null,
274
  parameters: null,
275
  organization: 'Anthropic',
276
  license: 'Proprietary',
 
285
  truthfulqa: null,
286
  hellaswag:87.8,
287
  arc:null,
288
+ nothallucination: null,
289
  parameters: null,
290
  organization: 'Google',
291
  license: 'Proprietary',
 
300
  truthfulqa: null,
301
  hellaswag:84.7,
302
  arc:null,
303
+ nothallucination: 95.2,
304
  parameters: null,
305
  organization: 'Google',
306
  license: 'Proprietary',
 
315
  truthfulqa: null,
316
  hellaswag:null,
317
  arc:null,
318
+ nothallucination: null,
319
  parameters: null,
320
  organization: 'Mistral',
321
  license: 'Proprietary',
 
330
  truthfulqa: 46.7,
331
  hellaswag:86.7,
332
  arc:70.14,
333
+ nothallucination: 90.7,
334
  parameters: '45B (MOE)',
335
  organization: 'Mistral',
336
  license: 'Apache 2.0',
 
345
  truthfulqa: null,
346
  hellaswag:null,
347
  arc:null,
348
+ nothallucination: null,
349
  parameters: "33B",
350
  organization: 'xAI',
351
  license: 'Proprietary',
 
360
  truthfulqa: 56.23,
361
  hellaswag:85.69,
362
  arc:64.59,
363
+ nothallucination: null,
364
  parameters: '34B',
365
  organization: '01 AI',
366
  license: 'Yi License',
367
  },
368
+ {
369
+ name: 'PPLX 70B Online',
370
+ mmlu: null,
371
+ mtbench: null,
372
+ arenaelo:1073,
373
+ gsm8k: null,
374
+ winogrande: null,
375
+ truthfulqa: null,
376
+ hellaswag:null,
377
+ arc:null,
378
+ nothallucination: null,
379
+ parameters: '70B',
380
+ organization: 'Perplexity AI',
381
+ license: 'Proprietary',
382
+ },
383
+ {
384
+ name: 'Llama 70B Chat',
385
+ mmlu: 63,
386
+ mtbench: 6.86,
387
+ arenaelo:1079,
388
+ gsm8k: null,
389
+ winogrande: null,
390
+ truthfulqa: null,
391
+ hellaswag:null,
392
+ arc:null,
393
+ nothallucination: 94.9,
394
+ parameters: '70B',
395
+ organization: 'Perplexity AI',
396
+ license: 'Proprietary',
397
+ },
398
  ]
399
 
400
  function setBenchmarkTable(data) {
 
409
  '<th>TruthfulQA</th>' +
410
  '<th>HellaSwag</th>' +
411
  '<th>ARC</th>' +
412
+ '<th>Not hallucination</th>' +
413
  '<th>Parameters</th>' +
414
  '<th>Organization</th>' +
415
  '<th>License</th>' +
 
426
  '<td>' + item.truthfulqa + '</td>' +
427
  '<td>' + item.hellaswag + '</td>' +
428
  '<td>' + item.arc + '</td>' +
429
+ '<td>' + item.nothallucination + '%'+ '</td>' +
430
  '<td>' + item.parameters + '</td>' +
431
  '<td>' + item.organization + '</td>' +
432
  '<td>' + item.license + '</td>' +