luisrguerra
committed on
Commit
•
cc187e8
1
Parent(s):
04313bf
Update index.html
Browse files - index.html +47 -16
index.html
CHANGED
@@ -73,8 +73,7 @@
|
|
73 |
<li>gpt-3.5-turbo-0125</li>
|
74 |
<li>gpt-3.5-turbo-0613</li>
|
75 |
<li>Claude 3 Haiku</li>
|
76 |
-
<li>
|
77 |
-
<li>OpenChat</li>
|
78 |
</ul>
|
79 |
<h4>Models with fewer hallucinations:</h4>
|
80 |
<ul>
|
@@ -183,6 +182,22 @@
|
|
183 |
|
184 |
<script>
|
185 |
const benchmarkData = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
186 |
{
|
187 |
name: 'gpt-4-0125-preview (turbo)',
|
188 |
mmlu: null,
|
@@ -299,7 +314,7 @@
|
|
299 |
name: 'Claude 3 Opus',
|
300 |
mmlu: 86.8,
|
301 |
mtbench: null,
|
302 |
-
arenaelo:
|
303 |
gsm8k: 95.0,
|
304 |
winogrande: null,
|
305 |
truthfulqa: null,
|
@@ -503,7 +518,7 @@
|
|
503 |
organization: 'Mistral',
|
504 |
license: 'Proprietary',
|
505 |
},
|
506 |
-
{
|
507 |
name: 'Mixtral 8x7B Instruct',
|
508 |
mmlu: 70.6,
|
509 |
mtbench: 8.3,
|
@@ -518,8 +533,8 @@
|
|
518 |
parameters: '45B (MOE)',
|
519 |
organization: 'Mistral',
|
520 |
license: 'Apache 2.0',
|
521 |
-
}
|
522 |
-
{
|
523 |
name: 'Grok 1',
|
524 |
mmlu: 73,
|
525 |
mtbench: null,
|
@@ -534,8 +549,8 @@
|
|
534 |
parameters: "33B",
|
535 |
organization: 'xAI',
|
536 |
license: 'Proprietary',
|
537 |
-
}
|
538 |
-
{
|
539 |
name: 'DBRX Instruct',
|
540 |
mmlu: 73.7,
|
541 |
mtbench: null,
|
@@ -550,8 +565,8 @@
|
|
550 |
parameters: null,
|
551 |
organization: 'Databricks',
|
552 |
license: 'Databricks Open Model',
|
553 |
-
}
|
554 |
-
{
|
555 |
name: 'Yi 34B',
|
556 |
mmlu: 73.5,
|
557 |
mtbench: null,
|
@@ -566,8 +581,8 @@
|
|
566 |
parameters: '34B',
|
567 |
organization: '01 AI',
|
568 |
license: 'Yi License',
|
569 |
-
}
|
570 |
-
{
|
571 |
name: 'PPLX 70B Online',
|
572 |
mmlu: null,
|
573 |
mtbench: null,
|
@@ -582,8 +597,24 @@
|
|
582 |
parameters: '70B',
|
583 |
organization: 'Perplexity AI',
|
584 |
license: 'Proprietary',
|
585 |
-
}
|
586 |
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
587 |
name: 'Llama 70B Chat',
|
588 |
mmlu: 63,
|
589 |
mtbench: 6.86,
|
@@ -596,9 +627,9 @@
|
|
596 |
nothallucination: 94.9,
|
597 |
alpacaeval: null,
|
598 |
parameters: '70B',
|
599 |
-
organization: '
|
600 |
-
license: '
|
601 |
-
}
|
602 |
]
|
603 |
|
604 |
function setBenchmarkTable(data) {
|
|
|
73 |
<li>gpt-3.5-turbo-0125</li>
|
74 |
<li>gpt-3.5-turbo-0613</li>
|
75 |
<li>Claude 3 Haiku</li>
|
76 |
+
<li>Meta Llama 3 70B Instruct</li>
|
|
|
77 |
</ul>
|
78 |
<h4>Models with fewer hallucinations:</h4>
|
79 |
<ul>
|
|
|
182 |
|
183 |
<script>
|
184 |
const benchmarkData = [
|
185 |
+
{
|
186 |
+
name: 'GPT-4o-2024-05-13',
|
187 |
+
mmlu: 88.7,
|
188 |
+
mtbench: null,
|
189 |
+
arenaelo:1287,
|
190 |
+
gsm8k: null,
|
191 |
+
winogrande: null,
|
192 |
+
truthfulqa: null,
|
193 |
+
hellaswag:null,
|
194 |
+
arc:null,
|
195 |
+
nothallucination: null,
|
196 |
+
alpacaeval: 57.5,
|
197 |
+
parameters: 'Unkonwn',
|
198 |
+
organization: 'OpenAI',
|
199 |
+
license: 'Proprietary',
|
200 |
+
},
|
201 |
{
|
202 |
name: 'gpt-4-0125-preview (turbo)',
|
203 |
mmlu: null,
|
|
|
314 |
name: 'Claude 3 Opus',
|
315 |
mmlu: 86.8,
|
316 |
mtbench: null,
|
317 |
+
arenaelo:1249,
|
318 |
gsm8k: 95.0,
|
319 |
winogrande: null,
|
320 |
truthfulqa: null,
|
|
|
518 |
organization: 'Mistral',
|
519 |
license: 'Proprietary',
|
520 |
},
|
521 |
+
/*{
|
522 |
name: 'Mixtral 8x7B Instruct',
|
523 |
mmlu: 70.6,
|
524 |
mtbench: 8.3,
|
|
|
533 |
parameters: '45B (MOE)',
|
534 |
organization: 'Mistral',
|
535 |
license: 'Apache 2.0',
|
536 |
+
},*/
|
537 |
+
/*{
|
538 |
name: 'Grok 1',
|
539 |
mmlu: 73,
|
540 |
mtbench: null,
|
|
|
549 |
parameters: "33B",
|
550 |
organization: 'xAI',
|
551 |
license: 'Proprietary',
|
552 |
+
},*/
|
553 |
+
/*{
|
554 |
name: 'DBRX Instruct',
|
555 |
mmlu: 73.7,
|
556 |
mtbench: null,
|
|
|
565 |
parameters: null,
|
566 |
organization: 'Databricks',
|
567 |
license: 'Databricks Open Model',
|
568 |
+
},*/
|
569 |
+
/*{
|
570 |
name: 'Yi 34B',
|
571 |
mmlu: 73.5,
|
572 |
mtbench: null,
|
|
|
581 |
parameters: '34B',
|
582 |
organization: '01 AI',
|
583 |
license: 'Yi License',
|
584 |
+
},*/
|
585 |
+
/*{
|
586 |
name: 'PPLX 70B Online',
|
587 |
mmlu: null,
|
588 |
mtbench: null,
|
|
|
597 |
parameters: '70B',
|
598 |
organization: 'Perplexity AI',
|
599 |
license: 'Proprietary',
|
600 |
+
},*/
|
601 |
{
|
602 |
+
name: 'Meta Llama 3 70B Instruct',
|
603 |
+
mmlu: 80.06,
|
604 |
+
mtbench: null,
|
605 |
+
arenaelo:1207,
|
606 |
+
gsm8k: 85.44,
|
607 |
+
winogrande: 82.87,
|
608 |
+
truthfulqa: 61.81,
|
609 |
+
hellaswag:85.69,
|
610 |
+
arc:71.42,
|
611 |
+
nothallucination: 95.5,
|
612 |
+
alpacaeval: 34.4,
|
613 |
+
parameters: '70B',
|
614 |
+
organization: 'Meta',
|
615 |
+
license: 'Open Model',
|
616 |
+
},
|
617 |
+
/*{
|
618 |
name: 'Llama 70B Chat',
|
619 |
mmlu: 63,
|
620 |
mtbench: 6.86,
|
|
|
627 |
nothallucination: 94.9,
|
628 |
alpacaeval: null,
|
629 |
parameters: '70B',
|
630 |
+
organization: 'Meta',
|
631 |
+
license: 'Open Model',
|
632 |
+
},*/
|
633 |
]
|
634 |
|
635 |
function setBenchmarkTable(data) {
|