luisrguerra committed on
Commit
cc187e8
1 Parent(s): 04313bf

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +47 -16
index.html CHANGED
@@ -73,8 +73,7 @@
73
  <li>gpt-3.5-turbo-0125</li>
74
  <li>gpt-3.5-turbo-0613</li>
75
  <li>Claude 3 Haiku</li>
76
- <li>Mixtral 8x7B Instruct</li>
77
- <li>OpenChat</li>
78
  </ul>
79
  <h4>Models with fewer hallucinations:</h4>
80
  <ul>
@@ -183,6 +182,22 @@
183
 
184
  <script>
185
  const benchmarkData = [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
  {
187
  name: 'gpt-4-0125-preview (turbo)',
188
  mmlu: null,
@@ -299,7 +314,7 @@
299
  name: 'Claude 3 Opus',
300
  mmlu: 86.8,
301
  mtbench: null,
302
- arenaelo:1255,
303
  gsm8k: 95.0,
304
  winogrande: null,
305
  truthfulqa: null,
@@ -503,7 +518,7 @@
503
  organization: 'Mistral',
504
  license: 'Proprietary',
505
  },
506
- {
507
  name: 'Mixtral 8x7B Instruct',
508
  mmlu: 70.6,
509
  mtbench: 8.3,
@@ -518,8 +533,8 @@
518
  parameters: '45B (MOE)',
519
  organization: 'Mistral',
520
  license: 'Apache 2.0',
521
- },
522
- {
523
  name: 'Grok 1',
524
  mmlu: 73,
525
  mtbench: null,
@@ -534,8 +549,8 @@
534
  parameters: "33B",
535
  organization: 'xAI',
536
  license: 'Proprietary',
537
- },
538
- {
539
  name: 'DBRX Instruct',
540
  mmlu: 73.7,
541
  mtbench: null,
@@ -550,8 +565,8 @@
550
  parameters: null,
551
  organization: 'Databricks',
552
  license: 'Databricks Open Model',
553
- },
554
- {
555
  name: 'Yi 34B',
556
  mmlu: 73.5,
557
  mtbench: null,
@@ -566,8 +581,8 @@
566
  parameters: '34B',
567
  organization: '01 AI',
568
  license: 'Yi License',
569
- },
570
- {
571
  name: 'PPLX 70B Online',
572
  mmlu: null,
573
  mtbench: null,
@@ -582,8 +597,24 @@
582
  parameters: '70B',
583
  organization: 'Perplexity AI',
584
  license: 'Proprietary',
585
- },
586
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
587
  name: 'Llama 70B Chat',
588
  mmlu: 63,
589
  mtbench: 6.86,
@@ -596,9 +627,9 @@
596
  nothallucination: 94.9,
597
  alpacaeval: null,
598
  parameters: '70B',
599
- organization: 'Perplexity AI',
600
- license: 'Proprietary',
601
- },
602
  ]
603
 
604
  function setBenchmarkTable(data) {
 
73
  <li>gpt-3.5-turbo-0125</li>
74
  <li>gpt-3.5-turbo-0613</li>
75
  <li>Claude 3 Haiku</li>
76
+ <li>Meta Llama 3 70B Instruct</li>
 
77
  </ul>
78
  <h4>Models with fewer hallucinations:</h4>
79
  <ul>
 
182
 
183
  <script>
184
  const benchmarkData = [
185
+ {
186
+ name: 'GPT-4o-2024-05-13',
187
+ mmlu: 88.7,
188
+ mtbench: null,
189
+ arenaelo:1287,
190
+ gsm8k: null,
191
+ winogrande: null,
192
+ truthfulqa: null,
193
+ hellaswag:null,
194
+ arc:null,
195
+ nothallucination: null,
196
+ alpacaeval: 57.5,
197
+ parameters: 'Unkonwn',
198
+ organization: 'OpenAI',
199
+ license: 'Proprietary',
200
+ },
201
  {
202
  name: 'gpt-4-0125-preview (turbo)',
203
  mmlu: null,
 
314
  name: 'Claude 3 Opus',
315
  mmlu: 86.8,
316
  mtbench: null,
317
+ arenaelo:1249,
318
  gsm8k: 95.0,
319
  winogrande: null,
320
  truthfulqa: null,
 
518
  organization: 'Mistral',
519
  license: 'Proprietary',
520
  },
521
+ /*{
522
  name: 'Mixtral 8x7B Instruct',
523
  mmlu: 70.6,
524
  mtbench: 8.3,
 
533
  parameters: '45B (MOE)',
534
  organization: 'Mistral',
535
  license: 'Apache 2.0',
536
+ },*/
537
+ /*{
538
  name: 'Grok 1',
539
  mmlu: 73,
540
  mtbench: null,
 
549
  parameters: "33B",
550
  organization: 'xAI',
551
  license: 'Proprietary',
552
+ },*/
553
+ /*{
554
  name: 'DBRX Instruct',
555
  mmlu: 73.7,
556
  mtbench: null,
 
565
  parameters: null,
566
  organization: 'Databricks',
567
  license: 'Databricks Open Model',
568
+ },*/
569
+ /*{
570
  name: 'Yi 34B',
571
  mmlu: 73.5,
572
  mtbench: null,
 
581
  parameters: '34B',
582
  organization: '01 AI',
583
  license: 'Yi License',
584
+ },*/
585
+ /*{
586
  name: 'PPLX 70B Online',
587
  mmlu: null,
588
  mtbench: null,
 
597
  parameters: '70B',
598
  organization: 'Perplexity AI',
599
  license: 'Proprietary',
600
+ },*/
601
  {
602
+ name: 'Meta Llama 3 70B Instruct',
603
+ mmlu: 80.06,
604
+ mtbench: null,
605
+ arenaelo:1207,
606
+ gsm8k: 85.44,
607
+ winogrande: 82.87,
608
+ truthfulqa: 61.81,
609
+ hellaswag:85.69,
610
+ arc:71.42,
611
+ nothallucination: 95.5,
612
+ alpacaeval: 34.4,
613
+ parameters: '70B',
614
+ organization: 'Meta',
615
+ license: 'Open Model',
616
+ },
617
+ /*{
618
  name: 'Llama 70B Chat',
619
  mmlu: 63,
620
  mtbench: 6.86,
 
627
  nothallucination: 94.9,
628
  alpacaeval: null,
629
  parameters: '70B',
630
+ organization: 'Meta',
631
+ license: 'Open Model',
632
+ },*/
633
  ]
634
 
635
  function setBenchmarkTable(data) {