luisrguerra committed on
Commit
59a655b
1 Parent(s): f148f53

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +53 -0
index.html CHANGED
@@ -29,6 +29,9 @@
29
 
30
  <body>
31
  <div><canvas id="radarChart" height="750"></canvas></div>
 
 
 
32
  <p>The MMLU (Massive Multitask Language Understanding) test is a benchmark that measures language understanding and performance on 57 tasks.</p>
33
  <p>MT-Bench: Benchmark test with questions prepared by the Chatbot Arena team. Uses GPT-4 to evaluate responses.</p>
34
  <p>GSM8K is a dataset of 8.5K high quality linguistically diverse grade school math word problems created by human problem writers. A bright middle school student should be able to solve every problem.</p>
@@ -458,6 +461,21 @@
458
  organization: 'xAI',
459
  license: 'Proprietary',
460
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
461
  {
462
  name: 'Yi 34B',
463
  mmlu: 73.5,
@@ -616,6 +634,41 @@
616
  data: data,
617
  options: options
618
  });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
619
  </script>
620
  </body>
621
  </html>
 
29
 
30
  <body>
31
  <div><canvas id="radarChart" height="750"></canvas></div>
32
+ <div><canvas id="mmluChart" height="200"></canvas></div>
33
+ <div><canvas id="gsm8kChart" height="200"></canvas></div>
34
+ <div><canvas id="arenaeloChart" height="200"></canvas></div>
35
  <p>The MMLU (Massive Multitask Language Understanding) test is a benchmark that measures language understanding and performance on 57 tasks.</p>
36
  <p>MT-Bench: Benchmark test with questions prepared by the Chatbot Arena team. Uses GPT-4 to evaluate responses.</p>
37
  <p>GSM8K is a dataset of 8.5K high quality linguistically diverse grade school math word problems created by human problem writers. A bright middle school student should be able to solve every problem.</p>
 
461
  organization: 'xAI',
462
  license: 'Proprietary',
463
  },
464
+ {
465
+ name: 'DBRX Instruct',
466
+ mmlu: 73.7,
467
+ mtbench: null,
468
+ arenaelo:null,
469
+ gsm8k: 66.9,
470
+ winogrande: 81.8,
471
+ truthfulqa: 66.9,
472
+ hellaswag:89.0,
473
+ arc:68.9,
474
+ nothallucination: null,
475
+ parameters: null,
476
+ organization: 'Databricks',
477
+ license: 'Databricks Open Model',
478
+ },
479
  {
480
  name: 'Yi 34B',
481
  mmlu: 73.5,
 
634
  data: data,
635
  options: options
636
  });
637
+
638
+
639
+ function updateChart(id,benchmarkName){
640
+ function sortBenchmarkData(benchmarkName){
641
+ return benchmarkData.sort((a, b) => b[benchmarkName] - a[benchmarkName]);
642
+ }
643
+ function removeItemsNull(data,benchmarkName){
644
+ return data.filter(item => item[benchmarkName] !== null);
645
+ }
646
+ benchmarkData2 = removeItemsNull(sortBenchmarkData(benchmarkName),benchmarkName);
647
+ function getLabelSetMlluChart(data){
648
+ return data.map(item => item.name);
649
+ }
650
+ function getDataSetMlluChart(data){
651
+ return data.map(item => item[benchmarkName]);
652
+ }
653
+ let element = document.getElementById(id).getContext('2d');
654
+ new Chart(element, {
655
+ type: 'bar',
656
+ data: {
657
+ labels: getLabelSetMlluChart(benchmarkData2),
658
+ datasets: [{
659
+ label: benchmarkName,
660
+ data: getDataSetMlluChart(benchmarkData2)
661
+ }]
662
+ },
663
+ options: {
664
+ maintainAspectRatio: false
665
+ }
666
+ });
667
+ }
668
+ updateChart('mmluChart','mmlu');
669
+ updateChart('gsm8kChart','gsm8k');
670
+ updateChart('arenaeloChart','arenaelo');
671
+
672
  </script>
673
  </body>
674
  </html>