luisrguerra
committed on
Commit
•
59a655b
1
Parent(s):
f148f53
Update index.html
Browse files- index.html +53 -0
index.html
CHANGED
@@ -29,6 +29,9 @@
|
|
29 |
|
30 |
<body>
|
31 |
<div><canvas id="radarChart" height="750"></canvas></div>
|
|
|
|
|
|
|
32 |
<p>The MMLU (Massive Multitask Language Understanding) test is a benchmark that measures language understanding and performance on 57 tasks.</p>
|
33 |
<p>MT-Bench: Benchmark test with questions prepared by the Chatbot Arena team. Uses GPT-4 to evaluate responses.</p>
|
34 |
<p>GSM8K is a dataset of 8.5K high quality linguistically diverse grade school math word problems created by human problem writers. A bright middle school student should be able to solve every problem.</p>
|
@@ -458,6 +461,21 @@
|
|
458 |
organization: 'xAI',
|
459 |
license: 'Proprietary',
|
460 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
461 |
{
|
462 |
name: 'Yi 34B',
|
463 |
mmlu: 73.5,
|
@@ -616,6 +634,41 @@
|
|
616 |
data: data,
|
617 |
options: options
|
618 |
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
619 |
</script>
|
620 |
</body>
|
621 |
</html>
|
|
|
29 |
|
30 |
<body>
|
31 |
<div><canvas id="radarChart" height="750"></canvas></div>
|
32 |
+
<div><canvas id="mmluChart" height="200"></canvas></div>
|
33 |
+
<div><canvas id="gsm8kChart" height="200"></canvas></div>
|
34 |
+
<div><canvas id="arenaeloChart" height="200"></canvas></div>
|
35 |
<p>The MMLU (Massive Multitask Language Understanding) test is a benchmark that measures language understanding and performance on 57 tasks.</p>
|
36 |
<p>MT-Bench: Benchmark test with questions prepared by the Chatbot Arena team. Uses GPT-4 to evaluate responses.</p>
|
37 |
<p>GSM8K is a dataset of 8.5K high quality linguistically diverse grade school math word problems created by human problem writers. A bright middle school student should be able to solve every problem.</p>
|
|
|
461 |
organization: 'xAI',
|
462 |
license: 'Proprietary',
|
463 |
},
|
464 |
+
{
|
465 |
+
name: 'DBRX Instruct',
|
466 |
+
mmlu: 73.7,
|
467 |
+
mtbench: null,
|
468 |
+
arenaelo:null,
|
469 |
+
gsm8k: 66.9,
|
470 |
+
winogrande: 81.8,
|
471 |
+
truthfulqa: 66.9,
|
472 |
+
hellaswag:89.0,
|
473 |
+
arc:68.9,
|
474 |
+
nothallucination: null,
|
475 |
+
parameters: null,
|
476 |
+
organization: 'Databricks',
|
477 |
+
license: 'Databricks Open Model',
|
478 |
+
},
|
479 |
{
|
480 |
name: 'Yi 34B',
|
481 |
mmlu: 73.5,
|
|
|
634 |
data: data,
|
635 |
options: options
|
636 |
});
|
637 |
+
|
638 |
+
|
639 |
+
/**
 * Draws a bar chart ranking the models that have a score for one benchmark.
 *
 * Reads the `benchmarkData` array defined outside this function, keeps the
 * entries whose `benchmarkName` value is present, orders them from highest to
 * lowest score and renders them on the canvas with the given id.
 *
 * @param {string} id - id attribute of the target <canvas> element.
 * @param {string} benchmarkName - property of each entry to plot (for example
 *   'mmlu'); it is also used as the dataset label.
 */
function updateChart(id, benchmarkName) {
  // Drop missing scores first so they never reach the comparator. `!= null`
  // rejects both null and undefined (an entry that lacks the key entirely).
  // filter() returns a new array, so the in-place sort() below no longer
  // reorders the shared benchmarkData array.
  const rankedEntries = benchmarkData
    .filter((item) => item[benchmarkName] != null)
    .sort((a, b) => b[benchmarkName] - a[benchmarkName]);

  const context = document.getElementById(id).getContext('2d');

  new Chart(context, {
    type: 'bar',
    data: {
      labels: rankedEntries.map((item) => item.name),
      datasets: [{
        label: benchmarkName,
        data: rankedEntries.map((item) => item[benchmarkName]),
      }],
    },
    options: {
      // NOTE(review): presumably set so the chart follows the fixed canvas
      // height from the markup; confirm against the Chart.js responsive docs.
      maintainAspectRatio: false,
    },
  });
}
|
668 |
+
// Render one ranking bar chart per benchmark: [canvas id, benchmark key].
for (const [canvasId, benchmarkKey] of [
  ['mmluChart', 'mmlu'],
  ['gsm8kChart', 'gsm8k'],
  ['arenaeloChart', 'arenaelo'],
]) {
  updateChart(canvasId, benchmarkKey);
}
|
671 |
+
|
672 |
</script>
|
673 |
</body>
|
674 |
</html>
|