|
<!DOCTYPE html> |
|
<html lang="en"> |
|
<head> |
|
<meta charset="UTF-8"> |
|
<meta name="viewport" content="width=device-width, initial-scale=1.0"> |
|
<title>Guerra LLM Ranking</title> |
|
<script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.1/dist/chart.umd.min.js"></script> |
|
|
|
<style>
  body {
    font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
    color: hsl(0, 0%, 25%);
  }

  table {
    width: 100%;
  }

  table, th, td {
    border: 1px solid;
    border-color: hsl(0, 0%, 60%);
    border-collapse: collapse;
  }

  th, td {
    padding: 6px;
    text-align: left;
  }
</style>

</head>

<body>
|
<div><canvas id="radarChart" height="750"></canvas></div> |
|
<div><canvas id="mmluChart" height="150"></canvas></div> |
|
<div><canvas id="gsm8kChart" height="150"></canvas></div> |
|
<div><canvas id="arenaeloChart" height="150"></canvas></div> |
|
<div><canvas id="nothallucinationChart" height="150"></canvas></div> |
|
<div><canvas id="truthfulqaChart" height="150"></canvas></div> |
|
<div><canvas id="hellaSwagChart" height="150"></canvas></div> |
|
<div><canvas id="winograndeChart" height="150"></canvas></div> |
|
<div><canvas id="arcChart" height="150"></canvas></div> |
|
<div><canvas id="mtbenchChart" height="150"></canvas></div> |
|
<div><canvas id="alpacaevalChart" height="150"></canvas></div> |
|
<p>The MMLU (Massive Multitask Language Understanding) test is a benchmark that measures language understanding and performance on 57 tasks.</p> |
|
<p>MT-Bench: Benchmark test with questions prepared by the Chatbot Arena team. Uses GPT-4 to evaluate responses.</p> |
|
<p>GSM8K is a dataset of 8.5K high quality linguistically diverse grade school math word problems created by human problem writers. A bright middle school student should be able to solve every problem.</p> |
|
<p>Vectara's Hallucination Evaluation Model. This evaluates how often an LLM introduces hallucinations when summarizing a document.</p> |
|
<div id="tableBenchMark"></div> |
|
<h4>Best models for solving math problems:</h4> |
|
<ul> |
|
<li>gpt-4o-2024-05-13</li> |
|
<li>gpt-4-Turbo-2024-04-09</li> |
|
<li>gpt-4-0125-preview (turbo)</li> |
|
<li>gpt-4-1106-preview (turbo)</li> |
|
<li>gpt-4-0613</li> |
|
<li>gpt-4-0314</li> |
|
<li>Gemini Ultra 1.0</li> |
|
<li>Gemini Pro 1.5</li> |
|
<li>Gemini Advanced</li> |
|
<li>Claude 3 Opus</li> |
|
<li>Claude 3 Sonnet</li> |
|
</ul> |
|
<h4>Best models for large text:</h4> |
|
<ul> |
|
<li>gpt-4o-2024-05-13</li> |
|
<li>gpt-4-Turbo-2024-04-09</li> |
|
<li>gpt-4-0125-preview (turbo)</li> |
|
<li>gpt-4-1106-preview (turbo)</li> |
|
<li>Gemini Ultra 1.0</li> |
|
<li>Gemini Pro 1.5</li> |
|
<li>Gemini Advanced</li> |
|
<li>Claude 3 Opus</li> |
|
<li>Claude 3 Sonnet</li> |
|
<li>Claude 3 Haiku</li> |
|
<li>Claude 2-2.1</li> |
|
<li>Claude Instant 1-1.2</li> |
|
</ul> |
|
<h4>Models with the best cost benefit:</h4> |
|
<ul> |
|
<li>gpt-4o-2024-05-13</li> |
|
<li>Gemini Pro 1.5</li> |
|
<li>gpt-3.5-turbo-0125</li> |
|
<li>gpt-3.5-turbo-0613</li> |
|
<li>Claude 3 Haiku</li> |
|
<li>Meta Llama 3 70B Instruct</li> |
|
</ul> |
|
<h4>Models with fewer hallucinations:</h4> |
|
<ul> |
|
<li>gpt-4o-2024-05-13</li> |
|
<li>gpt-4-0125-preview (turbo)</li> |
|
<li>gpt-4-1106-preview (turbo)</li> |
|
<li>gpt-4-0613</li> |
|
<li>gpt-4-0314</li> |
|
<li>Gemini Ultra 1.0</li> |
|
<li>Gemini Pro 1.5</li> |
|
<li>Claude 2.1</li> |
|
<li>Snowflake Arctic Instruct</li> |
|
<li>Intel Neural Chat 7B</li> |
|
</ul> |
|
<h4>Models with a high level of hallucinations:</h4> |
|
<ul> |
|
<li>Gemma 1-1.1 7B</li> |
|
<li>DBRX Instruct</li> |
|
<li>Microsoft Phi 2</li> |
|
<li>Mistral 7B</li> |
|
<li>Google Palm 2</li> |
|
<li>Mixtral 8x7B Instruct</li> |
|
<li>Yi 34B</li> |
|
</ul> |
|
<h4>Open Models:</h4> |
|
<ul> |
|
<li>Mixtral 8x7B Instruct</li> |
|
<li>Mistral 7B</li> |
|
<li>Phi-3</li> |
|
<li>Yi 34B</li> |
|
<li>Grok 1</li> |
|
<li>DBRX Instruct</li> |
|
<li>Llama 3 8-70B</li> |
|
<li>Gemma 2-7B</li> |
|
</ul> |
|
<h4>Can be trained in online service:</h4> |
|
<ul> |
|
<li>gpt-3.5-turbo-1106</li> |
|
<li>gpt-3.5-turbo-0613</li> |
|
<li>gpt-4-0613</li> |
|
</ul> |
|
<h4>Can be trained locally:</h4> |
|
<ul> |
|
<li>Llama 3 8-70B</li> |
|
<li>Mixtral 8x7B Instruct</li> |
|
<li>Yi 34B</li> |
|
</ul> |
|
<h4>Has widely available api service:</h4> |
|
<ul> |
|
<li>gpt-4-0125-preview (turbo) - OpenAI</li> |
|
<li>gpt-4-1106-preview (turbo) - OpenAI</li> |
|
<li>gpt-4-0613 - OpenAI</li> |
|
<li>gpt-4-0314 - OpenAI</li> |
|
<li>gpt-3.5-turbo-1106 - OpenAI</li> |
|
<li>gpt-4-0314 - OpenAI</li> |
|
<li>Gemini Pro 1.0-1.5 - Openrouter with compatibility with OpenAI api, Google api service.</li> |
|
<li>Claude 3 - Openrouter with compatibility with OpenAI api, Anthropic api service.</li> |
|
<li>Claude 2-2.1 - Openrouter with compatibility with OpenAI api, Anthropic api service.</li> |
|
<li>Claude Instant 1-1.2 - Openrouter with compatibility with OpenAI api, Anthropic api service.</li> |
|
<li>Mistral Medium - Openrouter with compatibility with OpenAI api, Mistral service has a waiting list.</li> |
|
<li>Mixtral 8x7B Instruct - Deepinfra with compatibility with OpenAI api.</li> |
|
<li>Yi 34B - Deepinfra with compatibility with OpenAI api.</li> |
|
</ul> |
|
<h4>Models with the same level of GPT-4 Turbo:</h4> |
|
<ul> |
|
<li>Claude 3 Opus</li> |
|
</ul> |
|
<h4>Models with the same level of GPT-4 but lower than GPT-4 Turbo:</h4> |
|
<ul> |
|
<li>Gemini Ultra 1.0</li> |
|
<li>Gemini Pro 1.5</li> |
|
<li>Gemini Advanced</li> |
|
<li>Gemini Pro (Bard/Online)</li> |
|
<li>Claude 3 Sonnet</li> |
|
</ul> |
|
<h4>Models with the same level or better than GPT-3.5 but lower than GPT-4:</h4> |
|
<ul> |
|
<li>Claude 3 Haiku</li> |
|
<li>Claude 2-2.1</li> |
|
<li>Claude 1</li> |
|
<li>Claude Instant 1-1.2</li> |
|
<li>Phi-3 Medium</li> |
|
<li>Llama 3 70B Instruct</li> |
|
<li>Gemini-1.5-Flash-API-0514</li> |
|
<li>Command R+</li> |
|
</ul> |
|
<h4>Versions of models already surpassed by fine-tune, new versions or new architectures:</h4> |
|
<ul> |
|
<li>gpt-4-0613</li> |
|
<li>gpt-4-0314</li> |
|
<li>Gemini Pro 1.0</li> |
|
<li>Grok 1</li> |
|
<li>Phi-2</li> |
|
<li>DBRX Instruct</li> |
|
<li>Mistral Medium</li> |
|
<li>Gemma 1.0 7B</li> |
|
<li>Zephyr-ORPO-141b-A35b-v0.1</li> |
|
<li>Yi 1.0 34B</li> |
|
<li>gpt-4-0613</li> |
|
<li>gpt-4-0314</li> |
|
<li>Claude 2-2.1</li> |
|
<li>Claude Instant 1-1.2</li> |
|
<li>Qwen 1.0</li> |
|
<li>Falcon 180B</li> |
|
<li>Llama 1 and Llama 2</li> |
|
<li>Guanaco 65B</li> |
|
<li>Palm 2 Chat Bison</li> |
|
<li>Dolly V2</li> |
|
<li>Alpaca</li> |
|
<li>CodeLlama-34b-Instruct-hf</li> |
|
<li>SOLAR-10.7B-Instruct-v1.0</li> |
|
<li>Mistral-7B-v0.2</li> |
|
<li>Mistral-7B-v0.1</li> |
|
<li>MythoMax-L2</li> |
|
<li>Zephyr 7B Alpha and Beta</li> |
|
<li>Airoboros 70b</li> |
|
<li>OpenChat-3.5-1210</li> |
|
<li>StableLM Tuned Alpha</li> |
|
<li>Stable Beluga 2</li> |
|
</ul> |
|
<h4>Best OpenAI Models:</h4> |
|
<ul> |
|
<li>gpt-4o-2024-05-13</li> |
|
<li>gpt-4-Turbo-2024-04-09</li> |
|
<li>gpt-4-0125-preview (turbo)</li> |
|
<li>gpt-4-1106-preview (turbo)</li> |
|
<li>gpt-3.5-turbo-0613</li> |
|
<li>gpt-3.5-turbo-0125</li> |
|
</ul> |
|
<h4>API services:</h4> |
|
<ul> |
|
<li>Openrouter</li> |
|
<li>OpenAI</li> |
|
<li>Google Cloud</li> |
|
<li>Anthropic</li> |
|
<li>Azure</li> |
|
<li>Deepinfra</li> |
|
<li>Together</li> |
|
<li>OctoAI</li> |
|
<li>Lepton</li> |
|
<li>Fireworks</li> |
|
<li>Perplexity</li> |
|
<li>Groq</li> |
|
<li>Mistral</li> |
|
<li>NovitaAI</li> |
|
<li>Cohere</li> |
|
<li>DeepSeek</li> |
|
</ul> |
|
|
|
|
|
<script> |
|
const benchmarkData = [ |
|
{ |
|
name: 'GPT-4o-2024-05-13', |
|
mmlu: 88.7, |
|
mtbench: null, |
|
arenaelo:1287, |
|
gsm8k: null, |
|
winogrande: null, |
|
truthfulqa: null, |
|
hellaswag:null, |
|
arc:null, |
|
nothallucination: null, |
|
alpacaeval: 57.5, |
|
parameters: 'Unknown',
|
organization: 'OpenAI', |
|
license: 'Proprietary', |
|
}, |
|
{ |
|
name: 'gpt-4-0125-preview (turbo)', |
|
mmlu: null, |
|
mtbench: null, |
|
arenaelo:1249, |
|
gsm8k: null, |
|
winogrande: null, |
|
truthfulqa: null, |
|
hellaswag:null, |
|
arc:null, |
|
nothallucination: null, |
|
alpacaeval: null, |
|
parameters: 'Probably smaller than GPT-4', |
|
organization: 'OpenAI', |
|
license: 'Proprietary', |
|
}, |
|
{ |
|
name: 'gpt-4-1106-preview (turbo)', |
|
mmlu: null, |
|
mtbench: 9.32, |
|
arenaelo:1252, |
|
gsm8k: 95.3, |
|
winogrande: 81.8, |
|
truthfulqa: 75.7, |
|
hellaswag:92.7, |
|
arc:94.2, |
|
nothallucination: 97.0, |
|
alpacaeval: 50, |
|
parameters: 'Probably smaller than GPT-4', |
|
organization: 'OpenAI', |
|
license: 'Proprietary', |
|
}, |
|
{ |
|
name: 'gpt-4-0613', |
|
mmlu: null, |
|
mtbench: 9.18, |
|
arenaelo:1160, |
|
gsm8k: 96.8, |
|
winogrande: 87.1, |
|
truthfulqa: 79.7, |
|
hellaswag:91.9, |
|
arc:94.6, |
|
nothallucination: 97.0, |
|
alpacaeval: 30.2, |
|
parameters: '1T (questionable)', |
|
organization: 'OpenAI', |
|
license: 'Proprietary', |
|
}, |
|
{ |
|
name: 'gpt-4-0314', |
|
mmlu: 86.4, |
|
mtbench: 8.96, |
|
arenaelo:1185, |
|
gsm8k: 92, |
|
winogrande: 87.5, |
|
truthfulqa: 59, |
|
hellaswag:95.4, |
|
arc:96.3, |
|
nothallucination: 97.0, |
|
alpacaeval: 35.3, |
|
parameters: '1T (questionable)', |
|
organization: 'OpenAI', |
|
license: 'Proprietary', |
|
}, |
|
{ |
|
name: 'gpt-3.5-turbo-0613', |
|
mmlu: null, |
|
mtbench: 8.39, |
|
arenaelo:1115, |
|
gsm8k: null, |
|
winogrande: 55.3, |
|
truthfulqa: 61.4, |
|
hellaswag:79.4, |
|
arc:81.7, |
|
nothallucination: 96.5, |
|
alpacaeval: 22.7, |
|
parameters: '20B - 175B (not confirmed)', |
|
organization: 'OpenAI', |
|
license: 'Proprietary', |
|
}, |
|
{ |
|
name: 'gpt-3.5-turbo-0301', |
|
mmlu: 70, |
|
mtbench: 7.94, |
|
arenaelo:1103, |
|
gsm8k: 57.1, |
|
winogrande: 81.6, |
|
truthfulqa: 47, |
|
hellaswag:85.5, |
|
arc:85.2, |
|
nothallucination: 96.5, |
|
alpacaeval: 18.1, |
|
parameters: '20B - 175B (not confirmed)', |
|
organization: 'OpenAI', |
|
license: 'Proprietary', |
|
}, |
|
{ |
|
name: 'gpt-3.5-turbo-1106', |
|
mmlu: null, |
|
mtbench: 8.32, |
|
arenaelo:1069, |
|
gsm8k: null, |
|
winogrande: 54, |
|
truthfulqa: 60.7, |
|
hellaswag:60.8, |
|
arc:79.1, |
|
nothallucination: 96.5, |
|
alpacaeval: 19.3, |
|
parameters: '20B - 175B (not confirmed)', |
|
organization: 'OpenAI', |
|
license: 'Proprietary', |
|
}, |
|
{ |
|
name: 'Claude 3 Opus', |
|
mmlu: 86.8, |
|
mtbench: null, |
|
arenaelo:1249, |
|
gsm8k: 95.0, |
|
winogrande: null, |
|
truthfulqa: null, |
|
hellaswag:95.4, |
|
arc:96.4, |
|
nothallucination: 92.6, |
|
alpacaeval: 40.4, |
|
parameters: null, |
|
organization: 'Anthropic', |
|
license: 'Proprietary', |
|
}, |
|
{ |
|
name: 'Claude 3 Sonnet', |
|
mmlu: 79.0, |
|
mtbench: null, |
|
arenaelo:1200, |
|
gsm8k: 92.3, |
|
winogrande: null, |
|
truthfulqa: null, |
|
hellaswag:null, |
|
arc:89.0, |
|
nothallucination: 94, |
|
alpacaeval: 34.9, |
|
parameters: null, |
|
organization: 'Anthropic', |
|
license: 'Proprietary', |
|
}, |
|
{ |
|
name: 'Claude 3 Haiku', |
|
mmlu: 75.2, |
|
mtbench: null, |
|
arenaelo:1177, |
|
gsm8k: 88.9, |
|
winogrande: null, |
|
truthfulqa: null, |
|
hellaswag:null, |
|
arc:85.9, |
|
nothallucination: 92.4, |
|
alpacaeval: null, |
|
parameters: null, |
|
organization: 'Anthropic', |
|
license: 'Proprietary', |
|
}, |
|
{ |
|
name: 'Claude 2.1', |
|
mmlu: null, |
|
mtbench: 8.18, |
|
arenaelo:1116, |
|
gsm8k: 88, |
|
winogrande: null, |
|
truthfulqa: null, |
|
hellaswag:null, |
|
arc:null, |
|
nothallucination: 91.5, |
|
alpacaeval: 25.3, |
|
parameters: '137B', |
|
organization: 'Anthropic', |
|
license: 'Proprietary', |
|
}, |
|
{ |
|
name: 'Claude 2.0', |
|
mmlu: 78.5, |
|
mtbench: 8.06, |
|
arenaelo:1127, |
|
gsm8k: 71.2, |
|
winogrande: null, |
|
truthfulqa: 69, |
|
hellaswag:null, |
|
arc:91, |
|
nothallucination: 91.5, |
|
alpacaeval: 28.2, |
|
parameters: '137B', |
|
organization: 'Anthropic', |
|
license: 'Proprietary', |
|
}, |
|
{ |
|
name: 'Claude 1.0', |
|
mmlu: 77, |
|
mtbench: 7.9, |
|
arenaelo:1146, |
|
gsm8k: null, |
|
winogrande: null, |
|
truthfulqa: null, |
|
hellaswag:null, |
|
arc:null, |
|
nothallucination: null, |
|
alpacaeval: 27.3, |
|
parameters: null, |
|
organization: 'Anthropic', |
|
license: 'Proprietary', |
|
}, |
|
{ |
|
name: 'Claude Instant 1', |
|
mmlu: 73.4, |
|
mtbench: 7.85, |
|
arenaelo:1105, |
|
gsm8k: 86.7, |
|
winogrande: null, |
|
truthfulqa: null, |
|
hellaswag:null, |
|
arc:null, |
|
nothallucination: null, |
|
alpacaeval: null, |
|
parameters: null, |
|
organization: 'Anthropic', |
|
license: 'Proprietary', |
|
}, |
|
{ |
|
name: 'Gemini-Advanced-0514', |
|
mmlu: null, |
|
mtbench: null, |
|
arenaelo:1267, |
|
gsm8k: null, |
|
winogrande: null, |
|
truthfulqa: null, |
|
hellaswag:null, |
|
arc:null, |
|
nothallucination: null, |
|
alpacaeval: null, |
|
parameters: null, |
|
organization: 'Google', |
|
license: 'Proprietary', |
|
}, |
|
{ |
|
name: 'Gemini-1.5-Flash-API-0514', |
|
mmlu: 78.9, |
|
mtbench: null, |
|
arenaelo:1230, |
|
gsm8k: null, |
|
winogrande: null, |
|
truthfulqa: null, |
|
hellaswag:null, |
|
arc:null, |
|
nothallucination: null, |
|
alpacaeval: null, |
|
parameters: null, |
|
organization: 'Google', |
|
license: 'Proprietary', |
|
}, |
|
{ |
|
name: 'Gemini-1.5-Pro-API-0514', |
|
mmlu: 85.9, |
|
mtbench: null, |
|
arenaelo:1265, |
|
gsm8k: null, |
|
winogrande: null, |
|
truthfulqa: null, |
|
hellaswag:null, |
|
arc:null, |
|
nothallucination: null, |
|
alpacaeval: null, |
|
parameters: null, |
|
organization: 'Google', |
|
license: 'Proprietary', |
|
}, |
|
{ |
|
name: 'Gemini-1.5-Pro-API-0409-Preview', |
|
mmlu: 81.9, |
|
mtbench: null, |
|
arenaelo:1258, |
|
gsm8k: 91.7, |
|
winogrande: null, |
|
truthfulqa: null, |
|
hellaswag:92.5, |
|
arc:null, |
|
nothallucination: null, |
|
alpacaeval: null, |
|
parameters: null, |
|
organization: 'Google', |
|
license: 'Proprietary', |
|
}, |
|
{ |
|
name: 'Gemini Ultra', |
|
mmlu: 83.7, |
|
mtbench: null, |
|
arenaelo:null, |
|
gsm8k: 88.9, |
|
winogrande: null, |
|
truthfulqa: null, |
|
hellaswag:87.8, |
|
arc:null, |
|
nothallucination: null, |
|
alpacaeval: null, |
|
parameters: null, |
|
organization: 'Google', |
|
license: 'Proprietary', |
|
}, |
|
{ |
|
name: 'Gemini Pro Online', |
|
mmlu: null, |
|
mtbench: null, |
|
arenaelo:1204, |
|
gsm8k: null, |
|
winogrande: null, |
|
truthfulqa: null, |
|
hellaswag:null, |
|
arc:null, |
|
nothallucination: null, |
|
alpacaeval: null, |
|
parameters: null, |
|
organization: 'Google', |
|
license: 'Proprietary', |
|
}, |
|
{ |
|
name: 'Gemini Pro', |
|
mmlu: 71.8, |
|
mtbench: null, |
|
arenaelo:1127, |
|
gsm8k: 77.9, |
|
winogrande: null, |
|
truthfulqa: null, |
|
hellaswag:84.7, |
|
arc:null, |
|
nothallucination: 95.2, |
|
alpacaeval: 24.4, |
|
parameters: null, |
|
organization: 'Google', |
|
license: 'Proprietary', |
|
}, |
|
{ |
|
name: 'Mistral Large', |
|
mmlu: 81.2, |
|
mtbench: null, |
|
arenaelo:1157, |
|
gsm8k: null, |
|
winogrande: 86.7, |
|
truthfulqa: 50.5, |
|
hellaswag:89.2, |
|
arc:94.2, |
|
nothallucination: null, |
|
alpacaeval: 32.7, |
|
parameters: null, |
|
organization: 'Mistral', |
|
license: 'Proprietary', |
|
}, |
|
{ |
|
name: 'Mistral Medium', |
|
mmlu: 75.3, |
|
mtbench: 8.61, |
|
arenaelo:1146, |
|
gsm8k: null, |
|
winogrande: null, |
|
truthfulqa: null, |
|
hellaswag:null, |
|
arc:null, |
|
nothallucination: null, |
|
alpacaeval: 28.6, |
|
parameters: null, |
|
organization: 'Mistral', |
|
license: 'Proprietary', |
|
}, |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
{ |
|
name: 'Meta Llama 3 70B Instruct', |
|
mmlu: 80.06, |
|
mtbench: null, |
|
arenaelo:1207, |
|
gsm8k: 85.44, |
|
winogrande: 82.87, |
|
truthfulqa: 61.81, |
|
hellaswag:85.69, |
|
arc:71.42, |
|
nothallucination: 95.5, |
|
alpacaeval: 34.4, |
|
parameters: '70B', |
|
organization: 'Meta', |
|
license: 'Open Model', |
|
}, |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
] |
|
|
|
// Formats a single benchmark value for a table cell.
// The data uses null for "not measured"; render that as '-' instead of the
// literal string "null" (the original code printed "null" and even "null%").
function formatBenchmarkValue(value, suffix) {
  return (value === null || value === undefined) ? '-' : value + (suffix || '');
}

// Builds the benchmark <table> as an HTML string for the given entries.
// Pure function (no DOM access) so it can be tested in isolation.
// Column order here must match the row order below.
function buildBenchmarkTableHTML(data) {
  let tableHTML = '<table border="1">' +
    '<tr>' +
    '<th>Name</th>' +
    '<th>MMLU</th>' +
    '<th>MT-Bench</th>' +
    '<th>Arena Elo</th>' +
    '<th>GSM8k</th>' +
    '<th>Winogrande</th>' +
    '<th>TruthfulQA</th>' +
    '<th>HellaSwag</th>' +
    '<th>ARC</th>' +
    '<th>AlpacaEval</th>' +
    '<th>Not hallucination</th>' +
    '<th>Parameters</th>' +
    '<th>Organization</th>' +
    '<th>License</th>' +
    '</tr>';

  data.forEach(function (item) {
    tableHTML += '<tr>' +
      '<td>' + item.name + '</td>' +
      '<td>' + formatBenchmarkValue(item.mmlu) + '</td>' +
      '<td>' + formatBenchmarkValue(item.mtbench) + '</td>' +
      '<td>' + formatBenchmarkValue(item.arenaelo) + '</td>' +
      '<td>' + formatBenchmarkValue(item.gsm8k) + '</td>' +
      '<td>' + formatBenchmarkValue(item.winogrande) + '</td>' +
      '<td>' + formatBenchmarkValue(item.truthfulqa) + '</td>' +
      '<td>' + formatBenchmarkValue(item.hellaswag) + '</td>' +
      '<td>' + formatBenchmarkValue(item.arc) + '</td>' +
      // AlpacaEval and the hallucination score are percentages.
      '<td>' + formatBenchmarkValue(item.alpacaeval, '%') + '</td>' +
      '<td>' + formatBenchmarkValue(item.nothallucination, '%') + '</td>' +
      '<td>' + formatBenchmarkValue(item.parameters) + '</td>' +
      '<td>' + item.organization + '</td>' +
      '<td>' + item.license + '</td>' +
      '</tr>';
  });

  return tableHTML + '</table>';
}

// Renders the benchmark comparison table into the #tableBenchMark container.
function setBenchmarkTable(data) {
  document.getElementById('tableBenchMark').innerHTML = buildBenchmarkTableHTML(data);
}
|
|
|
setBenchmarkTable(benchmarkData); |
|
|
|
// Returns the largest value of the named benchmark across all entries.
// Starts from 0, so entries whose value is null (or non-positive) never win
// the comparison — missing scores are effectively ignored.
function getBenchmarkMaxValue(benchmarkName, data) {
  let maxValue = 0;
  for (const entry of data) {
    const value = entry[benchmarkName];
    if (value > maxValue) {
      maxValue = value;
    }
  }
  return maxValue;
}
|
|
|
// Benchmarks plotted on the radar chart, in axis order. This order must match
// the `labels` array passed to the radar Chart below.
const RADAR_BENCHMARKS = [
  'mmlu', 'arenaelo', 'gsm8k', 'winogrande', 'truthfulqa',
  'hellaswag', 'arc', 'alpacaeval', 'nothallucination'
];

// Builds one Chart.js radar dataset per model, with every benchmark
// normalized to a 0-100 range by scaling against the best score seen for
// that benchmark. Replaces nine copy-pasted max/multiplier pairs with a loop.
function getDataSetRadar(data) {
  // Per-benchmark scale factor: best score maps to 100.
  const multipliers = {};
  for (const benchmark of RADAR_BENCHMARKS) {
    multipliers[benchmark] = 100 / getBenchmarkMaxValue(benchmark, data);
  }
  return data.map(item => ({
    label: item.name,
    // null * multiplier coerces to 0, matching the original behavior of
    // plotting missing scores at the origin.
    data: RADAR_BENCHMARKS.map(benchmark => item[benchmark] * multipliers[benchmark]),
    borderWidth: 2
  }));
}
|
// Build the radar datasets once; the original computed this constant and then
// never used it, calling getDataSetRadar a second time for the chart.
const dataSetRadar = getDataSetRadar(benchmarkData);

let data = {
  // Axis labels — order must match the benchmark order used in getDataSetRadar.
  labels: ['MMLU', 'Arena Elo', 'GSM8k', 'Winogrande', 'TruthfulQA', 'HellaSwag', 'ARC', 'AlpacaEval', 'Not Hallucination'],
  datasets: dataSetRadar
};

let options = {
  responsive: true,
  maintainAspectRatio: false,
  // Chart.js v4 (the CDN build in <head>) configures the radar's radial axis
  // under scales.r; the old top-level `scale` option was removed in v3 and
  // was being silently ignored.
  scales: {
    r: {
      ticks: {
        stepSize: 10
      }
    }
  }
};

let ctx = document.getElementById('radarChart').getContext('2d');
new Chart(ctx, {
  type: 'radar',
  data: data,
  options: options
});
|
|
|
|
|
// Renders a bar chart of one benchmark into the canvas with the given id.
// Models are shown in descending order of their score; models whose value is
// null for this benchmark are omitted from the chart.
function updateChart(id, benchmarkName) {
  // Sort a copy: the original sorted benchmarkData in place and assigned the
  // result to an undeclared `benchmarkData2` (an implicit global that would
  // throw in strict mode and reordered the shared array as a side effect).
  const sorted = [...benchmarkData].sort((a, b) => b[benchmarkName] - a[benchmarkName]);
  const entries = sorted.filter(item => item[benchmarkName] !== null);
  const labels = entries.map(item => item.name);
  const values = entries.map(item => item[benchmarkName]);

  const context = document.getElementById(id).getContext('2d');
  new Chart(context, {
    type: 'bar',
    data: {
      labels: labels,
      datasets: [{
        label: benchmarkName,
        data: values
      }]
    },
    options: {
      maintainAspectRatio: false
    }
  });
}
|
// Render one bar chart per benchmark metric; each id matches a <canvas> in <body>.
updateChart('mmluChart','mmlu');
updateChart('gsm8kChart','gsm8k');
updateChart('arenaeloChart','arenaelo');
updateChart('nothallucinationChart','nothallucination');
updateChart('truthfulqaChart','truthfulqa');
updateChart('hellaSwagChart','hellaswag');
updateChart('winograndeChart','winogrande');
updateChart('arcChart','arc');
updateChart('mtbenchChart','mtbench');
updateChart('alpacaevalChart','alpacaeval');
|
|
|
</script> |
|
</body> |
|
</html> |