diff --git a/README.md b/README.md index 4cf5ef50f0abf860e32fab6144f0f2c46f260543..9b54a929f382375c5a10221cb1d003e7b510229c 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,15 @@ --- -title: Chatbot Arena Leaderboard +title: LMSys Chatbot Arena Leaderboard emoji: πŸ†πŸ€– colorFrom: indigo colorTo: green sdk: gradio +sdk_version: 3.50.2 +app_file: app.py pinned: false license: apache-2.0 tags: -- leaderboard -sdk_version: 4.44.1 + - leaderboard --- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference \ No newline at end of file diff --git a/app.py b/app.py index 0bf577082c918bc70852556a874c26f873888034..1fac9ac2b058e1df4c681ae5663a0ecc3427c12e 100644 --- a/app.py +++ b/app.py @@ -1,61 +1,451 @@ -from fastchat.serve.monitor.monitor import build_leaderboard_tab, build_basic_stats_tab, basic_component_values, leader_component_values -from fastchat.utils import build_logger, get_window_url_params_js - +"""A gradio app that renders a static leaderboard. This is used for Hugging Face Space.""" +import ast import argparse import glob -import re +import pickle + import gradio as gr +import numpy as np +import pandas as pd + + +# notebook_url = "https://colab.research.google.com/drive/1RAWb22-PFNI-X1gPVzc927SGUdfr6nsR?usp=sharing" +notebook_url = "https://colab.research.google.com/drive/1KdwokPjirkTmpO_P1WByFNFiqxWQquwH#scrollTo=o_CpbkGEbhrK" + + +basic_component_values = [None] * 6 +leader_component_values = [None] * 5 + + +def make_default_md(arena_df, elo_results): + total_votes = sum(arena_df["num_battles"]) // 2 + total_models = len(arena_df) + + leaderboard_md = f""" +# πŸ† LMSYS Chatbot Arena Leaderboard +| [Vote](https://chat.lmsys.org) | [Blog](https://lmsys.org/blog/2023-05-03-arena/) | [GitHub](https://github.com/lm-sys/FastChat) | [Paper](https://arxiv.org/abs/2306.05685) | [Dataset](https://github.com/lm-sys/FastChat/blob/main/docs/dataset_release.md) | [Twitter](https://twitter.com/lmsysorg) | [Discord](https://discord.gg/HSWAKCrnFx) | + +LMSYS [Chatbot Arena](https://lmsys.org/blog/2023-05-03-arena/) is a crowdsourced open platform for LLM evals. +We've collected over **500,000** human preference votes to rank LLMs with the Elo ranking system. +""" + return leaderboard_md + + +def make_arena_leaderboard_md(arena_df): + total_votes = sum(arena_df["num_battles"]) // 2 + total_models = len(arena_df) + + leaderboard_md = f""" +Total #models: **{total_models}**. Total #votes: **{total_votes}**. Last updated: March 29, 2024. + +Contribute your vote πŸ—³οΈ at [chat.lmsys.org](https://chat.lmsys.org)! Find more analysis in the [notebook]({notebook_url}). +""" + return leaderboard_md + + +def make_full_leaderboard_md(elo_results): + leaderboard_md = f""" +Three benchmarks are displayed: **Arena Elo**, **MT-Bench** and **MMLU**. +- [Chatbot Arena](https://chat.lmsys.org/?arena) - a crowdsourced, randomized battle platform. We use 500K+ user votes to compute Elo ratings. +- [MT-Bench](https://arxiv.org/abs/2306.05685): a set of challenging multi-turn questions. We use GPT-4 to grade the model responses. +- [MMLU](https://arxiv.org/abs/2009.03300) (5-shot): a test to measure a model's multitask accuracy on 57 tasks. + +πŸ’» Code: The MT-bench scores (single-answer grading on a scale of 10) are computed by [fastchat.llm_judge](https://github.com/lm-sys/FastChat/tree/main/fastchat/llm_judge). +The MMLU scores are mostly computed by [InstructEval](https://github.com/declare-lab/instruct-eval). +Higher values are better for all benchmarks. Empty cells mean not available. +""" + return leaderboard_md + + +def make_leaderboard_md_live(elo_results): + leaderboard_md = f""" +# Leaderboard +Last updated: {elo_results["last_updated_datetime"]} +{elo_results["leaderboard_table"]} +""" + return leaderboard_md + + +def update_elo_components(max_num_files, elo_results_file): + log_files = get_log_files(max_num_files) + + # Leaderboard + if elo_results_file is None: # Do live update + battles = clean_battle_data(log_files) + elo_results = report_elo_analysis_results(battles) + + leader_component_values[0] = make_leaderboard_md_live(elo_results) + leader_component_values[1] = elo_results["win_fraction_heatmap"] + leader_component_values[2] = elo_results["battle_count_heatmap"] + leader_component_values[3] = elo_results["bootstrap_elo_rating"] + leader_component_values[4] = elo_results["average_win_rate_bar"] + + # Basic stats + basic_stats = report_basic_stats(log_files) + md0 = f"Last updated: {basic_stats['last_updated_datetime']}" + + md1 = "### Action Histogram\n" + md1 += basic_stats["action_hist_md"] + "\n" + + md2 = "### Anony. Vote Histogram\n" + md2 += basic_stats["anony_vote_hist_md"] + "\n" + + md3 = "### Model Call Histogram\n" + md3 += basic_stats["model_hist_md"] + "\n" + + md4 = "### Model Call (Last 24 Hours)\n" + md4 += basic_stats["num_chats_last_24_hours"] + "\n" + + basic_component_values[0] = md0 + basic_component_values[1] = basic_stats["chat_dates_bar"] + basic_component_values[2] = md1 + basic_component_values[3] = md2 + basic_component_values[4] = md3 + basic_component_values[5] = md4 + + +def update_worker(max_num_files, interval, elo_results_file): + while True: + tic = time.time() + update_elo_components(max_num_files, elo_results_file) + durtaion = time.time() - tic + print(f"update duration: {durtaion:.2f} s") + time.sleep(max(interval - durtaion, 0)) def load_demo(url_params, request: gr.Request): logger.info(f"load_demo. ip: {request.client.host}. params: {url_params}") return basic_component_values + leader_component_values -def build_demo(elo_results_file, leaderboard_table_file): - from fastchat.serve.gradio_web_server import block_css - text_size = gr.themes.sizes.text_lg - # load theme from theme.json - theme = gr.themes.Default.load("theme.json") - # set text size to large - theme.text_size = text_size - theme.set( - button_large_text_size="40px", - button_small_text_size="40px", - button_large_text_weight="1000", - button_small_text_weight="1000", - button_shadow="*shadow_drop_lg", - button_shadow_hover="*shadow_drop_lg", - checkbox_label_shadow="*shadow_drop_lg", - button_shadow_active="*shadow_inset", - button_secondary_background_fill="*primary_300", - button_secondary_background_fill_dark="*primary_700", - button_secondary_background_fill_hover="*primary_200", - button_secondary_background_fill_hover_dark="*primary_500", - button_secondary_text_color="*primary_800", - button_secondary_text_color_dark="white", +def model_hyperlink(model_name, link): + return f'{model_name}' + + +def load_leaderboard_table_csv(filename, add_hyperlink=True): + lines = open(filename).readlines() + heads = [v.strip() for v in lines[0].split(",")] + rows = [] + for i in range(1, len(lines)): + row = [v.strip() for v in lines[i].split(",")] + for j in range(len(heads)): + item = {} + for h, v in zip(heads, row): + if h == "Arena Elo rating": + if v != "-": + v = int(ast.literal_eval(v)) + else: + v = np.nan + elif h == "MMLU": + if v != "-": + v = round(ast.literal_eval(v) * 100, 1) + else: + v = np.nan + elif h == "MT-bench (win rate %)": + if v != "-": + v = round(ast.literal_eval(v[:-1]), 1) + else: + v = np.nan + elif h == "MT-bench (score)": + if v != "-": + v = round(ast.literal_eval(v), 2) + else: + v = np.nan + item[h] = v + if add_hyperlink: + item["Model"] = model_hyperlink(item["Model"], item["Link"]) + rows.append(item) + + return rows + + +def build_basic_stats_tab(): + empty = "Loading ..." + basic_component_values[:] = [empty, None, empty, empty, empty, empty] + + md0 = gr.Markdown(empty) + gr.Markdown("#### Figure 1: Number of model calls and votes") + plot_1 = gr.Plot(show_label=False) + with gr.Row(): + with gr.Column(): + md1 = gr.Markdown(empty) + with gr.Column(): + md2 = gr.Markdown(empty) + with gr.Row(): + with gr.Column(): + md3 = gr.Markdown(empty) + with gr.Column(): + md4 = gr.Markdown(empty) + return [md0, plot_1, md1, md2, md3, md4] + +def get_full_table(arena_df, model_table_df): + values = [] + for i in range(len(model_table_df)): + row = [] + model_key = model_table_df.iloc[i]["key"] + model_name = model_table_df.iloc[i]["Model"] + # model display name + row.append(model_name) + if model_key in arena_df.index: + idx = arena_df.index.get_loc(model_key) + row.append(round(arena_df.iloc[idx]["rating"])) + else: + row.append(np.nan) + row.append(model_table_df.iloc[i]["MT-bench (score)"]) + row.append(model_table_df.iloc[i]["MMLU"]) + # Organization + row.append(model_table_df.iloc[i]["Organization"]) + # license + row.append(model_table_df.iloc[i]["License"]) + + values.append(row) + values.sort(key=lambda x: -x[1] if not np.isnan(x[1]) else 1e9) + return values + + +def get_arena_table(arena_df, model_table_df): + # sort by rating + arena_df = arena_df.sort_values(by=["rating"], ascending=False) + values = [] + for i in range(len(arena_df)): + row = [] + model_key = arena_df.index[i] + model_name = model_table_df[model_table_df["key"] == model_key]["Model"].values[ + 0 + ] + + # rank + ranking = arena_df.iloc[i].get("final_ranking") or i+1 + row.append(ranking) + # model display name + row.append(model_name) + # elo rating + row.append(round(arena_df.iloc[i]["rating"])) + upper_diff = round( + arena_df.iloc[i]["rating_q975"] - arena_df.iloc[i]["rating"] + ) + lower_diff = round( + arena_df.iloc[i]["rating"] - arena_df.iloc[i]["rating_q025"] + ) + row.append(f"+{upper_diff}/-{lower_diff}") + # num battles + row.append(round(arena_df.iloc[i]["num_battles"])) + # Organization + row.append( + model_table_df[model_table_df["key"] == model_key]["Organization"].values[0] + ) + # license + row.append( + model_table_df[model_table_df["key"] == model_key]["License"].values[0] + ) + + cutoff_date = model_table_df[model_table_df["key"] == model_key]["Knowledge cutoff date"].values[0] + if cutoff_date == "-": + row.append("Unknown") + else: + row.append(cutoff_date) + values.append(row) + return values + +def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=False): + if elo_results_file is None: # Do live update + default_md = "Loading ..." + p1 = p2 = p3 = p4 = None + else: + with open(elo_results_file, "rb") as fin: + elo_results = pickle.load(fin) + if "full" in elo_results: + elo_results = elo_results["full"] + + p1 = elo_results["win_fraction_heatmap"] + p2 = elo_results["battle_count_heatmap"] + p3 = elo_results["bootstrap_elo_rating"] + p4 = elo_results["average_win_rate_bar"] + arena_df = elo_results["leaderboard_table_df"] + default_md = make_default_md(arena_df, elo_results) + + md_1 = gr.Markdown(default_md, elem_id="leaderboard_markdown") + if leaderboard_table_file: + data = load_leaderboard_table_csv(leaderboard_table_file) + model_table_df = pd.DataFrame(data) + + with gr.Tabs() as tabs: + # arena table + arena_table_vals = get_arena_table(arena_df, model_table_df) + with gr.Tab("Arena Elo", id=0): + md = make_arena_leaderboard_md(arena_df) + gr.Markdown(md, elem_id="leaderboard_markdown") + gr.Dataframe( + headers=[ + "Rank", + "πŸ€– Model", + "⭐ Arena Elo", + "πŸ“Š 95% CI", + "πŸ—³οΈ Votes", + "Organization", + "License", + "Knowledge Cutoff", + ], + datatype=[ + "str", + "markdown", + "number", + "str", + "number", + "str", + "str", + "str", + ], + value=arena_table_vals, + elem_id="arena_leaderboard_dataframe", + height=700, + column_widths=[50, 200, 120, 100, 100, 150, 150, 100], + wrap=True, + ) + with gr.Tab("Full Leaderboard", id=1): + md = make_full_leaderboard_md(elo_results) + gr.Markdown(md, elem_id="leaderboard_markdown") + full_table_vals = get_full_table(arena_df, model_table_df) + gr.Dataframe( + headers=[ + "πŸ€– Model", + "⭐ Arena Elo", + "πŸ“ˆ MT-bench", + "πŸ“š MMLU", + "Organization", + "License", + ], + datatype=["markdown", "number", "number", "number", "str", "str"], + value=full_table_vals, + elem_id="full_leaderboard_dataframe", + column_widths=[200, 100, 100, 100, 150, 150], + height=700, + wrap=True, + ) + if not show_plot: + gr.Markdown( + """ ## Visit our [HF space](https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard) for more analysis! + If you want to see more models, please help us [add them](https://github.com/lm-sys/FastChat/blob/main/docs/arena.md#how-to-add-a-new-model). + """, + elem_id="leaderboard_markdown", + ) + else: + pass + + gr.Markdown( + f"""Note: we take the 95% confidence interval into account when determining a model's ranking. +A model is ranked higher only if its lower bound of model score is higher than the upper bound of the other model's score. +See Figure 3 below for visualization of the confidence intervals. +""", + elem_id="leaderboard_markdown" ) + leader_component_values[:] = [default_md, p1, p2, p3, p4] + + if show_plot: + gr.Markdown( + f"""## More Statistics for Chatbot Arena\n +Below are figures for more statistics. The code for generating them is also included in this [notebook]({notebook_url}). +You can find more discussions in this blog [post](https://lmsys.org/blog/2023-12-07-leaderboard/). + """, + elem_id="leaderboard_markdown" + ) + with gr.Row(): + with gr.Column(): + gr.Markdown( + "#### Figure 1: Fraction of Model A Wins for All Non-tied A vs. B Battles" + ) + plot_1 = gr.Plot(p1, show_label=False) + with gr.Column(): + gr.Markdown( + "#### Figure 2: Battle Count for Each Combination of Models (without Ties)" + ) + plot_2 = gr.Plot(p2, show_label=False) + with gr.Row(): + with gr.Column(): + gr.Markdown( + "#### Figure 3: Confidence Intervals on Model Strength (via Bootstrapping)" + ) + plot_3 = gr.Plot(p3, show_label=False) + with gr.Column(): + gr.Markdown( + "#### Figure 4: Average Win Rate Against All Other Models (Assuming Uniform Sampling and No Ties)" + ) + plot_4 = gr.Plot(p4, show_label=False) + + gr.Markdown(acknowledgment_md) + + if show_plot: + return [md_1, plot_1, plot_2, plot_3, plot_4] + return [md_1] + +block_css = """ +#notice_markdown { + font-size: 104% +} +#notice_markdown th { + display: none; +} +#notice_markdown td { + padding-top: 6px; + padding-bottom: 6px; +} +#leaderboard_markdown { + font-size: 104% +} +#leaderboard_markdown td { + padding-top: 6px; + padding-bottom: 6px; +} +#leaderboard_dataframe td { + line-height: 0.1em; +} +footer { + display:none !important +} +.sponsor-image-about img { + margin: 0 20px; + margin-top: 20px; + height: 40px; + max-height: 100%; + width: auto; + float: left; +} +""" + +acknowledgment_md = """ +### Acknowledgment +We thank [Kaggle](https://www.kaggle.com/), [MBZUAI](https://mbzuai.ac.ae/), [a16z](https://www.a16z.com/), [Together AI](https://www.together.ai/), [Anyscale](https://www.anyscale.com/), [HuggingFace](https://huggingface.co/) for their generous [sponsorship](https://lmsys.org/donations/). + + +""" + +def build_demo(elo_results_file, leaderboard_table_file): + text_size = gr.themes.sizes.text_lg + with gr.Blocks( title="Chatbot Arena Leaderboard", - theme=theme, + theme=gr.themes.Base(text_size=text_size), css=block_css, ) as demo: leader_components = build_leaderboard_tab( - elo_results_file, leaderboard_table_file, arena_hard_file, show_plot=True, mirror=True + elo_results_file, leaderboard_table_file, show_plot=True ) return demo + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--share", action="store_true") - parser.add_argument("--host", default="0.0.0.0") - parser.add_argument("--port", type=int, default=7860) args = parser.parse_args() - logger = build_logger("monitor", "monitor.log") - logger.info(f"args: {args}") - elo_result_files = glob.glob("elo_results_*.pkl") elo_result_files.sort(key=lambda x: int(x[12:-4])) elo_result_file = elo_result_files[-1] @@ -63,10 +453,6 @@ if __name__ == "__main__": leaderboard_table_files = glob.glob("leaderboard_table_*.csv") leaderboard_table_files.sort(key=lambda x: int(x[18:-4])) leaderboard_table_file = leaderboard_table_files[-1] - - arena_hard_files = glob.glob("arena_hard_auto_leaderboard_*.csv") - arena_hard_files.sort(key=lambda x: float(x[29:32])) - arena_hard_file = arena_hard_files[-1] demo = build_demo(elo_result_file, leaderboard_table_file) - demo.launch(share=args.share, server_name=args.host, server_port=args.port) \ No newline at end of file + demo.launch(share=args.share) diff --git a/arena_hard_auto_leaderboard_v0.1.csv b/arena_hard_auto_leaderboard_v0.1.csv deleted file mode 100644 index a80b4cb255e9a7b0d894e9018c80ec227782540d..0000000000000000000000000000000000000000 --- a/arena_hard_auto_leaderboard_v0.1.csv +++ /dev/null @@ -1,61 +0,0 @@ -model,score,rating_q025,rating_q975,CI,avg_tokens,date -gpt-4-turbo-2024-04-09,82.63,80.75,84.6,"(1.9, 2.0)",662.0,2024-07-31 -claude-3-5-sonnet-20240620,79.35,77.25,80.62,"(2.1, 1.3)",567.0,2024-07-31 -gpt-4o-2024-05-13,79.21,77.42,80.71,"(1.8, 1.5)",696.0,2024-07-31 -gpt-4-0125-preview,77.96,75.94,79.9,"(2.0, 1.9)",619.0,2024-07-31 -athene-70b-0725,76.83,74.84,78.74,"(2.0, 1.9)",683.0,2024-07-31 -gpt-4o-mini-2024-07-18,74.94,72.66,77.07,"(2.3, 2.1)",668.0,2024-07-31 -gemini-1.5-pro-api-0514,71.96,69.62,74.62,"(2.3, 2.7)",676.0,2024-07-31 -yi-large-preview,71.48,69.02,73.37,"(2.5, 1.9)",720.0,2024-07-31 -mistral-large-2407,70.42,68.11,72.43,"(2.3, 2.0)",623.0,2024-07-31 -llama-3.1-405b-instruct,64.09,61.43,66.55,"(2.7, 2.5)",633.0,2024-07-31 -glm-4-0520,63.84,61.28,66.19,"(2.6, 2.3)",636.0,2024-07-31 -yi-large,63.7,61.76,65.86,"(1.9, 2.2)",626.0,2024-07-31 -deepseek-coder-v2,62.3,59.82,64.72,"(2.5, 2.4)",578.0,2024-07-31 -claude-3-opus-20240229,60.36,57.56,62.34,"(2.8, 2.0)",541.0,2024-07-31 -gemma-2-27b-it,57.51,55.11,60.12,"(2.4, 2.6)",577.0,2024-07-31 -llama-3.1-70b-instruct,55.73,52.85,58.2,"(2.9, 2.5)",628.0,2024-07-31 -glm-4-0116,55.72,53.83,58.16,"(1.9, 2.4)",622.0,2024-07-31 -gemini-1.5-pro-api-0409-preview,53.37,51.13,56.66,"(2.2, 3.3)",478.0,2024-07-31 -glm-4-air,50.88,48.62,53.21,"(2.3, 2.3)",619.0,2024-07-31 -gpt-4-0314,50.0,50.0,50.0,"(0.0, 0.0)",423.0,2024-07-31 -gemini-1.5-flash-api-0514,49.61,47.46,52.17,"(2.1, 2.6)",642.0,2024-07-31 -qwen2-72b-instruct,46.86,44.57,49.29,"(2.3, 2.4)",515.0,2024-07-31 -claude-3-sonnet-20240229,46.8,44.12,49.04,"(2.7, 2.2)",552.0,2024-07-31 -llama-3-70b-instruct,46.57,43.84,49.18,"(2.7, 2.6)",591.0,2024-07-31 -claude-3-haiku-20240307,41.47,39.57,44.02,"(1.9, 2.6)",505.0,2024-07-31 -gpt-4-0613,37.9,35.6,40.36,"(2.3, 2.5)",354.0,2024-07-31 -mistral-large-2402,37.71,34.81,39.77,"(2.9, 2.1)",400.0,2024-07-31 -mixtral-8x22b-instruct-v0.1,36.36,34.21,38.55,"(2.1, 2.2)",430.0,2024-07-31 -qwen1.5-72b-chat,36.12,33.88,38.15,"(2.2, 2.0)",474.0,2024-07-31 -phi-3-medium-4k-instruct,33.37,31.26,35.14,"(2.1, 1.8)",517.0,2024-07-31 -command-r-plus,33.07,30.85,35.12,"(2.2, 2.0)",541.0,2024-07-31 -mistral-medium,31.9,29.66,34.31,"(2.2, 2.4)",485.0,2024-07-31 -phi-3-small-8k-instruct,29.77,27.94,31.97,"(1.8, 2.2)",568.0,2024-07-31 -mistral-next,27.37,25.4,29.09,"(2.0, 1.7)",297.0,2024-07-31 -gpt-3.5-turbo-0613,24.82,22.54,26.29,"(2.3, 1.5)",401.0,2024-07-31 -dbrx-instruct-preview,24.63,22.33,26.83,"(2.3, 2.2)",415.0,2024-07-31 -claude-2.0,23.99,21.71,25.65,"(2.3, 1.7)",295.0,2024-07-31 -mixtral-8x7b-instruct-v0.1,23.4,21.38,25.41,"(2.0, 2.0)",457.0,2024-07-31 -gpt-3.5-turbo-0125,23.34,21.67,25.27,"(1.7, 1.9)",329.0,2024-07-31 -yi-34b-chat,23.15,20.75,24.7,"(2.4, 1.6)",611.0,2024-07-31 -starling-lm-7b-beta,23.01,20.81,24.66,"(2.2, 1.6)",530.0,2024-07-31 -claude-2.1,22.77,20.65,25.43,"(2.1, 2.7)",290.0,2024-07-31 -llama-3.1-8b-instruct,21.34,19.71,23.09,"(1.6, 1.8)",861.0,2024-07-31 -snorkel-mistral-pairrm-dpo,20.73,19.04,22.05,"(1.7, 1.3)",564.0,2024-07-31 -llama-3-8b-instruct,20.56,18.82,22.61,"(1.7, 2.1)",585.0,2024-07-31 -gpt-3.5-turbo-1106,18.87,17.06,20.58,"(1.8, 1.7)",285.0,2024-07-31 -gpt-3.5-turbo-0314,18.05,16.57,20.06,"(1.5, 2.0)",334.0,2024-07-31 -gemini-pro,17.8,15.96,19.32,"(1.8, 1.5)",322.0,2024-07-31 -snowflake-arctic-instruct,17.61,16.12,19.27,"(1.5, 1.7)",365.0,2024-07-31 -command-r,17.02,15.73,18.51,"(1.3, 1.5)",432.0,2024-07-31 -phi-3-mini-128k-instruct,15.43,13.94,17.02,"(1.5, 1.6)",609.0,2024-07-31 -tulu-2-dpo-70b,14.99,13.05,16.82,"(1.9, 1.8)",550.0,2024-07-31 -starling-lm-7b-alpha,12.8,11.23,14.5,"(1.6, 1.7)",483.0,2024-07-31 -mistral-7b-instruct,12.57,11.05,14.11,"(1.5, 1.5)",541.0,2024-07-31 -gemma-1.1-7b-it,12.09,10.61,13.43,"(1.5, 1.3)",341.0,2024-07-31 -llama-2-70b-chat,11.55,10.02,13.01,"(1.5, 1.5)",595.0,2024-07-31 -vicuna-33b,8.63,7.59,9.84,"(1.0, 1.2)",451.0,2024-07-31 -gemma-7b-it,7.47,6.5,8.6,"(1.0, 1.1)",378.0,2024-07-31 -gemma-1.1-2b-it,3.37,2.74,4.14,"(0.6, 0.8)",316.0,2024-07-31 -gemma-2b-it,3.0,2.33,3.67,"(0.7, 0.7)",369.0,2024-07-31 diff --git a/elo_results_20240327.pkl b/elo_results_20240327.pkl deleted file mode 100644 index fef6689875eb5149742c387dc34579e3cc592c25..0000000000000000000000000000000000000000 --- a/elo_results_20240327.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bab4e9fa00e9d7c8244723993174af2c4f35ffc8487cc3059504b72658f06f43 -size 457743 diff --git a/elo_results_20240329.pkl b/elo_results_20240329.pkl new file mode 100644 index 0000000000000000000000000000000000000000..39941ac0cd24cca9ac130c0930921cd82b08e722 --- /dev/null +++ b/elo_results_20240329.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f4c037f68c9ddbf27b70b1cb333ca37bf70ff9a3cddad7a93cd62bca709cd77 +size 115776 diff --git a/elo_results_20240403.pkl b/elo_results_20240403.pkl deleted file mode 100644 index 29b8aa627f12aa92198d256adb0fb1fe49f129bd..0000000000000000000000000000000000000000 --- a/elo_results_20240403.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ce8cebf41da8c06eee0f37156e01be83cc43182e0f00444311b4ad97a83154be -size 690286 diff --git a/elo_results_20240409.pkl b/elo_results_20240409.pkl deleted file mode 100644 index 48a0b30e715086aa6932f47727e1529ab5efaa5d..0000000000000000000000000000000000000000 --- a/elo_results_20240409.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e6774f780b63f569666e9a85b12eddceef3af75e1d1799ff7c6e0529102950c3 -size 119947 diff --git a/elo_results_20240410.pkl b/elo_results_20240410.pkl deleted file mode 100644 index f316b006e313b8f58bb7c76b00555646988f8035..0000000000000000000000000000000000000000 --- a/elo_results_20240410.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8a5757ab9692c6121451f2c787700507fe6b866837329ab0a47a9003a274338f -size 120963 diff --git a/elo_results_20240411.pkl b/elo_results_20240411.pkl deleted file mode 100644 index 6b5720055ed523c1b24d77717bfba2febd4a404e..0000000000000000000000000000000000000000 --- a/elo_results_20240411.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fada8d86ddb6dae319c5bda602d921859cc4280fdd53388eff446d80c3ab8192 -size 1183214 diff --git a/elo_results_20240413.pkl b/elo_results_20240413.pkl deleted file mode 100644 index 0d558c4f84d20d70cf44a7ec9c113b070b227220..0000000000000000000000000000000000000000 --- a/elo_results_20240413.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3ad8ebb2a8602a7c72382fc65521fbe7b06bb36dcf6b6cc582c6b89b1d7b1a87 -size 1064654 diff --git a/elo_results_20240418.pkl b/elo_results_20240418.pkl deleted file mode 100644 index c91ed8b794c8c69adf3f887f8df15627cba4ee19..0000000000000000000000000000000000000000 --- a/elo_results_20240418.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1b05163db100df9ef62c7efb3332891d3321c6094e787af3b4ef4a9afe2becdb -size 1130887 diff --git a/elo_results_20240419.pkl b/elo_results_20240419.pkl deleted file mode 100644 index 98afdfeff93df1f4eecb9c42da7eb2a8dcbe29f8..0000000000000000000000000000000000000000 --- a/elo_results_20240419.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:05239b0b4406f270fdc845632981024dec639b8351dcb1a2308def3bbcea2e68 -size 1130756 diff --git a/elo_results_20240422.pkl b/elo_results_20240422.pkl deleted file mode 100644 index 8552e44b8f70955ce00149c6f1555b8a402a4a16..0000000000000000000000000000000000000000 --- a/elo_results_20240422.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:436c6bc88b6c03b672a3a87ddb3b101ec1c5ff03d47d64196986b5d6ca7909cd -size 1254718 diff --git a/elo_results_20240426.pkl b/elo_results_20240426.pkl deleted file mode 100644 index 6f722c9c6d9bca9bca3d593fb7753eba07f6b94e..0000000000000000000000000000000000000000 --- a/elo_results_20240426.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a39b33094ac93d4a1e0bc57bfbb17368515ce5a7e4504d3d1e310a14cd056943 -size 1275849 diff --git a/elo_results_20240501.pkl b/elo_results_20240501.pkl deleted file mode 100644 index d3f30146e5521128d575111fa5e766152a34dbbe..0000000000000000000000000000000000000000 --- a/elo_results_20240501.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:35f506d5d34555cbc055abc901623fae3aa7b429057cf3039cb1b460fdc8f41c -size 1159628 diff --git a/elo_results_20240508.pkl b/elo_results_20240508.pkl deleted file mode 100644 index 3d7ac7f086480ca0ceede83e0c44ce209e8b2d2e..0000000000000000000000000000000000000000 --- a/elo_results_20240508.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:79acc98f859a5f597338eff0ae98025abfec80087d60336c0d735e7dd3595eb2 -size 1188396 diff --git a/elo_results_20240515.pkl b/elo_results_20240515.pkl deleted file mode 100644 index 45de87bd8521fbd2e1385c2b9914d5e46cfe196b..0000000000000000000000000000000000000000 --- a/elo_results_20240515.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:692e9b280f1587b85f28aaeceee52928a92f6a98ee81e3d63a2d789c82eb9abc -size 1596977 diff --git a/elo_results_20240516.pkl b/elo_results_20240516.pkl deleted file mode 100644 index ab3cbdb287a701ffff98e23104390a399c585af2..0000000000000000000000000000000000000000 --- a/elo_results_20240516.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e3ff3e278c66aa28aece3e52369d128ca6707673e0e811304294c5d8a85aaf81 -size 1476125 diff --git a/elo_results_20240519.pkl b/elo_results_20240519.pkl deleted file mode 100644 index dcabf43fd7b91f08edb6e4d829998dbe0c0f9ba5..0000000000000000000000000000000000000000 --- a/elo_results_20240519.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:70c1136517de9396d72b2e14faee88382e503d0b80e56a5131e220173f6b472b -size 1604729 diff --git a/elo_results_20240520.pkl b/elo_results_20240520.pkl deleted file mode 100644 index 02a43a5581af40f66894b2a4912d0e568b48aa90..0000000000000000000000000000000000000000 --- a/elo_results_20240520.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:82d5306e5b88813cea3490a0cf2d02952219c52257789d6077caeee986996567 -size 1628933 diff --git a/elo_results_20240527.pkl b/elo_results_20240527.pkl deleted file mode 100644 index eecd4258ddb6eebaf42832ec98b1910836f58883..0000000000000000000000000000000000000000 --- a/elo_results_20240527.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ca33433b15176a4b7c4f13584b2129ebd60f059524648c4a4a986aad4a84bc1e -size 1666346 diff --git a/elo_results_20240602.pkl b/elo_results_20240602.pkl deleted file mode 100644 index ebe795f2d777548f8a522e3a4043468c8184b5fc..0000000000000000000000000000000000000000 --- a/elo_results_20240602.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1e884bc9a41a3aa9916e29ca34e5ac2e52e6f8d7e314e380facd06f9ae855145 -size 2278603 diff --git a/elo_results_20240606.pkl b/elo_results_20240606.pkl deleted file mode 100644 index af8c6ef0b430a77d596efb4628dd744bb10adc83..0000000000000000000000000000000000000000 --- a/elo_results_20240606.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6073ca1b10e9ac34c2b67b73636dcd2303b4a9291c9a440ad3813c33ef5fa170 -size 2295194 diff --git a/elo_results_20240611.pkl b/elo_results_20240611.pkl deleted file mode 100644 index b45da125c3c37538c58f41c44a8e72da3966054b..0000000000000000000000000000000000000000 --- a/elo_results_20240611.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8b0f14fede0abeee60825682ffab3b07b50af5d9924de3c8114ddac469b34779 -size 2310921 diff --git a/elo_results_20240617.pkl b/elo_results_20240617.pkl deleted file mode 100644 index b29c65627fd05d80668020eccc51b295a04e473b..0000000000000000000000000000000000000000 --- a/elo_results_20240617.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a6e53f4d339bbc3420b349e89a315a1c6d2fa3c9847b206aeb02e1e5170aea73 -size 2491948 diff --git a/elo_results_20240621.pkl b/elo_results_20240621.pkl deleted file mode 100644 index f047e887b654bd0ac14166c3e212a5dcf3d92a1f..0000000000000000000000000000000000000000 --- a/elo_results_20240621.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b098f9f69009376d87ef4d317c81c648d0558f7c912f693e285de5d3115e309b -size 2526260 diff --git a/elo_results_20240623.pkl b/elo_results_20240623.pkl deleted file mode 100644 index b80e123bc5b3c93456d8fe8f2e027a39db9ddff6..0000000000000000000000000000000000000000 --- a/elo_results_20240623.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1469aac4504cd49bdbf1566093c5be912fd6ffe27f62213fd4961eefc92b4e30 -size 2544361 diff --git a/elo_results_20240626.pkl b/elo_results_20240626.pkl deleted file mode 100644 index bd5e0762c2882f099c88023226cf239dd1254e00..0000000000000000000000000000000000000000 --- a/elo_results_20240626.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f6b7c6dc746e8dfb7fed966d9c027468dccbd31da5866af6fff0083478ef52ff -size 2429037 diff --git a/elo_results_20240629.pkl b/elo_results_20240629.pkl deleted file mode 100644 index 72de7e06a147627c5132c7801c38a840a2e9dece..0000000000000000000000000000000000000000 --- a/elo_results_20240629.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:acdaa9f0e89e01a5d1ee914d750727bf9877cde8bf2e195439bc7625b80f197f -size 2679801 diff --git a/elo_results_20240706.pkl b/elo_results_20240706.pkl deleted file mode 100644 index 6345240f6cf3bf668c3e5b1ae7a5e455f4819785..0000000000000000000000000000000000000000 --- a/elo_results_20240706.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dbd774b3b25712428c96cf54b5cb8c4d912e8b8215edbcee3dee0974fd898c8e -size 2702290 diff --git a/elo_results_20240708.pkl b/elo_results_20240708.pkl deleted file mode 100644 index b6b09474b5e40a618ef96ce3f0fd524cbe743b64..0000000000000000000000000000000000000000 --- a/elo_results_20240708.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5d72cd3bf16c9af40910a2faf9f1403df2788596390e0f13452835748968679d -size 3154928 diff --git a/elo_results_20240716.pkl b/elo_results_20240716.pkl deleted file mode 100644 index ac677421ae762b07d1b75181dd3150199722d946..0000000000000000000000000000000000000000 --- a/elo_results_20240716.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dec0c95bbaf8a2ed382b1676eba0b22ad89fa8815d28bc22c94cc6ea00205e5d -size 3029902 diff --git a/elo_results_20240722.pkl b/elo_results_20240722.pkl deleted file mode 100644 index fbe037d8928043cb39409b138ad5606274dbbb02..0000000000000000000000000000000000000000 --- a/elo_results_20240722.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c01b5a786ffd8bf64b72d39ca3fc7dee5483852c6a515f515532c8096a6e16d4 -size 3054518 diff --git a/elo_results_20240725.pkl b/elo_results_20240725.pkl deleted file mode 100644 index f9cfed558e73369e627e87908c799908c8bd7513..0000000000000000000000000000000000000000 --- a/elo_results_20240725.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1bfde449e424f12c9316f0c64062fc1ef9926e4a924bbaeafa455fcde0decb6f -size 3073542 diff --git a/elo_results_20240730.pkl b/elo_results_20240730.pkl deleted file mode 100644 index 8e80b89afd743a5b2441f086ce1b0157684cfec6..0000000000000000000000000000000000000000 --- a/elo_results_20240730.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8db59f5f0852f26600f85c3188547e5d97b4641906c979bcc178cd5be7a7554c -size 3137995 diff --git a/elo_results_20240731.pkl b/elo_results_20240731.pkl deleted file mode 100644 index 5c5059a2127ace18f3a40cde4bce63ab99a82b21..0000000000000000000000000000000000000000 --- a/elo_results_20240731.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9aaff49a611fef8ff7bec058827d7de3426973c15bc538ac6bb39e764cc14b34 -size 3157482 diff --git a/elo_results_20240801.pkl b/elo_results_20240801.pkl deleted file mode 100644 index ce29adecd3d6a14b73ad91dbd45c7005590510a0..0000000000000000000000000000000000000000 --- a/elo_results_20240801.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cdaa16f762adc54cbe05ab03dff88587491193852846b749980294b7f1ea2bec -size 3182414 diff --git a/elo_results_20240805.pkl b/elo_results_20240805.pkl deleted file mode 100644 index 1642f4f3e8f3097be329765b7137964e9d889d7e..0000000000000000000000000000000000000000 --- a/elo_results_20240805.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e638bfe870e4409c4d768d57287cc8f6310caf85191d8020ba89f7f59ee9f6d8 -size 3202114 diff --git a/elo_results_20240806.pkl b/elo_results_20240806.pkl deleted file mode 100644 index f819aa8680baf21478b656a471befba83e702c69..0000000000000000000000000000000000000000 --- a/elo_results_20240806.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a4f31f36636a280589bb1039f9d1f405a989df6a8f74d1af30555d891b23a416 -size 3261205 diff --git a/elo_results_20240813.pkl b/elo_results_20240813.pkl deleted file mode 100644 index e8057bfc5e534d167ba3c6906dfd922d9dc6dfb4..0000000000000000000000000000000000000000 --- a/elo_results_20240813.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b4c5dfd4247b704f07e61ae27dc0642a3d3cfa9a6872cc3dc03d1888a594de9f -size 2943734 diff --git a/elo_results_20240822.pkl b/elo_results_20240822.pkl deleted file mode 100644 index fe4b8dd4cedf4fed1abd51d6d79862f0b8ddb852..0000000000000000000000000000000000000000 --- a/elo_results_20240822.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c733894de1252232a63ce3632abe52504fb6bcf43e17bb49fea2b5ad8d76116f -size 3004697 diff --git a/elo_results_20240823.pkl b/elo_results_20240823.pkl deleted file mode 100644 index 39aabf028ee4a141e2e22bc0307c673deee64122..0000000000000000000000000000000000000000 --- a/elo_results_20240823.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ec103aa6bf5d0f02f8bd2c69c8ccfc8f1be1b44c7dc004d967c8d5ce470975b5 -size 3039588 diff --git a/elo_results_20240827.pkl b/elo_results_20240827.pkl deleted file mode 100644 index 3a1d73a8638601489968c871df34ce6f952e055d..0000000000000000000000000000000000000000 --- a/elo_results_20240827.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fbcf63bc492b9e2018fdd2c82924f375f12db20dd577f2c139a8ff82a2d08159 -size 3093445 diff --git a/elo_results_20240828.pkl b/elo_results_20240828.pkl deleted file mode 100644 index d78a983b50f97d7307855b54ac5671a788a528df..0000000000000000000000000000000000000000 --- a/elo_results_20240828.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b7da13b5f061a7a5a112e5ca45ff707d6cf6259c8a01b40ea5b77bbd5bd3d5b0 -size 3819732 diff --git a/elo_results_20240904.pkl b/elo_results_20240904.pkl deleted file mode 100644 index 90cec8fbb7fc633806da2ee0917b7703ef40eb63..0000000000000000000000000000000000000000 --- a/elo_results_20240904.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cf6117b1e28bb982e965d20b927685dce98750a82c255b868588b5b2318aaee9 -size 3486555 diff --git a/elo_results_20240915.pkl b/elo_results_20240915.pkl deleted file mode 100644 index d2f794cf74d59c20ac6c55ffc9b89fd71c2c56ba..0000000000000000000000000000000000000000 --- a/elo_results_20240915.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:42512fe148c81eeab05961ed64fe446dc7f6ed3703f976fc4c8c2a6a3a3e6bef -size 3726145 diff --git a/elo_results_20240917.pkl b/elo_results_20240917.pkl deleted file mode 100644 index 18cb15bbff1d9cc92ce95f149c8a90bc7b225eae..0000000000000000000000000000000000000000 --- a/elo_results_20240917.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d15ed36a6e429a9213e9230fbd28de05e7788f758c4282660dd77a4689f98590 -size 3768775 diff --git a/elo_results_20240927.pkl b/elo_results_20240927.pkl deleted file mode 100644 index 73ba31a8f7a9578e658c8ac77202e124448d8567..0000000000000000000000000000000000000000 --- a/elo_results_20240927.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7c96f9a95fbbf44b8cb46129cc8ea09eef1bb6e43b8ec12d1c8837091d23ee69 -size 3860967 diff --git a/elo_results_20241007.pkl b/elo_results_20241007.pkl deleted file mode 100644 index b2340014a9cf21d81b56e96053363a7d94cd48b1..0000000000000000000000000000000000000000 --- a/elo_results_20241007.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3891408ad17e52226989783562eb75f784dfb1d0a30dd9051e943b48cd0b117c -size 3919717 diff --git a/elo_results_20241015.pkl b/elo_results_20241015.pkl deleted file mode 100644 index fc1d2cbfe9cf74abfb5010f09115098738811e19..0000000000000000000000000000000000000000 --- a/elo_results_20241015.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6daea1d6b24298f32f23c903872aa6c852453de29bf82aa0d2e7aaf5337af715 -size 4058396 diff --git a/elo_results_20241023.pkl b/elo_results_20241023.pkl deleted file mode 100644 index 725a73d29663c8b086d4aa6d181adf1a73ada6a6..0000000000000000000000000000000000000000 --- a/elo_results_20241023.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2e592a6807b4affbc2e203b385ff491743116b4854c9bcd3c3bbf037fba6092f -size 4374135 diff --git a/elo_results_20241028.pkl b/elo_results_20241028.pkl deleted file mode 100644 index 8d842243e970765af03f35c24d60a07c7d2380e4..0000000000000000000000000000000000000000 --- a/elo_results_20241028.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:eb1b06bd57f44c46862bb7c326c39e09fa0e298257d3e8fb96d466a7200fc77d -size 4417082 diff --git a/elo_results_20241104.pkl b/elo_results_20241104.pkl deleted file mode 100644 index 8e5e4a6576afef1f7b2e845f593d2e4b9cc0ff99..0000000000000000000000000000000000000000 --- a/elo_results_20241104.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c0129cb6833f0d81f9407489883c588f5712666ecdc35111de725d3502e61a07 -size 4472516 diff --git a/elo_results_20241112.pkl b/elo_results_20241112.pkl deleted file mode 100644 index d786021eec86992585ce65afd7e3b4cc2f389a96..0000000000000000000000000000000000000000 --- a/elo_results_20241112.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ab321a67e6831f495d8ac7d6e052f16eae495fb4a82454f8d7bd361bda6f4706 -size 4472047 diff --git a/elo_results_20241113.pkl b/elo_results_20241113.pkl deleted file mode 100644 index 955dd1f4c114aaf2bb55e57fe3121df51d999769..0000000000000000000000000000000000000000 --- a/elo_results_20241113.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2d0493678e9a9d2d86bdc8adf483824ef731b8efe8a41c5cef1d81bf21f5e7e9 -size 4496349 diff --git a/elo_results_20241120.pkl b/elo_results_20241120.pkl deleted file mode 100644 index b1141e1fc92ac755350806437a00846db761c059..0000000000000000000000000000000000000000 --- a/elo_results_20241120.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3e5f7d30d02d57f07d0506d385d2007fb7ae4a33a24a2e8e62fc95e9434c565e -size 4585156 diff --git a/elo_results_20241121.pkl b/elo_results_20241121.pkl deleted file mode 100644 index a305fbaeecef151fe0b38b17fb5d3c8eff35ee26..0000000000000000000000000000000000000000 --- a/elo_results_20241121.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:610c77a4c4b47f1bb256a3dfa1808d5d32b5d9c819aa5f34b1aae857a959c303 -size 4613615 diff --git a/elo_results_20241122.pkl b/elo_results_20241122.pkl deleted file mode 100644 index a81fbb2d275c06d7eaf10d712eb3f00330898d57..0000000000000000000000000000000000000000 --- a/elo_results_20241122.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5387ed94e44688ff4cacc52644290409dd09580a0c745016c5d473ff9f3368bd -size 4656304 diff --git a/elo_results_20241201.pkl b/elo_results_20241201.pkl deleted file mode 100644 index 7513f8ad18618c98b89fb0a2f85d82a8fb840d1d..0000000000000000000000000000000000000000 --- a/elo_results_20241201.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e43a1652cfd47ab95459d2a9ac8e5fc8065001f8160dcc8c460c4b1bbda2e58e -size 4743052 diff --git a/elo_results_20241205.pkl b/elo_results_20241205.pkl deleted file mode 100644 index 6c45519a9618cd269d20658254703fb1abf2f4f2..0000000000000000000000000000000000000000 --- a/elo_results_20241205.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:651b61a2fdb0f16884975c2d71577f67e311ed0b5675badbe5db2f4b0068d3e5 -size 4757570 diff --git a/elo_results_20241210.pkl b/elo_results_20241210.pkl deleted file mode 100644 index d146c7d81d251c3e9a7264e52a451a0b44004e5c..0000000000000000000000000000000000000000 --- a/elo_results_20241210.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ebcf524b476eba3cc260d9e3f8f8e42d187c56481748645276cd70ea6e1caaa2 -size 4796359 diff --git a/elo_results_20241215.pkl b/elo_results_20241215.pkl deleted file mode 100644 index 4df4e94b2be193b35f693b3b5e41047e8917bc12..0000000000000000000000000000000000000000 --- a/elo_results_20241215.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:af899ac7ceed98b9c127c69fad5b6115bd02e00fe94a2a580734639ef9cdb417 -size 4928341 diff --git a/elo_results_20241218.pkl b/elo_results_20241218.pkl deleted file mode 100644 index 36b8a80c422a755528d73bf50407f4869c0150a9..0000000000000000000000000000000000000000 --- a/elo_results_20241218.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9380779e9effeef27f1face0759c2e11075c8b2c4a809f565a9cfd40944bb7a2 -size 4956709 diff --git a/elo_results_20241222.pkl b/elo_results_20241222.pkl deleted file mode 100644 index 2d2476a7f4751678bcc08cf9ebc4e5ed92e6efb9..0000000000000000000000000000000000000000 --- a/elo_results_20241222.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:030bc6db11694fcf5e025e6aa849d28e61dc4c7bcb66a0b562f7cd2673d9599d -size 5018497 diff --git a/elo_results_20241230.pkl b/elo_results_20241230.pkl deleted file mode 100644 index 66073af1240e7aa1c646fb86e157c5ddd7df9b1a..0000000000000000000000000000000000000000 --- a/elo_results_20241230.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:517d699389bdbf7f01ec341306be72ffad2ee36d224a240369f817ae0486db87 -size 5051817 diff --git a/elo_results_20250105.pkl b/elo_results_20250105.pkl deleted file mode 100644 index 510d6a8ec8e34d5937afab80d996d9afe0806b52..0000000000000000000000000000000000000000 --- a/elo_results_20250105.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:609f7b97d9c67d79214c8083a3d8f24f6e0893c3fd45cb079eb2748b7acbc5e8 -size 5979482 diff --git a/elo_results_20250115.pkl b/elo_results_20250115.pkl deleted file mode 100644 index 58c410aef4fa9300512bd98e8d600daa2f4d2122..0000000000000000000000000000000000000000 --- a/elo_results_20250115.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:758c111032a6e355760d5d4074b435174100d43faf1d96ca07e34b76477c56a3 -size 6054581 diff --git a/elo_results_20250119.pkl b/elo_results_20250119.pkl deleted file mode 100644 index efe2336c7126147095f45c62aee16c197c95ef27..0000000000000000000000000000000000000000 --- a/elo_results_20250119.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:27105cd16c8d3456548c8ab8e93d52abc71634b1e70237173b260cb0bbab93df -size 6082897 diff --git a/elo_results_20250121.pkl b/elo_results_20250121.pkl deleted file mode 100644 index ff42637be3a379a68844c32b863cb35bdd1237a4..0000000000000000000000000000000000000000 --- a/elo_results_20250121.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:81bfdd73af127cf20a9c5effd9871d7d5c5b1dd2ce1e9c2c5076b45feff844df -size 6111217 diff --git a/elo_results_20250122.pkl b/elo_results_20250122.pkl deleted file mode 100644 index 41cb554947b4216b2de1c6a9a14b5afd22362877..0000000000000000000000000000000000000000 --- a/elo_results_20250122.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d9e220831e892b7f69e6ac0f4d21c3b091aaf59b0b51c7a19e5618980b07068a -size 6115326 diff --git a/elo_results_20250124.pkl b/elo_results_20250124.pkl deleted file mode 100644 index 11843cebb3a45561df3e70febed109f3818f8543..0000000000000000000000000000000000000000 --- a/elo_results_20250124.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b3ea59502af8135c8cd24074d6a1ff5f710553ab46acaab9259a6bbeed535851 -size 6234818 diff --git a/elo_results_20250128.pkl b/elo_results_20250128.pkl deleted file mode 100644 index c3c6ad9e3d94f45da77307bb91898aa3aae6b6d0..0000000000000000000000000000000000000000 --- a/elo_results_20250128.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bdbd9f8bacb67e3e4e829e077b5621fb443f50b6ce0f73ab017607ffccccce1a -size 6172158 diff --git a/elo_results_20250203.pkl b/elo_results_20250203.pkl deleted file mode 100644 index 8611f00cec792e3675bad4d24bebeee4bac297e5..0000000000000000000000000000000000000000 --- a/elo_results_20250203.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c591519e3818519bace906c2a79aa9cba7cb09ee5074a7f389def5ff84c1b6aa -size 6221605 diff --git a/elo_results_20250205.pkl b/elo_results_20250205.pkl deleted file mode 100644 index b3e2daf88d3cdaf9e3f90047d53e89fa315f2c7b..0000000000000000000000000000000000000000 --- a/elo_results_20250205.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:53603d4d811e02261964a97bc8cbb46be4380f914388667974ba483e94262e3e -size 6305480 diff --git a/elo_results_20250206.pkl b/elo_results_20250206.pkl deleted file mode 100644 index 46c8c9c21d415817d034775c2739af0cfde8cea3..0000000000000000000000000000000000000000 --- a/elo_results_20250206.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8d3e6a23cd1fd5fc826ffbdc3084256e6f375e02b1a2bd950f3ca8da8e38ad17 -size 6330761 diff --git a/elo_results_20250209.pkl b/elo_results_20250209.pkl deleted file mode 100644 index 7da699f46c9d0fe76053e51dce79eacc533aa43e..0000000000000000000000000000000000000000 --- a/elo_results_20250209.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4f4adfd4b26595c489a0aa41dc5357cb97536c9f67f87e6a7da3bfa8ea2b7d57 -size 6334820 diff --git a/elo_results_20250211.pkl b/elo_results_20250211.pkl deleted file mode 100644 index 4acad251990c607c13d97274ced3061310ecb0df..0000000000000000000000000000000000000000 --- a/elo_results_20250211.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d1aec63fbea7585a4744bbbc95a0ef89503b9121ae0e8527408d3ff7661f388b -size 6387624 diff --git a/elo_results_20250214.pkl b/elo_results_20250214.pkl deleted file mode 100644 index ccba0f5774bde96d0fd112e434f356352651bd60..0000000000000000000000000000000000000000 --- a/elo_results_20250214.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5134236a0ed3ab0b651e17302749a3dbb0b4c97322722274adf6e42bfa995dc3 -size 6381600 diff --git a/elo_results_20250217.pkl b/elo_results_20250217.pkl deleted file mode 100644 index 3904ed7c5d2c760e6ac3cf8e47bbca37f0e2e8e3..0000000000000000000000000000000000000000 --- a/elo_results_20250217.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4cdd16087da85d7b31beaf8ceb0e7787e4b72845203640c83f4d38f2795cd632 -size 6405776 diff --git a/elo_results_20250221.pkl b/elo_results_20250221.pkl deleted file mode 100644 index cddcda3c04e1c322b1bbea1b582f7006019a756a..0000000000000000000000000000000000000000 --- a/elo_results_20250221.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0268b9a9d065fd898cfa12bf87d044bb2a6ff014f9abbdaa9199f2edc03cb0d0 -size 6433933 diff --git a/elo_results_20250227.pkl b/elo_results_20250227.pkl deleted file mode 100644 index 7d0dd9b5a1b3ce65d3178a6634365efd18196cbc..0000000000000000000000000000000000000000 --- a/elo_results_20250227.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ebcb3f395269eeacc7e126c475925889dd7bcc034a874a357367c6dcd252b209 -size 6511359 diff --git a/elo_results_20250303.pkl b/elo_results_20250303.pkl deleted file mode 100644 index 8f42bb82c8da70194018e52ad511412bb9625656..0000000000000000000000000000000000000000 --- a/elo_results_20250303.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:759c7cc1f1ef8f19deafca21cab4b0c7755fa600e002ed7fddd0814249a4b879 -size 6562718 diff --git a/elo_results_20250311.pkl b/elo_results_20250311.pkl deleted file mode 100644 index 9f601ae1f0f9ad51a58db9a72a8e7c86a160d83e..0000000000000000000000000000000000000000 --- a/elo_results_20250311.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b23be10d86c94218f7ed0dca2e7b40c5c66e327dc2bf224ed0c047b0ce99d73a -size 6599164 diff --git a/index.html b/index.html deleted file mode 100644 index 15fd4fc282a9e999464c7338d8638c51b0152c9e..0000000000000000000000000000000000000000 --- a/index.html +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - Chatbot Arena - - - -
Please visit leaderboard.lmsys.org.
- - \ No newline at end of file diff --git a/leaderboard_table_20240404.csv b/leaderboard_table_20240329.csv similarity index 96% rename from leaderboard_table_20240404.csv rename to leaderboard_table_20240329.csv index 7077b0a66448693e98763a444cf8fd3f0d679aef..8de88033d6e3127d8f996a27a1adb33ec8b64edd 100644 --- a/leaderboard_table_20240404.csv +++ b/leaderboard_table_20240329.csv @@ -88,7 +88,4 @@ codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta, olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -dbrx-instruct,DBRX-instruct,-,-,2024/3,Apache-2.0,Databricks,- -command-r,Command R,-,-,2024/3,Apache-2.0,Cohere,- -qwen1.5-14b-chat,Qwen1.5-14B-Chat,-,-,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,-,-,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ \ No newline at end of file +command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r diff --git a/leaderboard_table_20240409.csv b/leaderboard_table_20240409.csv deleted file mode 100644 index 63a9e1873331d85729547907bd8269f2492043cb..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240409.csv +++ /dev/null @@ -1,95 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview -bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/ -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it diff --git a/leaderboard_table_20240410.csv b/leaderboard_table_20240410.csv deleted file mode 100644 index 29b6abb1c8f3938eed30dbd18ec74e36af435096..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240410.csv +++ /dev/null @@ -1,96 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview -bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/ -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm diff --git a/leaderboard_table_20240411.csv b/leaderboard_table_20240411.csv deleted file mode 100644 index 122a4895ffeca892197d1268a9a48db329d22000..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240411.csv +++ /dev/null @@ -1,97 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview -bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/ -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 diff --git a/leaderboard_table_20240413.csv b/leaderboard_table_20240413.csv deleted file mode 100644 index 8367266c00d5ceddbfc7151415035fd7919a3c18..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240413.csv +++ /dev/null @@ -1,97 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview -bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/ -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 diff --git a/leaderboard_table_20240418.csv b/leaderboard_table_20240418.csv deleted file mode 100644 index 9a1f47119a145103ef0c1b44a9b2c5b98fe2db0c..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240418.csv +++ /dev/null @@ -1,104 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview -bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/ -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ diff --git a/leaderboard_table_20240419.csv b/leaderboard_table_20240419.csv deleted file mode 100644 index 9a1f47119a145103ef0c1b44a9b2c5b98fe2db0c..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240419.csv +++ /dev/null @@ -1,104 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview -bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/ -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ diff --git a/leaderboard_table_20240422.csv b/leaderboard_table_20240422.csv deleted file mode 100644 index e19efe3df79488428fd9776ebd1d023a1ebaeee4..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240422.csv +++ /dev/null @@ -1,104 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview -bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/ -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Meta Llama 3 70b Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Meta Llama 3 8b Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ diff --git a/leaderboard_table_20240426.csv b/leaderboard_table_20240426.csv deleted file mode 100644 index 7549557f41030a8acf3eb793139c028f9e09fe4c..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240426.csv +++ /dev/null @@ -1,106 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview -bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/ -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini 1.5 Pro API-0409-Preview,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ diff --git a/leaderboard_table_20240501.csv b/leaderboard_table_20240501.csv deleted file mode 100644 index 575c728a47f6e6e3d0f3ae6a6c36038697020f7a..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240501.csv +++ /dev/null @@ -1,107 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview -bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/ -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini 1.5 Pro API-0409-Preview,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ diff --git a/leaderboard_table_20240508.csv b/leaderboard_table_20240508.csv deleted file mode 100644 index a109cc4f397fdd61876c5ea3f24220072fc5b688..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240508.csv +++ /dev/null @@ -1,110 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview -bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/ -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini 1.5 Pro API-0409-Preview,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model diff --git a/leaderboard_table_20240515.csv b/leaderboard_table_20240515.csv deleted file mode 100644 index b228d865606168548c610b6a007d31ca60efc881..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240515.csv +++ /dev/null @@ -1,111 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview -bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/ -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini 1.5 Pro API-0409-Preview,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ diff --git a/leaderboard_table_20240516.csv b/leaderboard_table_20240516.csv deleted file mode 100644 index 13d969a7533215aa63d1c0c0f7d317f3e6113f62..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240516.csv +++ /dev/null @@ -1,112 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview -bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/ -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini 1.5 Pro API-0409-Preview,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ diff --git a/leaderboard_table_20240519.csv b/leaderboard_table_20240519.csv deleted file mode 100644 index 13d969a7533215aa63d1c0c0f7d317f3e6113f62..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240519.csv +++ /dev/null @@ -1,112 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview -bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/ -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini 1.5 Pro API-0409-Preview,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ diff --git a/leaderboard_table_20240520.csv b/leaderboard_table_20240520.csv deleted file mode 100644 index 68952f3a76261005e3aabf87fed34662ec584bb9..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240520.csv +++ /dev/null @@ -1,114 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview -bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/ -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini 1.5 Pro API-0409-Preview,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://www.01.ai/ -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ diff --git a/leaderboard_table_20240527.csv b/leaderboard_table_20240527.csv deleted file mode 100644 index 7e7adbf77f3db6ae4ba7d7f91fd74ecfac6bd2d5..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240527.csv +++ /dev/null @@ -1,117 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview -bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/ -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-API-0409-Preview,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://www.01.ai/ -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini-Advanced-0514,-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-API-0514,-,0.859,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-API-0514,-,0.789,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/flash/ diff --git a/leaderboard_table_20240602.csv b/leaderboard_table_20240602.csv deleted file mode 100644 index 2d287e464e680a03a930f364a38fbf2fd144b324..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240602.csv +++ /dev/null @@ -1,121 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview -bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/ -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-API-0409-Preview,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://www.01.ai/ -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini-Advanced-0514,-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-API-0514,-,0.859,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-API-0514,-,0.789,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/flash/ -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct diff --git a/leaderboard_table_20240606.csv b/leaderboard_table_20240606.csv deleted file mode 100644 index 40b358182ba85c76fd5165c81de1dc273343001b..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240606.csv +++ /dev/null @@ -1,122 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview -bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/ -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-API-0409-Preview,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://www.01.ai/ -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini-Advanced-0514,-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-API-0514,-,0.859,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-API-0514,-,0.789,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/flash/ -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ diff --git a/leaderboard_table_20240611.csv b/leaderboard_table_20240611.csv deleted file mode 100644 index 77d390b34ce79d6258e816ea914079fd4a5667b0..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240611.csv +++ /dev/null @@ -1,123 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview -bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/ -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-API-0409-Preview,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini-Advanced-0514,-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-API-0514,-,0.859,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-API-0514,-,0.789,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/flash/ -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing diff --git a/leaderboard_table_20240617.csv b/leaderboard_table_20240617.csv deleted file mode 100644 index 993b6c24054b756419e783ca4eda182965433fcc..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240617.csv +++ /dev/null @@ -1,125 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview -bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/ -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-API-0409-Preview,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini-Advanced-0514,-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-API-0514,-,0.859,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-API-0514,-,0.789,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/flash/ -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation diff --git a/leaderboard_table_20240621.csv b/leaderboard_table_20240621.csv deleted file mode 100644 index b416647e9a8a83d0160690c68e63e5b623e43e92..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240621.csv +++ /dev/null @@ -1,127 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview -bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/ -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-API-0409-Preview,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini-Advanced-0514,-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-API-0514,-,0.859,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-API-0514,-,0.789,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/flash/ -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct diff --git a/leaderboard_table_20240623.csv b/leaderboard_table_20240623.csv deleted file mode 100644 index c1e6aefee713e82967777ffa6dc67e8f897909ca..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240623.csv +++ /dev/null @@ -1,128 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview -bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/ -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-API-0409-Preview,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini-Advanced-0514,-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-API-0514,-,0.859,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-API-0514,-,0.789,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/flash/ -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet,-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet diff --git a/leaderboard_table_20240626.csv b/leaderboard_table_20240626.csv deleted file mode 100644 index e526c73afaf6582fec27b94add9fe5cb0b9cf109..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240626.csv +++ /dev/null @@ -1,130 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview -bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/ -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-API-0409-Preview,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini-Advanced-0514,-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-API-0514,-,0.859,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-API-0514,-,0.789,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/flash/ -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet,-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -late-june-chatbot,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://ai.google.dev/gemma -im-also-a-late-june-chatbot,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://ai.google.dev/gemma diff --git a/leaderboard_table_20240629.csv b/leaderboard_table_20240629.csv deleted file mode 100644 index bf5362757a32759d92b2bc83ff98ee738eda6173..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240629.csv +++ /dev/null @@ -1,131 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview -bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/ -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-API-0409-Preview,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini-Advanced-0514,-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-API-0514,-,0.859,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-API-0514,-,0.789,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/flash/ -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet,-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://ai.google.dev/gemma -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://ai.google.dev/gemma -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ diff --git a/leaderboard_table_20240706.csv b/leaderboard_table_20240706.csv deleted file mode 100644 index 5a9e14444cc854cfa655c6aad9429303ae913c84..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240706.csv +++ /dev/null @@ -1,132 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview -bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/ -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-API-0409-Preview,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini-Advanced-0514,-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-API-0514,-,0.859,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-API-0514,-,0.789,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/flash/ -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet,-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://ai.google.dev/gemma -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://ai.google.dev/gemma -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4k-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct diff --git a/leaderboard_table_20240708.csv b/leaderboard_table_20240708.csv deleted file mode 100644 index 5a9e14444cc854cfa655c6aad9429303ae913c84..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240708.csv +++ /dev/null @@ -1,132 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview -bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/ -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-API-0409-Preview,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini-Advanced-0514,-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-API-0514,-,0.859,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-API-0514,-,0.789,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/flash/ -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet,-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://ai.google.dev/gemma -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://ai.google.dev/gemma -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4k-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct diff --git a/leaderboard_table_20240716.csv b/leaderboard_table_20240716.csv deleted file mode 100644 index 624a4d8595b61e7215c4c405affcaf20df87ac1b..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240716.csv +++ /dev/null @@ -1,134 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview -bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/ -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-API-0409-Preview,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini-Advanced-0514,-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-API-0514,-,0.859,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-API-0514,-,0.789,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/flash/ -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet,-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://ai.google.dev/gemma -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://ai.google.dev/gemma -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4k-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,Proprietary,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B diff --git a/leaderboard_table_20240722.csv b/leaderboard_table_20240722.csv deleted file mode 100644 index 0f68d45316c38d7685f7b071efa8ca2723ad4868..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240722.csv +++ /dev/null @@ -1,135 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview -bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/ -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-API-0409-Preview,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini-Advanced-0514,-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-API-0514,-,0.859,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-API-0514,-,0.789,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/flash/ -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet,-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://ai.google.dev/gemma -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://ai.google.dev/gemma -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4k-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,Proprietary,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ diff --git a/leaderboard_table_20240725.csv b/leaderboard_table_20240725.csv deleted file mode 100644 index 5c49e4dc49d08da8242435f2e4c8809afb6b3fc6..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240725.csv +++ /dev/null @@ -1,136 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview -bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/ -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-API-0409-Preview,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini-Advanced-0514,-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-API-0514,-,0.859,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-API-0514,-,0.789,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/flash/ -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet,-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://ai.google.dev/gemma -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://ai.google.dev/gemma -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4k-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,Proprietary,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -athene-70b,Athene-70b,-,-,2024/07,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B diff --git a/leaderboard_table_20240730.csv b/leaderboard_table_20240730.csv deleted file mode 100644 index 2355e1b6d19692263d8e43f402fc8d39630dd657..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240730.csv +++ /dev/null @@ -1,140 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview -bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/ -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-API-0409-Preview,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini-Advanced-0514,-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-API-0514,-,0.859,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-API-0514,-,0.789,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/flash/ -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet,-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://ai.google.dev/gemma -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://ai.google.dev/gemma -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4k-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,Proprietary,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405b-Instruct,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70b-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8b-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70b,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ diff --git a/leaderboard_table_20240731.csv b/leaderboard_table_20240731.csv deleted file mode 100644 index 8c88a9aec1821c1a28fd75d26a16f7fb8078239c..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240731.csv +++ /dev/null @@ -1,144 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview -bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/ -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-API-0409-Preview,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini-Advanced-0514,-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-API-0514,-,0.859,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-API-0514,-,0.789,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/flash/ -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet,-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://ai.google.dev/gemma -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://ai.google.dev/gemma -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4k-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,Proprietary,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405b-Instruct,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70b-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8b-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70b,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2B-it,-,0.513,2024/7,Gemma license,Google,https://ai.google.dev/gemma#introducing-gemma-2 -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Proprietary,Mistral,https://mistral.ai/news/mistral-large-2407/ diff --git a/leaderboard_table_20240801.csv b/leaderboard_table_20240801.csv deleted file mode 100644 index a9f2c14b7bbd026b7b4082c51c1f5e819f9ccaad..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240801.csv +++ /dev/null @@ -1,145 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview -bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/ -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-API-0409-Preview,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini-Advanced-0514,-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-API-0514,-,0.859,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-API-0514,-,0.789,2023/11,Proprietary,Google,https://deepmind.google/technologies/gemini/flash/ -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet,-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://ai.google.dev/gemma -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://ai.google.dev/gemma -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4k-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405b-Instruct,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70b-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8b-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70b,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2B-it,-,0.513,2024/7,Gemma license,Google,https://ai.google.dev/gemma#introducing-gemma-2 -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Proprietary,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 diff --git a/leaderboard_table_20240805.csv b/leaderboard_table_20240805.csv deleted file mode 100644 index 9018aa99ad97f15dd1e4d92e85ccd56bf065e167..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240805.csv +++ /dev/null @@ -1,145 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2b-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet,-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27b-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9b-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4k-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405b-Instruct,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70b-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8b-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70b,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 diff --git a/leaderboard_table_20240806.csv b/leaderboard_table_20240806.csv deleted file mode 100644 index 0d43c786c2ff1fd0208d6c68b84a73de0cf1faab..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240806.csv +++ /dev/null @@ -1,148 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2b-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet,-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27b-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9b-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4k-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405b-Instruct,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70b-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8b-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70b,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 diff --git a/leaderboard_table_20240813.csv b/leaderboard_table_20240813.csv deleted file mode 100644 index 9da612e15fa68772ec8b11407b49d8d013edcbc8..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240813.csv +++ /dev/null @@ -1,149 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2b-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet,-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27b-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9b-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4k-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405b-Instruct,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70b-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8b-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70b,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest-2024-08-08,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 diff --git a/leaderboard_table_20240822.csv b/leaderboard_table_20240822.csv deleted file mode 100644 index 3aaa8dcd63b9d3862d2246baaf46a823caa3cd40..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240822.csv +++ /dev/null @@ -1,154 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2b-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet,-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27b-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9b-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4k-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405b-Instruct,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70b-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8b-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70b,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128k-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct diff --git a/leaderboard_table_20240823.csv b/leaderboard_table_20240823.csv deleted file mode 100644 index 03e55291d2056291127ae013ccf60acc918860c6..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240823.csv +++ /dev/null @@ -1,156 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2b-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet,-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27b-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9b-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4k-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405b-Instruct,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70b-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8b-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70b,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128k-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 diff --git a/leaderboard_table_20240827.csv b/leaderboard_table_20240827.csv deleted file mode 100644 index 93df3817b5be6ad20ea460f8366c7cd68c9cbbf2..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240827.csv +++ /dev/null @@ -1,160 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2b-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet,-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27b-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9b-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4k-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405b-Instruct,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70b-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8b-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70b,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128k-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8b-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ diff --git a/leaderboard_table_20240828.csv b/leaderboard_table_20240828.csv deleted file mode 100644 index 93df3817b5be6ad20ea460f8366c7cd68c9cbbf2..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240828.csv +++ /dev/null @@ -1,160 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2b-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet,-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27b-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9b-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4k-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405b-Instruct,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70b-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8b-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70b,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128k-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8b-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ diff --git a/leaderboard_table_20240904.csv b/leaderboard_table_20240904.csv deleted file mode 100644 index 3ea3c3a9aee3855e50d8d19bb914a929a9c6102c..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240904.csv +++ /dev/null @@ -1,166 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2b-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet,-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27b-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9b-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4k-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405b-Instruct,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70b-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8b-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70b,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128k-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8b-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9b-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72b-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct diff --git a/leaderboard_table_20240915.csv b/leaderboard_table_20240915.csv deleted file mode 100644 index 510df8ce1a503038c13949327fa3496fdab311a5..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240915.csv +++ /dev/null @@ -1,174 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2b-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet,-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27b-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9b-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4k-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405b-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405b-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405b-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70b-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8b-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70b,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128k-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8b-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9b-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72b-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,qwen2-vl-7b-instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes diff --git a/leaderboard_table_20240917.csv b/leaderboard_table_20240917.csv deleted file mode 100644 index 6017236ba9cfdd0f0286bf33eb3376b9f6460bbd..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240917.csv +++ /dev/null @@ -1,176 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2b-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet,-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27b-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9b-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4k-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405b-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405b-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405b-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70b-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8b-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70b,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128k-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8b-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9b-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72b-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,qwen2-vl-7b-instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 diff --git a/leaderboard_table_20240927.csv b/leaderboard_table_20240927.csv deleted file mode 100644 index 19fc340733c57cd2c7acbe21129ce6cc17f91e89..0000000000000000000000000000000000000000 --- a/leaderboard_table_20240927.csv +++ /dev/null @@ -1,184 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2b-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet,-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27b-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9b-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4k-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405b-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405b-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405b-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70b-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8b-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70b,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128k-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8b-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9b-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72b-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7b-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20b-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3b-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1b-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90b-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11b-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12b-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ diff --git a/leaderboard_table_20241007.csv b/leaderboard_table_20241007.csv deleted file mode 100644 index 5ceba06080824ed13cda0c93d71e23113758a2e8..0000000000000000000000000000000000000000 --- a/leaderboard_table_20241007.csv +++ /dev/null @@ -1,187 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2b-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet,-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27b-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9b-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4k-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405b-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405b-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405b-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70b-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8b-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70b,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128k-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8b-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9b-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72b-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7b-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20b-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3b-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1b-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90b-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11b-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12b-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b diff --git a/leaderboard_table_20241015.csv b/leaderboard_table_20241015.csv deleted file mode 100644 index e9eb2eda1495002185ba830cd2430e9448a2de66..0000000000000000000000000000000000000000 --- a/leaderboard_table_20241015.csv +++ /dev/null @@ -1,191 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2b-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet,-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27b-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9b-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4k-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405b-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405b-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405b-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70b-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8b-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70b,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128k-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8b-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9b-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72b-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7b-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20b-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3b-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1b-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90b-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11b-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12b-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction diff --git a/leaderboard_table_20241023.csv b/leaderboard_table_20241023.csv deleted file mode 100644 index 3e35c45155975251c61a5980cbafd931e9334c00..0000000000000000000000000000000000000000 --- a/leaderboard_table_20241023.csv +++ /dev/null @@ -1,194 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2b-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-pro-001,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -gemini-1.5-flash-001,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet,-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27b-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9b-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4k-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405b-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405b-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405b-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70b-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8b-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70b,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128k-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8b-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9b-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72b-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7b-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20b-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3b-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1b-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90b-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11b-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12b-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -llama-3.1-nemotron-70b-instruct,Llama-3.1-Nemotron-70b-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://build.nvidia.com/nvidia/llama-3_1-nemotron-70b-instruct diff --git a/leaderboard_table_20241028.csv b/leaderboard_table_20241028.csv deleted file mode 100644 index 9971fd3b5a70a1e1501d6f9a7eb5161a2f0d9977..0000000000000000000000000000000000000000 --- a/leaderboard_table_20241028.csv +++ /dev/null @@ -1,196 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2b-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70b-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8b-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-pro-001,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -gemini-1.5-flash-001,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet (20240620),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27b-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9b-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4k-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405b-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405b-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405b-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70b-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8b-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70b,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128k-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8b-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9b-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72b-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7b-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20b-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3b-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1b-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90b-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11b-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12b-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -llama-3.1-nemotron-70b-instruct,Llama-3.1-Nemotron-70b-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct -llama-3.1-nemotron-51b-instruct,Llama-3.1-Nemotron-51b-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3_1-Nemotron-51B-Instruct -claude-3-5-sonnet-20241022,Claude 3.5 Sonnet (20241022),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use diff --git a/leaderboard_table_20241104.csv b/leaderboard_table_20241104.csv deleted file mode 100644 index 60c23a1647db6c05f974f78e9a5ac7451efaddbe..0000000000000000000000000000000000000000 --- a/leaderboard_table_20241104.csv +++ /dev/null @@ -1,200 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7B-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7B,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70B-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7B-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7B-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7B-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70B-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8B-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-pro-001,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -gemini-1.5-flash-001,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet (20240620),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4K-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405B-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70B-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8B-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70B,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26B,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128K-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8B-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9B-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72B-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7B-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72B-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20B-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12B-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -llama-3.1-nemotron-70b-instruct,Llama-3.1-Nemotron-70B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct -llama-3.1-nemotron-51b-instruct,Llama-3.1-Nemotron-51B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3_1-Nemotron-51B-Instruct -claude-3-5-sonnet-20241022,Claude 3.5 Sonnet (20241022),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -molmo-72b-0924,Molmo-72B-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-72B-0924 -molmo-7b-d-0924,Molmo-7B-D-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-7B-D-0924 -reka-core-20240904,Reka-Core-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240904,Reka-Flash-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models diff --git a/leaderboard_table_20241112.csv b/leaderboard_table_20241112.csv deleted file mode 100644 index a2fa00fbca4bb19027e21f509333ec1013999ebc..0000000000000000000000000000000000000000 --- a/leaderboard_table_20241112.csv +++ /dev/null @@ -1,202 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7B-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7B,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70B-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7B-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7B-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7B-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70B-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8B-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-pro-001,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -gemini-1.5-flash-001,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet (20240620),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4K-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405B-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70B-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8B-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70B,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26B,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128K-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8B-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9B-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72B-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7B-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72B-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20B-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12B-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -llama-3.1-nemotron-70b-instruct,Llama-3.1-Nemotron-70B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct -llama-3.1-nemotron-51b-instruct,Llama-3.1-Nemotron-51B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3_1-Nemotron-51B-Instruct -claude-3-5-sonnet-20241022,Claude 3.5 Sonnet (20241022),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -molmo-72b-0924,Molmo-72B-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-72B-0924 -molmo-7b-d-0924,Molmo-7B-D-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-7B-D-0924 -reka-core-20240904,Reka-Core-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240904,Reka-Flash-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -hunyuan-standard-256k,Hunyuan-Standard-256K,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -ministral-8b-2410,Ministral-8B-2410,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Ministral-8B-Instruct-2410 diff --git a/leaderboard_table_20241113.csv b/leaderboard_table_20241113.csv deleted file mode 100644 index 1854dcf0b85257bfbcf8b35ffb82ed772593a35e..0000000000000000000000000000000000000000 --- a/leaderboard_table_20241113.csv +++ /dev/null @@ -1,203 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7B-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7B,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70B-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7B-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7B-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7B-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70B-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8B-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-pro-001,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -gemini-1.5-flash-001,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet (20240620),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4K-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405B-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70B-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8B-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70B,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26B,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128K-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8B-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9B-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72B-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7B-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72B-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20B-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12B-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -llama-3.1-nemotron-70b-instruct,Llama-3.1-Nemotron-70B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct -llama-3.1-nemotron-51b-instruct,Llama-3.1-Nemotron-51B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3_1-Nemotron-51B-Instruct -claude-3-5-sonnet-20241022,Claude 3.5 Sonnet (20241022),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -molmo-72b-0924,Molmo-72B-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-72B-0924 -molmo-7b-d-0924,Molmo-7B-D-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-7B-D-0924 -reka-core-20240904,Reka-Core-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240904,Reka-Flash-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -hunyuan-standard-256k,Hunyuan-Standard-256K,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -ministral-8b-2410,Ministral-8B-2410,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Ministral-8B-Instruct-2410 -gemini-exp-1114,Gemini-Exp-1114,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1114&model=gemini-exp-1114 diff --git a/leaderboard_table_20241120.csv b/leaderboard_table_20241120.csv deleted file mode 100644 index c1e4104af3593a7d38b5acd1e91182d5ab71ce99..0000000000000000000000000000000000000000 --- a/leaderboard_table_20241120.csv +++ /dev/null @@ -1,207 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7B-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7B,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70B-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7B-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7B-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7B-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70B-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8B-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-pro-001,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -gemini-1.5-flash-001,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet (20240620),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4K-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405B-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70B-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8B-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70B,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26B,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128K-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8B-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9B-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72B-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7B-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72B-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20B-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12B-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -llama-3.1-nemotron-70b-instruct,Llama-3.1-Nemotron-70B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct -llama-3.1-nemotron-51b-instruct,Llama-3.1-Nemotron-51B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3_1-Nemotron-51B-Instruct -claude-3-5-sonnet-20241022,Claude 3.5 Sonnet (20241022),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -molmo-72b-0924,Molmo-72B-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-72B-0924 -molmo-7b-d-0924,Molmo-7B-D-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-7B-D-0924 -reka-core-20240904,Reka-Core-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240904,Reka-Flash-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -hunyuan-standard-256k,Hunyuan-Standard-256K,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -ministral-8b-2410,Ministral-8B-2410,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Ministral-8B-Instruct-2410 -gemini-exp-1114,Gemini-Exp-1114,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1114&model=gemini-exp-1114 -chatgpt-4o-latest-20241120,ChatGPT-4o-latest (2024-11-20),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -qwen2.5-coder-32b-instruct,Qwen2.5-Coder-32B-Instruct,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct -granite-3.0-8b-instruct,Granite-3.0-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-8b-instruct -granite-3.0-2b-instruct,Granite-3.0-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-2b-instruct diff --git a/leaderboard_table_20241121.csv b/leaderboard_table_20241121.csv deleted file mode 100644 index 43f59d649ed0eef03f90638ff8a21bb4edc6b64b..0000000000000000000000000000000000000000 --- a/leaderboard_table_20241121.csv +++ /dev/null @@ -1,209 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7B-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7B,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70B-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7B-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7B-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7B-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70B-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8B-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-pro-001,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -gemini-1.5-flash-001,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet (20240620),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4K-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405B-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70B-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8B-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70B,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26B,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128K-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8B-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9B-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72B-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7B-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72B-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20B-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12B-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -llama-3.1-nemotron-70b-instruct,Llama-3.1-Nemotron-70B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct -llama-3.1-nemotron-51b-instruct,Llama-3.1-Nemotron-51B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3_1-Nemotron-51B-Instruct -claude-3-5-sonnet-20241022,Claude 3.5 Sonnet (20241022),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -molmo-72b-0924,Molmo-72B-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-72B-0924 -molmo-7b-d-0924,Molmo-7B-D-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-7B-D-0924 -reka-core-20240904,Reka-Core-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240904,Reka-Flash-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -hunyuan-standard-256k,Hunyuan-Standard-256K,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -ministral-8b-2410,Ministral-8B-2410,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Ministral-8B-Instruct-2410 -gemini-exp-1114,Gemini-Exp-1114,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1114&model=gemini-exp-1114 -chatgpt-4o-latest-20241120,ChatGPT-4o-latest (2024-11-20),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -qwen2.5-coder-32b-instruct,Qwen2.5-Coder-32B-Instruct,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct -granite-3.0-8b-instruct,Granite-3.0-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-8b-instruct -granite-3.0-2b-instruct,Granite-3.0-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-2b-instruct -gemini-exp-1121,Gemini-Exp-1121,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-exp-1121 -step-1v-32k,Step-1V-32K,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision diff --git a/leaderboard_table_20241122.csv b/leaderboard_table_20241122.csv deleted file mode 100644 index 6189d630ad82427ef6c07e952abc329b45a1fa9c..0000000000000000000000000000000000000000 --- a/leaderboard_table_20241122.csv +++ /dev/null @@ -1,211 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7B-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7B,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70B-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7B-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7B-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7B-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70B-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8B-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-pro-001,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -gemini-1.5-flash-001,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet (20240620),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4K-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405B-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70B-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8B-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70B,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26B,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128K-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8B-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9B-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72B-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7B-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72B-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20B-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12B-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -llama-3.1-nemotron-70b-instruct,Llama-3.1-Nemotron-70B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct -llama-3.1-nemotron-51b-instruct,Llama-3.1-Nemotron-51B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3_1-Nemotron-51B-Instruct -claude-3-5-sonnet-20241022,Claude 3.5 Sonnet (20241022),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -molmo-72b-0924,Molmo-72B-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-72B-0924 -molmo-7b-d-0924,Molmo-7B-D-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-7B-D-0924 -reka-core-20240904,Reka-Core-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240904,Reka-Flash-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -hunyuan-standard-256k,Hunyuan-Standard-256K,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -ministral-8b-2410,Ministral-8B-2410,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Ministral-8B-Instruct-2410 -gemini-exp-1114,Gemini-Exp-1114,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1114&model=gemini-exp-1114 -chatgpt-4o-latest-20241120,ChatGPT-4o-latest (2024-11-20),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -qwen2.5-coder-32b-instruct,Qwen2.5-Coder-32B-Instruct,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct -granite-3.0-8b-instruct,Granite-3.0-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-8b-instruct -granite-3.0-2b-instruct,Granite-3.0-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-2b-instruct -gemini-exp-1121,Gemini-Exp-1121,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-exp-1121 -step-1v-32k,Step-1V-32K,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -athene-v2-chat,Athene-v2-Chat-72B,-,-,-,NexusFlow,NexusFlow,https://huggingface.co/Nexusflow/Athene-V2-Chat -c4ai-aya-expanse-32b,Aya-Expanse-32B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-32b diff --git a/leaderboard_table_20241201.csv b/leaderboard_table_20241201.csv deleted file mode 100644 index 63f05e4e6d2ac2868b76eaebfd5dec1830c31e19..0000000000000000000000000000000000000000 --- a/leaderboard_table_20241201.csv +++ /dev/null @@ -1,213 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7B-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7B,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70B-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7B-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7B-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7B-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70B-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8B-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-pro-001,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -gemini-1.5-flash-001,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet (20240620),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4K-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405B-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70B-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8B-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70B,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26B,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128K-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8B-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9B-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72B-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7B-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72B-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20B-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12B-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -llama-3.1-nemotron-70b-instruct,Llama-3.1-Nemotron-70B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct -llama-3.1-nemotron-51b-instruct,Llama-3.1-Nemotron-51B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3_1-Nemotron-51B-Instruct -claude-3-5-sonnet-20241022,Claude 3.5 Sonnet (20241022),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -molmo-72b-0924,Molmo-72B-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-72B-0924 -molmo-7b-d-0924,Molmo-7B-D-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-7B-D-0924 -reka-core-20240904,Reka-Core-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240904,Reka-Flash-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -hunyuan-standard-256k,Hunyuan-Standard-256K,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -ministral-8b-2410,Ministral-8B-2410,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Ministral-8B-Instruct-2410 -gemini-exp-1114,Gemini-Exp-1114,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1114&model=gemini-exp-1114 -chatgpt-4o-latest-20241120,ChatGPT-4o-latest (2024-11-20),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -qwen2.5-coder-32b-instruct,Qwen2.5-Coder-32B-Instruct,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct -granite-3.0-8b-instruct,Granite-3.0-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-8b-instruct -granite-3.0-2b-instruct,Granite-3.0-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-2b-instruct -gemini-exp-1121,Gemini-Exp-1121,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-exp-1121 -step-1v-32k,Step-1V-32K,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -athene-v2-chat,Athene-v2-Chat-72B,-,-,-,NexusFlow,NexusFlow,https://huggingface.co/Nexusflow/Athene-V2-Chat -c4ai-aya-expanse-32b,Aya-Expanse-32B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-32b -mistral-large-2411,Mistral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Mistral-Large-Instruct-2411 -pixtral-large-2411,Pixtral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Pixtral-Large-Instruct-2411 diff --git a/leaderboard_table_20241205.csv b/leaderboard_table_20241205.csv deleted file mode 100644 index b611708025cc4b090783970603097afc6131eee7..0000000000000000000000000000000000000000 --- a/leaderboard_table_20241205.csv +++ /dev/null @@ -1,214 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7B-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7B,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70B-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7B-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7B-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7B-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70B-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8B-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-pro-001,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -gemini-1.5-flash-001,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet (20240620),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4K-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405B-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70B-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8B-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70B,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26B,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128K-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8B-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9B-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72B-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7B-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72B-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20B-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12B-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -llama-3.1-nemotron-70b-instruct,Llama-3.1-Nemotron-70B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct -llama-3.1-nemotron-51b-instruct,Llama-3.1-Nemotron-51B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3_1-Nemotron-51B-Instruct -claude-3-5-sonnet-20241022,Claude 3.5 Sonnet (20241022),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -molmo-72b-0924,Molmo-72B-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-72B-0924 -molmo-7b-d-0924,Molmo-7B-D-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-7B-D-0924 -reka-core-20240904,Reka-Core-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240904,Reka-Flash-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -hunyuan-standard-256k,Hunyuan-Standard-256K,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -ministral-8b-2410,Ministral-8B-2410,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Ministral-8B-Instruct-2410 -gemini-exp-1114,Gemini-Exp-1114,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1114&model=gemini-exp-1114 -chatgpt-4o-latest-20241120,ChatGPT-4o-latest (2024-11-20),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -qwen2.5-coder-32b-instruct,Qwen2.5-Coder-32B-Instruct,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct -granite-3.0-8b-instruct,Granite-3.0-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-8b-instruct -granite-3.0-2b-instruct,Granite-3.0-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-2b-instruct -gemini-exp-1121,Gemini-Exp-1121,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-exp-1121 -step-1v-32k,Step-1V-32K,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -athene-v2-chat,Athene-v2-Chat-72B,-,-,-,NexusFlow,NexusFlow,https://huggingface.co/Nexusflow/Athene-V2-Chat -c4ai-aya-expanse-32b,Aya-Expanse-32B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-32b -mistral-large-2411,Mistral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Mistral-Large-Instruct-2411 -pixtral-large-2411,Pixtral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Pixtral-Large-Instruct-2411 -gemini-exp-1206,Gemini-Exp-1206,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-exp-1206 diff --git a/leaderboard_table_20241210.csv b/leaderboard_table_20241210.csv deleted file mode 100644 index c481473a26e4ade60bb26737e12096507a761ee4..0000000000000000000000000000000000000000 --- a/leaderboard_table_20241210.csv +++ /dev/null @@ -1,216 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7B-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7B,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70B-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7B-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7B-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7B-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70B-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8B-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-pro-001,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -gemini-1.5-flash-001,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet (20240620),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4K-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405B-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70B-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8B-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70B,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26B,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128K-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8B-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9B-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72B-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7B-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72B-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20B-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12B-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -llama-3.1-nemotron-70b-instruct,Llama-3.1-Nemotron-70B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct -llama-3.1-nemotron-51b-instruct,Llama-3.1-Nemotron-51B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3_1-Nemotron-51B-Instruct -claude-3-5-sonnet-20241022,Claude 3.5 Sonnet (20241022),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -molmo-72b-0924,Molmo-72B-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-72B-0924 -molmo-7b-d-0924,Molmo-7B-D-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-7B-D-0924 -reka-core-20240904,Reka-Core-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240904,Reka-Flash-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -hunyuan-standard-256k,Hunyuan-Standard-256K,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -ministral-8b-2410,Ministral-8B-2410,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Ministral-8B-Instruct-2410 -gemini-exp-1114,Gemini-Exp-1114,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1114&model=gemini-exp-1114 -chatgpt-4o-latest-20241120,ChatGPT-4o-latest (2024-11-20),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -qwen2.5-coder-32b-instruct,Qwen2.5-Coder-32B-Instruct,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct -granite-3.0-8b-instruct,Granite-3.0-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-8b-instruct -granite-3.0-2b-instruct,Granite-3.0-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-2b-instruct -gemini-exp-1121,Gemini-Exp-1121,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-exp-1121 -step-1v-32k,Step-1V-32K,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -athene-v2-chat,Athene-v2-Chat-72B,-,-,-,NexusFlow,NexusFlow,https://huggingface.co/Nexusflow/Athene-V2-Chat -c4ai-aya-expanse-32b,Aya-Expanse-32B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-32b -mistral-large-2411,Mistral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Mistral-Large-Instruct-2411 -pixtral-large-2411,Pixtral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Pixtral-Large-Instruct-2411 -gemini-exp-1206,Gemini-Exp-1206,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-exp-1206 -gemini-2.0-flash-exp,Gemini-2.0-Flash-Exp,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-exp -claude-3-5-haiku-20241022,Claude 3.5 Haiku (20241022),-,-,-,Propretary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use diff --git a/leaderboard_table_20241215.csv b/leaderboard_table_20241215.csv deleted file mode 100644 index 24119cb7929c604c45cd6b98d98ae88a6e5a0b1f..0000000000000000000000000000000000000000 --- a/leaderboard_table_20241215.csv +++ /dev/null @@ -1,222 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7B-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7B,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70B-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7B-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7B-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7B-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70B-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8B-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-pro-001,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -gemini-1.5-flash-001,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet (20240620),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4K-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405B-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70B-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8B-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70B,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26B,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128K-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8B-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9B-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72B-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7B-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72B-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20B-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12B-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -llama-3.1-nemotron-70b-instruct,Llama-3.1-Nemotron-70B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct -llama-3.1-nemotron-51b-instruct,Llama-3.1-Nemotron-51B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3_1-Nemotron-51B-Instruct -claude-3-5-sonnet-20241022,Claude 3.5 Sonnet (20241022),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -molmo-72b-0924,Molmo-72B-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-72B-0924 -molmo-7b-d-0924,Molmo-7B-D-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-7B-D-0924 -reka-core-20240904,Reka-Core-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240904,Reka-Flash-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -hunyuan-standard-256k,Hunyuan-Standard-256K,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -ministral-8b-2410,Ministral-8B-2410,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Ministral-8B-Instruct-2410 -gemini-exp-1114,Gemini-Exp-1114,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1114&model=gemini-exp-1114 -chatgpt-4o-latest-20241120,ChatGPT-4o-latest (2024-11-20),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -qwen2.5-coder-32b-instruct,Qwen2.5-Coder-32B-Instruct,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct -granite-3.0-8b-instruct,Granite-3.0-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-8b-instruct -granite-3.0-2b-instruct,Granite-3.0-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-2b-instruct -gemini-exp-1121,Gemini-Exp-1121,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-exp-1121 -step-1v-32k,Step-1V-32K,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -athene-v2-chat,Athene-v2-Chat-72B,-,-,-,NexusFlow,NexusFlow,https://huggingface.co/Nexusflow/Athene-V2-Chat -c4ai-aya-expanse-32b,Aya-Expanse-32B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-32b -mistral-large-2411,Mistral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Mistral-Large-Instruct-2411 -pixtral-large-2411,Pixtral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Pixtral-Large-Instruct-2411 -gemini-exp-1206,Gemini-Exp-1206,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-exp-1206 -gemini-2.0-flash-exp,Gemini-2.0-Flash-Exp,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-exp -claude-3-5-haiku-20241022,Claude 3.5 Haiku (20241022),-,-,-,Propretary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -llama-3.3-70b-instruct,Llama-3.3-70B-Instruct,-,-,-,Llama-3.3,Meta,https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct -qwq-32b-preview,QwQ-32B-Preview,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/QwQ-32B-Preview -amazon-nova-pro-v1.0,Amazon Nova Pro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-lite-v1.0,Amazon Nova Lite 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-micro-v1.0,Amazon Nova Micro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -c4ai-aya-expanse-8b,Aya-Expanse-8B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-8b diff --git a/leaderboard_table_20241218.csv b/leaderboard_table_20241218.csv deleted file mode 100644 index bf1c4eca340e3ef568e05134a2739845a2f75398..0000000000000000000000000000000000000000 --- a/leaderboard_table_20241218.csv +++ /dev/null @@ -1,224 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7B-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7B,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70B-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7B-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7B-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7B-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70B-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8B-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-pro-001,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -gemini-1.5-flash-001,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet (20240620),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4K-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405B-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70B-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8B-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70B,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26B,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128K-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8B-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9B-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72B-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7B-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72B-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20B-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12B-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -llama-3.1-nemotron-70b-instruct,Llama-3.1-Nemotron-70B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct -llama-3.1-nemotron-51b-instruct,Llama-3.1-Nemotron-51B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3_1-Nemotron-51B-Instruct -claude-3-5-sonnet-20241022,Claude 3.5 Sonnet (20241022),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -molmo-72b-0924,Molmo-72B-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-72B-0924 -molmo-7b-d-0924,Molmo-7B-D-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-7B-D-0924 -reka-core-20240904,Reka-Core-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240904,Reka-Flash-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -hunyuan-standard-256k,Hunyuan-Standard-256K,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -ministral-8b-2410,Ministral-8B-2410,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Ministral-8B-Instruct-2410 -gemini-exp-1114,Gemini-Exp-1114,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1114&model=gemini-exp-1114 -chatgpt-4o-latest-20241120,ChatGPT-4o-latest (2024-11-20),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -qwen2.5-coder-32b-instruct,Qwen2.5-Coder-32B-Instruct,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct -granite-3.0-8b-instruct,Granite-3.0-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-8b-instruct -granite-3.0-2b-instruct,Granite-3.0-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-2b-instruct -gemini-exp-1121,Gemini-Exp-1121,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-exp-1121 -step-1v-32k,Step-1V-32K,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -athene-v2-chat,Athene-v2-Chat-72B,-,-,-,NexusFlow,NexusFlow,https://huggingface.co/Nexusflow/Athene-V2-Chat -c4ai-aya-expanse-32b,Aya-Expanse-32B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-32b -mistral-large-2411,Mistral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Mistral-Large-Instruct-2411 -pixtral-large-2411,Pixtral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Pixtral-Large-Instruct-2411 -gemini-exp-1206,Gemini-Exp-1206,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-exp-1206 -gemini-2.0-flash-exp,Gemini-2.0-Flash-Exp,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-exp -claude-3-5-haiku-20241022,Claude 3.5 Haiku (20241022),-,-,-,Propretary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -llama-3.3-70b-instruct,Llama-3.3-70B-Instruct,-,-,-,Llama-3.3,Meta,https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct -qwq-32b-preview,QwQ-32B-Preview,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/QwQ-32B-Preview -amazon-nova-pro-v1.0,Amazon Nova Pro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-lite-v1.0,Amazon Nova Lite 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-micro-v1.0,Amazon Nova Micro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -c4ai-aya-expanse-8b,Aya-Expanse-8B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-8b -gemini-2.0-flash-thinking-exp-1219,Gemini-2.0-Flash-Thinking-Exp-1219,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-1219 -qwen-vl-max-1119,Qwen-VL-Max-1119,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/user-guide/vision/?spm=a2c4g.11186623.0.0.33d259a8vPlZoe#f1cbd5b8a8k5w diff --git a/leaderboard_table_20241222.csv b/leaderboard_table_20241222.csv deleted file mode 100644 index 4b7e61281f6ca81960e39b70fbcb3ad6a5360092..0000000000000000000000000000000000000000 --- a/leaderboard_table_20241222.csv +++ /dev/null @@ -1,227 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7B-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7B,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70B-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7B-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7B-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7B-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70B-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8B-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-pro-001,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -gemini-1.5-flash-001,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet (20240620),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4K-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405B-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70B-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8B-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70B,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26B,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128K-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8B-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9B-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72B-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7B-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72B-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20B-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12B-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -llama-3.1-nemotron-70b-instruct,Llama-3.1-Nemotron-70B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct -llama-3.1-nemotron-51b-instruct,Llama-3.1-Nemotron-51B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3_1-Nemotron-51B-Instruct -claude-3-5-sonnet-20241022,Claude 3.5 Sonnet (20241022),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -molmo-72b-0924,Molmo-72B-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-72B-0924 -molmo-7b-d-0924,Molmo-7B-D-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-7B-D-0924 -reka-core-20240904,Reka-Core-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240904,Reka-Flash-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -hunyuan-standard-256k,Hunyuan-Standard-256K,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -ministral-8b-2410,Ministral-8B-2410,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Ministral-8B-Instruct-2410 -gemini-exp-1114,Gemini-Exp-1114,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1114&model=gemini-exp-1114 -chatgpt-4o-latest-20241120,ChatGPT-4o-latest (2024-11-20),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -qwen2.5-coder-32b-instruct,Qwen2.5-Coder-32B-Instruct,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct -granite-3.0-8b-instruct,Granite-3.0-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-8b-instruct -granite-3.0-2b-instruct,Granite-3.0-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-2b-instruct -gemini-exp-1121,Gemini-Exp-1121,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-exp-1121 -step-1v-32k,Step-1V-32K,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -athene-v2-chat,Athene-v2-Chat-72B,-,-,-,NexusFlow,NexusFlow,https://huggingface.co/Nexusflow/Athene-V2-Chat -c4ai-aya-expanse-32b,Aya-Expanse-32B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-32b -mistral-large-2411,Mistral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Mistral-Large-Instruct-2411 -pixtral-large-2411,Pixtral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Pixtral-Large-Instruct-2411 -gemini-exp-1206,Gemini-Exp-1206,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-exp-1206 -gemini-2.0-flash-exp,Gemini-2.0-Flash-Exp,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-exp -claude-3-5-haiku-20241022,Claude 3.5 Haiku (20241022),-,-,-,Propretary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -llama-3.3-70b-instruct,Llama-3.3-70B-Instruct,-,-,-,Llama-3.3,Meta,https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct -qwq-32b-preview,QwQ-32B-Preview,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/QwQ-32B-Preview -amazon-nova-pro-v1.0,Amazon Nova Pro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-lite-v1.0,Amazon Nova Lite 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-micro-v1.0,Amazon Nova Micro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -c4ai-aya-expanse-8b,Aya-Expanse-8B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-8b -gemini-2.0-flash-thinking-exp-1219,Gemini-2.0-Flash-Thinking-Exp-1219,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-1219 -qwen-vl-max-1119,Qwen-VL-Max-1119,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/user-guide/vision/?spm=a2c4g.11186623.0.0.33d259a8vPlZoe#f1cbd5b8a8k5w -deepseek-v2.5-1210,Deepseek-v2.5-1210,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5-1210 -qwen2.5-plus-1127,Qwen2.5-plus-1127,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models?spm=a2c4g.11186623.0.i7 -nvila-internal-15b-v1,NVILA-15B,-,-,-,-,NVIDIA,https://huggingface.co/Efficient-Large-Model/NVILA-15B diff --git a/leaderboard_table_20241230.csv b/leaderboard_table_20241230.csv deleted file mode 100644 index 2ce7d9013fc19dd9eafb6f48c133f1d3e9fcd6d6..0000000000000000000000000000000000000000 --- a/leaderboard_table_20241230.csv +++ /dev/null @@ -1,230 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7B-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7B,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70B-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7B-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7B-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7B-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70B-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8B-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-pro-001,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -gemini-1.5-flash-001,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet (20240620),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4K-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405B-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70B-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8B-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70B,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26B,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128K-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8B-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9B-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72B-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7B-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72B-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20B-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12B-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -llama-3.1-nemotron-70b-instruct,Llama-3.1-Nemotron-70B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct -llama-3.1-nemotron-51b-instruct,Llama-3.1-Nemotron-51B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3_1-Nemotron-51B-Instruct -claude-3-5-sonnet-20241022,Claude 3.5 Sonnet (20241022),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -molmo-72b-0924,Molmo-72B-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-72B-0924 -molmo-7b-d-0924,Molmo-7B-D-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-7B-D-0924 -reka-core-20240904,Reka-Core-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240904,Reka-Flash-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -hunyuan-standard-256k,Hunyuan-Standard-256K,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -ministral-8b-2410,Ministral-8B-2410,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Ministral-8B-Instruct-2410 -gemini-exp-1114,Gemini-Exp-1114,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1114&model=gemini-exp-1114 -chatgpt-4o-latest-20241120,ChatGPT-4o-latest (2024-11-20),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -qwen2.5-coder-32b-instruct,Qwen2.5-Coder-32B-Instruct,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct -granite-3.0-8b-instruct,Granite-3.0-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-8b-instruct -granite-3.0-2b-instruct,Granite-3.0-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-2b-instruct -gemini-exp-1121,Gemini-Exp-1121,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-exp-1121 -step-1v-32k,Step-1V-32K,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -athene-v2-chat,Athene-v2-Chat-72B,-,-,-,NexusFlow,NexusFlow,https://huggingface.co/Nexusflow/Athene-V2-Chat -c4ai-aya-expanse-32b,Aya-Expanse-32B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-32b -mistral-large-2411,Mistral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Mistral-Large-Instruct-2411 -pixtral-large-2411,Pixtral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Pixtral-Large-Instruct-2411 -gemini-exp-1206,Gemini-Exp-1206,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-exp-1206 -gemini-2.0-flash-exp,Gemini-2.0-Flash-Exp,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-exp -claude-3-5-haiku-20241022,Claude 3.5 Haiku (20241022),-,-,-,Propretary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -llama-3.3-70b-instruct,Llama-3.3-70B-Instruct,-,-,-,Llama-3.3,Meta,https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct -qwq-32b-preview,QwQ-32B-Preview,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/QwQ-32B-Preview -amazon-nova-pro-v1.0,Amazon Nova Pro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-lite-v1.0,Amazon Nova Lite 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-micro-v1.0,Amazon Nova Micro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -c4ai-aya-expanse-8b,Aya-Expanse-8B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-8b -gemini-2.0-flash-thinking-exp-1219,Gemini-2.0-Flash-Thinking-Exp-1219,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-1219 -qwen-vl-max-1119,Qwen-VL-Max-1119,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/user-guide/vision/?spm=a2c4g.11186623.0.0.33d259a8vPlZoe#f1cbd5b8a8k5w -deepseek-v2.5-1210,Deepseek-v2.5-1210,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5-1210 -qwen2.5-plus-1127,Qwen2.5-plus-1127,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models?spm=a2c4g.11186623.0.i7 -nvila-internal-15b-v1,NVILA-15B,-,-,-,-,NVIDIA,https://huggingface.co/Efficient-Large-Model/NVILA-15B -o1-2024-12-17,o1-2024-12-17,-,-,-,Proprietary,OpenAI,https://openai.com/index/o1-and-new-tools-for-developers/ -deepseek-v3,DeepSeek-V3,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V3 -smollm2-1.7b-instruct,SmolLM2-1.7B-Instruct,-,-,-,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct diff --git a/leaderboard_table_20250105.csv b/leaderboard_table_20250105.csv deleted file mode 100644 index 51cc2f12c6fab28f96c697817b47b09ac17032ea..0000000000000000000000000000000000000000 --- a/leaderboard_table_20250105.csv +++ /dev/null @@ -1,239 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7B-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7B,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70B-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7B-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7B-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7B-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70B-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8B-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-pro-001,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -gemini-1.5-flash-001,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet (20240620),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4K-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405B-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70B-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8B-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70B,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26B,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128K-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8B-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9B-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72B-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7B-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72B-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20B-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12B-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -llama-3.1-nemotron-70b-instruct,Llama-3.1-Nemotron-70B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct -llama-3.1-nemotron-51b-instruct,Llama-3.1-Nemotron-51B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3_1-Nemotron-51B-Instruct -claude-3-5-sonnet-20241022,Claude 3.5 Sonnet (20241022),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -molmo-72b-0924,Molmo-72B-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-72B-0924 -molmo-7b-d-0924,Molmo-7B-D-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-7B-D-0924 -reka-core-20240904,Reka-Core-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240904,Reka-Flash-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -hunyuan-standard-256k,Hunyuan-Standard-256K,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -ministral-8b-2410,Ministral-8B-2410,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Ministral-8B-Instruct-2410 -gemini-exp-1114,Gemini-Exp-1114,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1114&model=gemini-exp-1114 -chatgpt-4o-latest-20241120,ChatGPT-4o-latest (2024-11-20),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -qwen2.5-coder-32b-instruct,Qwen2.5-Coder-32B-Instruct,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct -granite-3.0-8b-instruct,Granite-3.0-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-8b-instruct -granite-3.0-2b-instruct,Granite-3.0-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-2b-instruct -gemini-exp-1121,Gemini-Exp-1121,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-exp-1121 -step-1v-32k,Step-1V-32K,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -athene-v2-chat,Athene-v2-Chat-72B,-,-,-,NexusFlow,NexusFlow,https://huggingface.co/Nexusflow/Athene-V2-Chat -c4ai-aya-expanse-32b,Aya-Expanse-32B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-32b -mistral-large-2411,Mistral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Mistral-Large-Instruct-2411 -pixtral-large-2411,Pixtral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Pixtral-Large-Instruct-2411 -gemini-exp-1206,Gemini-Exp-1206,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-exp-1206 -gemini-2.0-flash-exp,Gemini-2.0-Flash-Exp,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-exp -claude-3-5-haiku-20241022,Claude 3.5 Haiku (20241022),-,-,-,Propretary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -llama-3.3-70b-instruct,Llama-3.3-70B-Instruct,-,-,-,Llama-3.3,Meta,https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct -qwq-32b-preview,QwQ-32B-Preview,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/QwQ-32B-Preview -amazon-nova-pro-v1.0,Amazon Nova Pro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-lite-v1.0,Amazon Nova Lite 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-micro-v1.0,Amazon Nova Micro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -c4ai-aya-expanse-8b,Aya-Expanse-8B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-8b -gemini-2.0-flash-thinking-exp-1219,Gemini-2.0-Flash-Thinking-Exp-1219,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-1219 -qwen-vl-max-1119,Qwen-VL-Max-1119,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/user-guide/vision/?spm=a2c4g.11186623.0.0.33d259a8vPlZoe#f1cbd5b8a8k5w -deepseek-v2.5-1210,Deepseek-v2.5-1210,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5-1210 -qwen2.5-plus-1127,Qwen2.5-plus-1127,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models?spm=a2c4g.11186623.0.i7 -nvila-internal-15b-v1,NVILA-15B,-,-,-,-,NVIDIA,https://huggingface.co/Efficient-Large-Model/NVILA-15B -o1-2024-12-17,o1-2024-12-17,-,-,-,Proprietary,OpenAI,https://openai.com/index/o1-and-new-tools-for-developers/ -deepseek-v3,DeepSeek-V3,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V3 -smollm2-1.7b-instruct,SmolLM2-1.7B-Instruct,-,-,-,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct -llama-3.1-tulu-3-8b,Llama-3.1-Tulu-3-8B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B -llama-3.1-tulu-3-70b,Llama-3.1-Tulu-3-70B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-70B -flux-1.1-pro,FLUX1.1 [pro],-,-,-,Proprietary,Black Forest Labs,https://replicate.com/black-forest-labs/flux-1.1-pro -recraft-v3,Recraft V3,-,-,-,Proprietary,Recraft,https://www.recraft.ai/blog/recraft-introduces-a-revolutionary-ai-model-that-thinks-in-design-language -photon,Luma Photon,-,-,-,Proprietary,Luma AI,https://replicate.com/luma/photon -ideogram-v2,Ideogram 2.0,-,-,-,Proprietary,Ideogram,https://replicate.com/ideogram-ai/ideogram-v2 -stable-diffusion-v35-large,Stable Diffusion 3.5 Large,-,-,-,Stability AI,Stability AI,https://fal.ai/models/fal-ai/stable-diffusion-v35-large -flux-1-dev-fp8,FLUX.1 [dev] (fp8),-,-,-,flux-1-dev,Black Forest Labs,https://fireworks.ai/models/fireworks/flux-1-dev-fp8 -dall-e-3,DALLΒ·E 3,-,-,-,Proprietary,OpenAI,https://platform.openai.com/docs/models#dall-e diff --git a/leaderboard_table_20250115.csv b/leaderboard_table_20250115.csv deleted file mode 100644 index 5ed09ddaaf031b1cb4a76e1d83b18b95e4462643..0000000000000000000000000000000000000000 --- a/leaderboard_table_20250115.csv +++ /dev/null @@ -1,242 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7B-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7B,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70B-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7B-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7B-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7B-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70B-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8B-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-pro-001,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -gemini-1.5-flash-001,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet (20240620),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4K-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405B-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70B-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8B-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70B,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26B,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128K-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8B-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9B-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72B-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7B-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72B-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20B-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12B-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -llama-3.1-nemotron-70b-instruct,Llama-3.1-Nemotron-70B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct -llama-3.1-nemotron-51b-instruct,Llama-3.1-Nemotron-51B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3_1-Nemotron-51B-Instruct -claude-3-5-sonnet-20241022,Claude 3.5 Sonnet (20241022),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -molmo-72b-0924,Molmo-72B-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-72B-0924 -molmo-7b-d-0924,Molmo-7B-D-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-7B-D-0924 -reka-core-20240904,Reka-Core-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240904,Reka-Flash-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -hunyuan-standard-256k,Hunyuan-Standard-256K,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -ministral-8b-2410,Ministral-8B-2410,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Ministral-8B-Instruct-2410 -gemini-exp-1114,Gemini-Exp-1114,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1114&model=gemini-exp-1114 -chatgpt-4o-latest-20241120,ChatGPT-4o-latest (2024-11-20),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -qwen2.5-coder-32b-instruct,Qwen2.5-Coder-32B-Instruct,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct -granite-3.0-8b-instruct,Granite-3.0-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-8b-instruct -granite-3.0-2b-instruct,Granite-3.0-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-2b-instruct -gemini-exp-1121,Gemini-Exp-1121,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-exp-1121 -step-1v-32k,Step-1V-32K,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -athene-v2-chat,Athene-v2-Chat-72B,-,-,-,NexusFlow,NexusFlow,https://huggingface.co/Nexusflow/Athene-V2-Chat -c4ai-aya-expanse-32b,Aya-Expanse-32B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-32b -mistral-large-2411,Mistral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Mistral-Large-Instruct-2411 -pixtral-large-2411,Pixtral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Pixtral-Large-Instruct-2411 -gemini-exp-1206,Gemini-Exp-1206,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-exp-1206 -gemini-2.0-flash-exp,Gemini-2.0-Flash-Exp,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-exp -claude-3-5-haiku-20241022,Claude 3.5 Haiku (20241022),-,-,-,Propretary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -llama-3.3-70b-instruct,Llama-3.3-70B-Instruct,-,-,-,Llama-3.3,Meta,https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct -qwq-32b-preview,QwQ-32B-Preview,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/QwQ-32B-Preview -amazon-nova-pro-v1.0,Amazon Nova Pro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-lite-v1.0,Amazon Nova Lite 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-micro-v1.0,Amazon Nova Micro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -c4ai-aya-expanse-8b,Aya-Expanse-8B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-8b -gemini-2.0-flash-thinking-exp-1219,Gemini-2.0-Flash-Thinking-Exp-1219,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-1219 -qwen-vl-max-1119,Qwen-VL-Max-1119,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/user-guide/vision/?spm=a2c4g.11186623.0.0.33d259a8vPlZoe#f1cbd5b8a8k5w -deepseek-v2.5-1210,Deepseek-v2.5-1210,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5-1210 -qwen2.5-plus-1127,Qwen2.5-plus-1127,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models?spm=a2c4g.11186623.0.i7 -nvila-internal-15b-v1,NVILA-15B,-,-,-,-,NVIDIA,https://huggingface.co/Efficient-Large-Model/NVILA-15B -o1-2024-12-17,o1-2024-12-17,-,-,-,Proprietary,OpenAI,https://openai.com/index/o1-and-new-tools-for-developers/ -deepseek-v3,DeepSeek-V3,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V3 -smollm2-1.7b-instruct,SmolLM2-1.7B-Instruct,-,-,-,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct -llama-3.1-tulu-3-8b,Llama-3.1-Tulu-3-8B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B -llama-3.1-tulu-3-70b,Llama-3.1-Tulu-3-70B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-70B -step-2-16k-exp-202412,Step-2-16K-Exp,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/text -granite-3.1-8b-instruct,Granite-3.1-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-8b-instruct -granite-3.1-2b-instruct,Granite-3.1-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-2b-instruct -flux-1.1-pro,FLUX1.1 [pro],-,-,-,Proprietary,Black Forest Labs,https://replicate.com/black-forest-labs/flux-1.1-pro -recraft-v3,Recraft V3,-,-,-,Proprietary,Recraft,https://www.recraft.ai/blog/recraft-introduces-a-revolutionary-ai-model-that-thinks-in-design-language -photon,Luma Photon,-,-,-,Proprietary,Luma AI,https://replicate.com/luma/photon -ideogram-v2,Ideogram 2.0,-,-,-,Proprietary,Ideogram,https://replicate.com/ideogram-ai/ideogram-v2 -stable-diffusion-v35-large,Stable Diffusion 3.5 Large,-,-,-,Stability AI,Stability AI,https://fal.ai/models/fal-ai/stable-diffusion-v35-large -flux-1-dev-fp8,FLUX.1 [dev] (fp8),-,-,-,flux-1-dev,Black Forest Labs,https://fireworks.ai/models/fireworks/flux-1-dev-fp8 -dall-e-3,DALLΒ·E 3,-,-,-,Proprietary,OpenAI,https://platform.openai.com/docs/models#dall-e diff --git a/leaderboard_table_20250119.csv b/leaderboard_table_20250119.csv deleted file mode 100644 index 2e3372ae35774dbe75966774f51f9023ac755209..0000000000000000000000000000000000000000 --- a/leaderboard_table_20250119.csv +++ /dev/null @@ -1,244 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7B-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7B,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70B-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7B-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7B-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7B-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70B-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8B-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-pro-001,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -gemini-1.5-flash-001,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet (20240620),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4K-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405B-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70B-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8B-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70B,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26B,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128K-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8B-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9B-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72B-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7B-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72B-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20B-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12B-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -llama-3.1-nemotron-70b-instruct,Llama-3.1-Nemotron-70B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct -llama-3.1-nemotron-51b-instruct,Llama-3.1-Nemotron-51B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3_1-Nemotron-51B-Instruct -claude-3-5-sonnet-20241022,Claude 3.5 Sonnet (20241022),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -molmo-72b-0924,Molmo-72B-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-72B-0924 -molmo-7b-d-0924,Molmo-7B-D-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-7B-D-0924 -reka-core-20240904,Reka-Core-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240904,Reka-Flash-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -hunyuan-standard-256k,Hunyuan-Standard-256K,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -ministral-8b-2410,Ministral-8B-2410,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Ministral-8B-Instruct-2410 -gemini-exp-1114,Gemini-Exp-1114,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1114&model=gemini-exp-1114 -chatgpt-4o-latest-20241120,ChatGPT-4o-latest (2024-11-20),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -qwen2.5-coder-32b-instruct,Qwen2.5-Coder-32B-Instruct,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct -granite-3.0-8b-instruct,Granite-3.0-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-8b-instruct -granite-3.0-2b-instruct,Granite-3.0-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-2b-instruct -gemini-exp-1121,Gemini-Exp-1121,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-exp-1121 -step-1v-32k,Step-1V-32K,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -athene-v2-chat,Athene-v2-Chat-72B,-,-,-,NexusFlow,NexusFlow,https://huggingface.co/Nexusflow/Athene-V2-Chat -c4ai-aya-expanse-32b,Aya-Expanse-32B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-32b -mistral-large-2411,Mistral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Mistral-Large-Instruct-2411 -pixtral-large-2411,Pixtral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Pixtral-Large-Instruct-2411 -gemini-exp-1206,Gemini-Exp-1206,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-exp-1206 -gemini-2.0-flash-exp,Gemini-2.0-Flash-Exp,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-exp -claude-3-5-haiku-20241022,Claude 3.5 Haiku (20241022),-,-,-,Propretary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -llama-3.3-70b-instruct,Llama-3.3-70B-Instruct,-,-,-,Llama-3.3,Meta,https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct -qwq-32b-preview,QwQ-32B-Preview,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/QwQ-32B-Preview -amazon-nova-pro-v1.0,Amazon Nova Pro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-lite-v1.0,Amazon Nova Lite 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-micro-v1.0,Amazon Nova Micro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -c4ai-aya-expanse-8b,Aya-Expanse-8B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-8b -gemini-2.0-flash-thinking-exp-1219,Gemini-2.0-Flash-Thinking-Exp-1219,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-1219 -qwen-vl-max-1119,Qwen-VL-Max-1119,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/user-guide/vision/?spm=a2c4g.11186623.0.0.33d259a8vPlZoe#f1cbd5b8a8k5w -deepseek-v2.5-1210,Deepseek-v2.5-1210,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5-1210 -qwen2.5-plus-1127,Qwen2.5-plus-1127,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models?spm=a2c4g.11186623.0.i7 -nvila-internal-15b-v1,NVILA-15B,-,-,-,-,NVIDIA,https://huggingface.co/Efficient-Large-Model/NVILA-15B -o1-2024-12-17,o1-2024-12-17,-,-,-,Proprietary,OpenAI,https://openai.com/index/o1-and-new-tools-for-developers/ -deepseek-v3,DeepSeek-V3,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V3 -smollm2-1.7b-instruct,SmolLM2-1.7B-Instruct,-,-,-,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct -llama-3.1-tulu-3-8b,Llama-3.1-Tulu-3-8B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B -llama-3.1-tulu-3-70b,Llama-3.1-Tulu-3-70B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-70B -step-2-16k-exp-202412,Step-2-16K-Exp,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/text -granite-3.1-8b-instruct,Granite-3.1-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-8b-instruct -granite-3.1-2b-instruct,Granite-3.1-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-2b-instruct -phi-4,Phi-4,-,-,-,MIT,Microsoft,https://huggingface.co/microsoft/phi-4 -step-1o-vision-32k-highres,Step-1o-Vision-32k (highres),-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -flux-1.1-pro,FLUX1.1 [pro],-,-,-,Proprietary,Black Forest Labs,https://replicate.com/black-forest-labs/flux-1.1-pro -recraft-v3,Recraft V3,-,-,-,Proprietary,Recraft,https://www.recraft.ai/blog/recraft-introduces-a-revolutionary-ai-model-that-thinks-in-design-language -photon,Luma Photon,-,-,-,Proprietary,Luma AI,https://replicate.com/luma/photon -ideogram-v2,Ideogram 2.0,-,-,-,Proprietary,Ideogram,https://replicate.com/ideogram-ai/ideogram-v2 -stable-diffusion-v35-large,Stable Diffusion 3.5 Large,-,-,-,Stability AI,Stability AI,https://fal.ai/models/fal-ai/stable-diffusion-v35-large -flux-1-dev-fp8,FLUX.1 [dev] (fp8),-,-,-,flux-1-dev,Black Forest Labs,https://fireworks.ai/models/fireworks/flux-1-dev-fp8 -dall-e-3,DALLΒ·E 3,-,-,-,Proprietary,OpenAI,https://platform.openai.com/docs/models#dall-e diff --git a/leaderboard_table_20250121.csv b/leaderboard_table_20250121.csv deleted file mode 100644 index 36fd4b57e9a76768863238e033b5ccadd4ada737..0000000000000000000000000000000000000000 --- a/leaderboard_table_20250121.csv +++ /dev/null @@ -1,245 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7B-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7B,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70B-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7B-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7B-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7B-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70B-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8B-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-pro-001,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -gemini-1.5-flash-001,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet (20240620),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4K-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405B-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70B-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8B-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70B,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26B,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128K-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8B-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9B-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72B-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7B-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72B-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20B-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12B-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -llama-3.1-nemotron-70b-instruct,Llama-3.1-Nemotron-70B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct -llama-3.1-nemotron-51b-instruct,Llama-3.1-Nemotron-51B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3_1-Nemotron-51B-Instruct -claude-3-5-sonnet-20241022,Claude 3.5 Sonnet (20241022),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -molmo-72b-0924,Molmo-72B-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-72B-0924 -molmo-7b-d-0924,Molmo-7B-D-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-7B-D-0924 -reka-core-20240904,Reka-Core-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240904,Reka-Flash-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -hunyuan-standard-256k,Hunyuan-Standard-256K,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -ministral-8b-2410,Ministral-8B-2410,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Ministral-8B-Instruct-2410 -gemini-exp-1114,Gemini-Exp-1114,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1114&model=gemini-exp-1114 -chatgpt-4o-latest-20241120,ChatGPT-4o-latest (2024-11-20),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -qwen2.5-coder-32b-instruct,Qwen2.5-Coder-32B-Instruct,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct -granite-3.0-8b-instruct,Granite-3.0-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-8b-instruct -granite-3.0-2b-instruct,Granite-3.0-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-2b-instruct -gemini-exp-1121,Gemini-Exp-1121,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-exp-1121 -step-1v-32k,Step-1V-32K,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -athene-v2-chat,Athene-v2-Chat-72B,-,-,-,NexusFlow,NexusFlow,https://huggingface.co/Nexusflow/Athene-V2-Chat -c4ai-aya-expanse-32b,Aya-Expanse-32B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-32b -mistral-large-2411,Mistral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Mistral-Large-Instruct-2411 -pixtral-large-2411,Pixtral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Pixtral-Large-Instruct-2411 -gemini-exp-1206,Gemini-Exp-1206,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-exp-1206 -gemini-2.0-flash-exp,Gemini-2.0-Flash-Exp,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-exp -claude-3-5-haiku-20241022,Claude 3.5 Haiku (20241022),-,-,-,Propretary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -llama-3.3-70b-instruct,Llama-3.3-70B-Instruct,-,-,-,Llama-3.3,Meta,https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct -qwq-32b-preview,QwQ-32B-Preview,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/QwQ-32B-Preview -amazon-nova-pro-v1.0,Amazon Nova Pro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-lite-v1.0,Amazon Nova Lite 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-micro-v1.0,Amazon Nova Micro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -c4ai-aya-expanse-8b,Aya-Expanse-8B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-8b -gemini-2.0-flash-thinking-exp-1219,Gemini-2.0-Flash-Thinking-Exp-1219,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-1219 -qwen-vl-max-1119,Qwen-VL-Max-1119,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/user-guide/vision/?spm=a2c4g.11186623.0.0.33d259a8vPlZoe#f1cbd5b8a8k5w -deepseek-v2.5-1210,Deepseek-v2.5-1210,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5-1210 -qwen2.5-plus-1127,Qwen2.5-plus-1127,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models?spm=a2c4g.11186623.0.i7 -nvila-internal-15b-v1,NVILA-15B,-,-,-,-,NVIDIA,https://huggingface.co/Efficient-Large-Model/NVILA-15B -o1-2024-12-17,o1-2024-12-17,-,-,-,Proprietary,OpenAI,https://openai.com/index/o1-and-new-tools-for-developers/ -deepseek-v3,DeepSeek-V3,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V3 -smollm2-1.7b-instruct,SmolLM2-1.7B-Instruct,-,-,-,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct -llama-3.1-tulu-3-8b,Llama-3.1-Tulu-3-8B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B -llama-3.1-tulu-3-70b,Llama-3.1-Tulu-3-70B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-70B -step-2-16k-exp-202412,Step-2-16K-Exp,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/text -granite-3.1-8b-instruct,Granite-3.1-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-8b-instruct -granite-3.1-2b-instruct,Granite-3.1-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-2b-instruct -phi-4,Phi-4,-,-,-,MIT,Microsoft,https://huggingface.co/microsoft/phi-4 -gemini-2.0-flash-thinking-exp-01-21,Gemini-2.0-Flash-Thinking-Exp-01-21,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-01-21 -step-1o-vision-32k-highres,Step-1o-Vision-32k (highres),-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -flux-1.1-pro,FLUX1.1 [pro],-,-,-,Proprietary,Black Forest Labs,https://replicate.com/black-forest-labs/flux-1.1-pro -recraft-v3,Recraft V3,-,-,-,Proprietary,Recraft,https://www.recraft.ai/blog/recraft-introduces-a-revolutionary-ai-model-that-thinks-in-design-language -photon,Luma Photon,-,-,-,Proprietary,Luma AI,https://replicate.com/luma/photon -ideogram-v2,Ideogram 2.0,-,-,-,Proprietary,Ideogram,https://replicate.com/ideogram-ai/ideogram-v2 -stable-diffusion-v35-large,Stable Diffusion 3.5 Large,-,-,-,Stability AI,Stability AI,https://fal.ai/models/fal-ai/stable-diffusion-v35-large -flux-1-dev-fp8,FLUX.1 [dev] (fp8),-,-,-,flux-1-dev,Black Forest Labs,https://fireworks.ai/models/fireworks/flux-1-dev-fp8 -dall-e-3,DALLΒ·E 3,-,-,-,Proprietary,OpenAI,https://platform.openai.com/docs/models#dall-e diff --git a/leaderboard_table_20250122.csv b/leaderboard_table_20250122.csv deleted file mode 100644 index 967467690534055ba540c170c4970ed83183fa7a..0000000000000000000000000000000000000000 --- a/leaderboard_table_20250122.csv +++ /dev/null @@ -1,246 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7B-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7B,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70B-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7B-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7B-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7B-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70B-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8B-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-pro-001,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -gemini-1.5-flash-001,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet (20240620),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4K-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405B-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70B-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8B-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70B,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26B,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128K-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8B-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9B-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72B-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7B-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72B-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20B-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12B-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -llama-3.1-nemotron-70b-instruct,Llama-3.1-Nemotron-70B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct -llama-3.1-nemotron-51b-instruct,Llama-3.1-Nemotron-51B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3_1-Nemotron-51B-Instruct -claude-3-5-sonnet-20241022,Claude 3.5 Sonnet (20241022),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -molmo-72b-0924,Molmo-72B-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-72B-0924 -molmo-7b-d-0924,Molmo-7B-D-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-7B-D-0924 -reka-core-20240904,Reka-Core-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240904,Reka-Flash-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -hunyuan-standard-256k,Hunyuan-Standard-256K,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -ministral-8b-2410,Ministral-8B-2410,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Ministral-8B-Instruct-2410 -gemini-exp-1114,Gemini-Exp-1114,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1114&model=gemini-exp-1114 -chatgpt-4o-latest-20241120,ChatGPT-4o-latest (2024-11-20),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -qwen2.5-coder-32b-instruct,Qwen2.5-Coder-32B-Instruct,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct -granite-3.0-8b-instruct,Granite-3.0-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-8b-instruct -granite-3.0-2b-instruct,Granite-3.0-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-2b-instruct -gemini-exp-1121,Gemini-Exp-1121,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-exp-1121 -step-1v-32k,Step-1V-32K,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -athene-v2-chat,Athene-v2-Chat-72B,-,-,-,NexusFlow,NexusFlow,https://huggingface.co/Nexusflow/Athene-V2-Chat -c4ai-aya-expanse-32b,Aya-Expanse-32B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-32b -mistral-large-2411,Mistral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Mistral-Large-Instruct-2411 -pixtral-large-2411,Pixtral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Pixtral-Large-Instruct-2411 -gemini-exp-1206,Gemini-Exp-1206,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-exp-1206 -gemini-2.0-flash-exp,Gemini-2.0-Flash-Exp,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-exp -claude-3-5-haiku-20241022,Claude 3.5 Haiku (20241022),-,-,-,Propretary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -llama-3.3-70b-instruct,Llama-3.3-70B-Instruct,-,-,-,Llama-3.3,Meta,https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct -qwq-32b-preview,QwQ-32B-Preview,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/QwQ-32B-Preview -amazon-nova-pro-v1.0,Amazon Nova Pro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-lite-v1.0,Amazon Nova Lite 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-micro-v1.0,Amazon Nova Micro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -c4ai-aya-expanse-8b,Aya-Expanse-8B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-8b -gemini-2.0-flash-thinking-exp-1219,Gemini-2.0-Flash-Thinking-Exp-1219,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-1219 -qwen-vl-max-1119,Qwen-VL-Max-1119,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/user-guide/vision/?spm=a2c4g.11186623.0.0.33d259a8vPlZoe#f1cbd5b8a8k5w -deepseek-v2.5-1210,Deepseek-v2.5-1210,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5-1210 -qwen2.5-plus-1127,Qwen2.5-plus-1127,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models?spm=a2c4g.11186623.0.i7 -nvila-internal-15b-v1,NVILA-15B,-,-,-,-,NVIDIA,https://huggingface.co/Efficient-Large-Model/NVILA-15B -o1-2024-12-17,o1-2024-12-17,-,-,-,Proprietary,OpenAI,https://openai.com/index/o1-and-new-tools-for-developers/ -deepseek-v3,DeepSeek-V3,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V3 -smollm2-1.7b-instruct,SmolLM2-1.7B-Instruct,-,-,-,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct -llama-3.1-tulu-3-8b,Llama-3.1-Tulu-3-8B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B -llama-3.1-tulu-3-70b,Llama-3.1-Tulu-3-70B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-70B -step-2-16k-exp-202412,Step-2-16K-Exp,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/text -granite-3.1-8b-instruct,Granite-3.1-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-8b-instruct -granite-3.1-2b-instruct,Granite-3.1-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-2b-instruct -phi-4,Phi-4,-,-,-,MIT,Microsoft,https://huggingface.co/microsoft/phi-4 -gemini-2.0-flash-thinking-exp-01-21,Gemini-2.0-Flash-Thinking-Exp-01-21,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-01-21 -step-1o-vision-32k-highres,Step-1o-Vision-32k (highres),-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -flux-1.1-pro,FLUX1.1 [pro],-,-,-,Proprietary,Black Forest Labs,https://replicate.com/black-forest-labs/flux-1.1-pro -recraft-v3,Recraft V3,-,-,-,Proprietary,Recraft,https://www.recraft.ai/blog/recraft-introduces-a-revolutionary-ai-model-that-thinks-in-design-language -photon,Luma Photon,-,-,-,Proprietary,Luma AI,https://replicate.com/luma/photon -ideogram-v2,Ideogram 2.0,-,-,-,Proprietary,Ideogram,https://replicate.com/ideogram-ai/ideogram-v2 -stable-diffusion-v35-large,Stable Diffusion 3.5 Large,-,-,-,Open,Stability AI,https://fal.ai/models/fal-ai/stable-diffusion-v35-large -flux-1-dev-fp8,FLUX.1 [dev] (fp8),-,-,-,Open,Black Forest Labs,https://fireworks.ai/models/fireworks/flux-1-dev-fp8 -dall-e-3,DALLΒ·E 3,-,-,-,Proprietary,OpenAI,https://platform.openai.com/docs/models#dall-e -imagen-3.0-generate-002,Imagen-3.0-generate-002,-,-,-,Proprietary,Google,https://deepmind.google/technologies/imagen-3/ diff --git a/leaderboard_table_20250124.csv b/leaderboard_table_20250124.csv deleted file mode 100644 index 3ba44b326fd28bf8ed5cbf9ff4e2da0c6e49a896..0000000000000000000000000000000000000000 --- a/leaderboard_table_20250124.csv +++ /dev/null @@ -1,247 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7B-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7B,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70B-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7B-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7B-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7B-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70B-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8B-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-pro-001,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -gemini-1.5-flash-001,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet (20240620),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4K-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405B-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70B-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8B-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70B,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26B,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128K-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8B-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9B-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72B-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7B-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72B-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20B-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12B-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -llama-3.1-nemotron-70b-instruct,Llama-3.1-Nemotron-70B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct -llama-3.1-nemotron-51b-instruct,Llama-3.1-Nemotron-51B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3_1-Nemotron-51B-Instruct -claude-3-5-sonnet-20241022,Claude 3.5 Sonnet (20241022),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -molmo-72b-0924,Molmo-72B-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-72B-0924 -molmo-7b-d-0924,Molmo-7B-D-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-7B-D-0924 -reka-core-20240904,Reka-Core-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240904,Reka-Flash-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -hunyuan-standard-256k,Hunyuan-Standard-256K,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -ministral-8b-2410,Ministral-8B-2410,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Ministral-8B-Instruct-2410 -gemini-exp-1114,Gemini-Exp-1114,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1114&model=gemini-exp-1114 -chatgpt-4o-latest-20241120,ChatGPT-4o-latest (2024-11-20),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -qwen2.5-coder-32b-instruct,Qwen2.5-Coder-32B-Instruct,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct -granite-3.0-8b-instruct,Granite-3.0-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-8b-instruct -granite-3.0-2b-instruct,Granite-3.0-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-2b-instruct -gemini-exp-1121,Gemini-Exp-1121,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-exp-1121 -step-1v-32k,Step-1V-32K,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -athene-v2-chat,Athene-v2-Chat-72B,-,-,-,NexusFlow,NexusFlow,https://huggingface.co/Nexusflow/Athene-V2-Chat -c4ai-aya-expanse-32b,Aya-Expanse-32B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-32b -mistral-large-2411,Mistral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Mistral-Large-Instruct-2411 -pixtral-large-2411,Pixtral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Pixtral-Large-Instruct-2411 -gemini-exp-1206,Gemini-Exp-1206,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-exp-1206 -gemini-2.0-flash-exp,Gemini-2.0-Flash-Exp,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-exp -claude-3-5-haiku-20241022,Claude 3.5 Haiku (20241022),-,-,-,Propretary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -llama-3.3-70b-instruct,Llama-3.3-70B-Instruct,-,-,-,Llama-3.3,Meta,https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct -qwq-32b-preview,QwQ-32B-Preview,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/QwQ-32B-Preview -amazon-nova-pro-v1.0,Amazon Nova Pro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-lite-v1.0,Amazon Nova Lite 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-micro-v1.0,Amazon Nova Micro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -c4ai-aya-expanse-8b,Aya-Expanse-8B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-8b -gemini-2.0-flash-thinking-exp-1219,Gemini-2.0-Flash-Thinking-Exp-1219,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-1219 -qwen-vl-max-1119,Qwen-VL-Max-1119,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/user-guide/vision/?spm=a2c4g.11186623.0.0.33d259a8vPlZoe#f1cbd5b8a8k5w -deepseek-v2.5-1210,Deepseek-v2.5-1210,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5-1210 -qwen2.5-plus-1127,Qwen2.5-plus-1127,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models?spm=a2c4g.11186623.0.i7 -nvila-internal-15b-v1,NVILA-15B,-,-,-,-,NVIDIA,https://huggingface.co/Efficient-Large-Model/NVILA-15B -o1-2024-12-17,o1-2024-12-17,-,-,-,Proprietary,OpenAI,https://openai.com/index/o1-and-new-tools-for-developers/ -deepseek-v3,DeepSeek-V3,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V3 -smollm2-1.7b-instruct,SmolLM2-1.7B-Instruct,-,-,-,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct -llama-3.1-tulu-3-8b,Llama-3.1-Tulu-3-8B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B -llama-3.1-tulu-3-70b,Llama-3.1-Tulu-3-70B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-70B -step-2-16k-exp-202412,Step-2-16K-Exp,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/text -granite-3.1-8b-instruct,Granite-3.1-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-8b-instruct -granite-3.1-2b-instruct,Granite-3.1-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-2b-instruct -phi-4,Phi-4,-,-,-,MIT,Microsoft,https://huggingface.co/microsoft/phi-4 -gemini-2.0-flash-thinking-exp-01-21,Gemini-2.0-Flash-Thinking-Exp-01-21,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-01-21 -step-1o-vision-32k-highres,Step-1o-Vision-32k (highres),-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -deepseek-r1,DeepSeek-R1,-,-,-,MIT,DeepSeek,https://api-docs.deepseek.com/news/news250120 -flux-1.1-pro,FLUX1.1 [pro],-,-,-,Proprietary,Black Forest Labs,https://replicate.com/black-forest-labs/flux-1.1-pro -recraft-v3,Recraft V3,-,-,-,Proprietary,Recraft,https://www.recraft.ai/blog/recraft-introduces-a-revolutionary-ai-model-that-thinks-in-design-language -photon,Luma Photon,-,-,-,Proprietary,Luma AI,https://replicate.com/luma/photon -ideogram-v2,Ideogram 2.0,-,-,-,Proprietary,Ideogram,https://replicate.com/ideogram-ai/ideogram-v2 -stable-diffusion-v35-large,Stable Diffusion 3.5 Large,-,-,-,Open,Stability AI,https://fal.ai/models/fal-ai/stable-diffusion-v35-large -flux-1-dev-fp8,FLUX.1 [dev] (fp8),-,-,-,Open,Black Forest Labs,https://fireworks.ai/models/fireworks/flux-1-dev-fp8 -dall-e-3,DALLΒ·E 3,-,-,-,Proprietary,OpenAI,https://platform.openai.com/docs/models#dall-e -imagen-3.0-generate-002,Imagen-3.0-generate-002,-,-,-,Proprietary,Google,https://deepmind.google/technologies/imagen-3/ diff --git a/leaderboard_table_20250128.csv b/leaderboard_table_20250128.csv deleted file mode 100644 index 3ba44b326fd28bf8ed5cbf9ff4e2da0c6e49a896..0000000000000000000000000000000000000000 --- a/leaderboard_table_20250128.csv +++ /dev/null @@ -1,247 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7B-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7B,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70B-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7B-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7B-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7B-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70B-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8B-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-pro-001,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -gemini-1.5-flash-001,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet (20240620),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4K-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405B-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70B-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8B-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70B,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26B,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128K-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8B-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9B-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72B-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7B-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72B-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20B-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12B-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -llama-3.1-nemotron-70b-instruct,Llama-3.1-Nemotron-70B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct -llama-3.1-nemotron-51b-instruct,Llama-3.1-Nemotron-51B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3_1-Nemotron-51B-Instruct -claude-3-5-sonnet-20241022,Claude 3.5 Sonnet (20241022),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -molmo-72b-0924,Molmo-72B-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-72B-0924 -molmo-7b-d-0924,Molmo-7B-D-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-7B-D-0924 -reka-core-20240904,Reka-Core-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240904,Reka-Flash-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -hunyuan-standard-256k,Hunyuan-Standard-256K,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -ministral-8b-2410,Ministral-8B-2410,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Ministral-8B-Instruct-2410 -gemini-exp-1114,Gemini-Exp-1114,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1114&model=gemini-exp-1114 -chatgpt-4o-latest-20241120,ChatGPT-4o-latest (2024-11-20),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -qwen2.5-coder-32b-instruct,Qwen2.5-Coder-32B-Instruct,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct -granite-3.0-8b-instruct,Granite-3.0-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-8b-instruct -granite-3.0-2b-instruct,Granite-3.0-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-2b-instruct -gemini-exp-1121,Gemini-Exp-1121,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-exp-1121 -step-1v-32k,Step-1V-32K,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -athene-v2-chat,Athene-v2-Chat-72B,-,-,-,NexusFlow,NexusFlow,https://huggingface.co/Nexusflow/Athene-V2-Chat -c4ai-aya-expanse-32b,Aya-Expanse-32B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-32b -mistral-large-2411,Mistral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Mistral-Large-Instruct-2411 -pixtral-large-2411,Pixtral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Pixtral-Large-Instruct-2411 -gemini-exp-1206,Gemini-Exp-1206,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-exp-1206 -gemini-2.0-flash-exp,Gemini-2.0-Flash-Exp,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-exp -claude-3-5-haiku-20241022,Claude 3.5 Haiku (20241022),-,-,-,Propretary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -llama-3.3-70b-instruct,Llama-3.3-70B-Instruct,-,-,-,Llama-3.3,Meta,https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct -qwq-32b-preview,QwQ-32B-Preview,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/QwQ-32B-Preview -amazon-nova-pro-v1.0,Amazon Nova Pro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-lite-v1.0,Amazon Nova Lite 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-micro-v1.0,Amazon Nova Micro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -c4ai-aya-expanse-8b,Aya-Expanse-8B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-8b -gemini-2.0-flash-thinking-exp-1219,Gemini-2.0-Flash-Thinking-Exp-1219,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-1219 -qwen-vl-max-1119,Qwen-VL-Max-1119,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/user-guide/vision/?spm=a2c4g.11186623.0.0.33d259a8vPlZoe#f1cbd5b8a8k5w -deepseek-v2.5-1210,Deepseek-v2.5-1210,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5-1210 -qwen2.5-plus-1127,Qwen2.5-plus-1127,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models?spm=a2c4g.11186623.0.i7 -nvila-internal-15b-v1,NVILA-15B,-,-,-,-,NVIDIA,https://huggingface.co/Efficient-Large-Model/NVILA-15B -o1-2024-12-17,o1-2024-12-17,-,-,-,Proprietary,OpenAI,https://openai.com/index/o1-and-new-tools-for-developers/ -deepseek-v3,DeepSeek-V3,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V3 -smollm2-1.7b-instruct,SmolLM2-1.7B-Instruct,-,-,-,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct -llama-3.1-tulu-3-8b,Llama-3.1-Tulu-3-8B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B -llama-3.1-tulu-3-70b,Llama-3.1-Tulu-3-70B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-70B -step-2-16k-exp-202412,Step-2-16K-Exp,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/text -granite-3.1-8b-instruct,Granite-3.1-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-8b-instruct -granite-3.1-2b-instruct,Granite-3.1-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-2b-instruct -phi-4,Phi-4,-,-,-,MIT,Microsoft,https://huggingface.co/microsoft/phi-4 -gemini-2.0-flash-thinking-exp-01-21,Gemini-2.0-Flash-Thinking-Exp-01-21,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-01-21 -step-1o-vision-32k-highres,Step-1o-Vision-32k (highres),-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -deepseek-r1,DeepSeek-R1,-,-,-,MIT,DeepSeek,https://api-docs.deepseek.com/news/news250120 -flux-1.1-pro,FLUX1.1 [pro],-,-,-,Proprietary,Black Forest Labs,https://replicate.com/black-forest-labs/flux-1.1-pro -recraft-v3,Recraft V3,-,-,-,Proprietary,Recraft,https://www.recraft.ai/blog/recraft-introduces-a-revolutionary-ai-model-that-thinks-in-design-language -photon,Luma Photon,-,-,-,Proprietary,Luma AI,https://replicate.com/luma/photon -ideogram-v2,Ideogram 2.0,-,-,-,Proprietary,Ideogram,https://replicate.com/ideogram-ai/ideogram-v2 -stable-diffusion-v35-large,Stable Diffusion 3.5 Large,-,-,-,Open,Stability AI,https://fal.ai/models/fal-ai/stable-diffusion-v35-large -flux-1-dev-fp8,FLUX.1 [dev] (fp8),-,-,-,Open,Black Forest Labs,https://fireworks.ai/models/fireworks/flux-1-dev-fp8 -dall-e-3,DALLΒ·E 3,-,-,-,Proprietary,OpenAI,https://platform.openai.com/docs/models#dall-e -imagen-3.0-generate-002,Imagen-3.0-generate-002,-,-,-,Proprietary,Google,https://deepmind.google/technologies/imagen-3/ diff --git a/leaderboard_table_20250203.csv b/leaderboard_table_20250203.csv deleted file mode 100644 index 76bc7c8e46006e1a76e05e9ac0029472bb59805d..0000000000000000000000000000000000000000 --- a/leaderboard_table_20250203.csv +++ /dev/null @@ -1,249 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7B-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7B,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70B-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7B-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7B-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7B-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70B-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8B-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-pro-001,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -gemini-1.5-flash-001,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet (20240620),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4K-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405B-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70B-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8B-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70B,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26B,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128K-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8B-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9B-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72B-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7B-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72B-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20B-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12B-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -llama-3.1-nemotron-70b-instruct,Llama-3.1-Nemotron-70B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct -llama-3.1-nemotron-51b-instruct,Llama-3.1-Nemotron-51B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3_1-Nemotron-51B-Instruct -claude-3-5-sonnet-20241022,Claude 3.5 Sonnet (20241022),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -molmo-72b-0924,Molmo-72B-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-72B-0924 -molmo-7b-d-0924,Molmo-7B-D-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-7B-D-0924 -reka-core-20240904,Reka-Core-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240904,Reka-Flash-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -hunyuan-standard-256k,Hunyuan-Standard-256K,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -ministral-8b-2410,Ministral-8B-2410,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Ministral-8B-Instruct-2410 -gemini-exp-1114,Gemini-Exp-1114,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1114&model=gemini-exp-1114 -chatgpt-4o-latest-20241120,ChatGPT-4o-latest (2024-11-20),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -qwen2.5-coder-32b-instruct,Qwen2.5-Coder-32B-Instruct,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct -granite-3.0-8b-instruct,Granite-3.0-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-8b-instruct -granite-3.0-2b-instruct,Granite-3.0-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-2b-instruct -gemini-exp-1121,Gemini-Exp-1121,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-exp-1121 -step-1v-32k,Step-1V-32K,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -athene-v2-chat,Athene-v2-Chat-72B,-,-,-,NexusFlow,NexusFlow,https://huggingface.co/Nexusflow/Athene-V2-Chat -c4ai-aya-expanse-32b,Aya-Expanse-32B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-32b -mistral-large-2411,Mistral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Mistral-Large-Instruct-2411 -pixtral-large-2411,Pixtral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Pixtral-Large-Instruct-2411 -gemini-exp-1206,Gemini-Exp-1206,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-exp-1206 -gemini-2.0-flash-exp,Gemini-2.0-Flash-Exp,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-exp -claude-3-5-haiku-20241022,Claude 3.5 Haiku (20241022),-,-,-,Propretary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -llama-3.3-70b-instruct,Llama-3.3-70B-Instruct,-,-,-,Llama-3.3,Meta,https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct -qwq-32b-preview,QwQ-32B-Preview,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/QwQ-32B-Preview -amazon-nova-pro-v1.0,Amazon Nova Pro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-lite-v1.0,Amazon Nova Lite 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-micro-v1.0,Amazon Nova Micro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -c4ai-aya-expanse-8b,Aya-Expanse-8B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-8b -gemini-2.0-flash-thinking-exp-1219,Gemini-2.0-Flash-Thinking-Exp-1219,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-1219 -qwen-vl-max-1119,Qwen-VL-Max-1119,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/user-guide/vision/?spm=a2c4g.11186623.0.0.33d259a8vPlZoe#f1cbd5b8a8k5w -deepseek-v2.5-1210,Deepseek-v2.5-1210,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5-1210 -qwen2.5-plus-1127,Qwen2.5-plus-1127,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models?spm=a2c4g.11186623.0.i7 -nvila-internal-15b-v1,NVILA-15B,-,-,-,-,NVIDIA,https://huggingface.co/Efficient-Large-Model/NVILA-15B -o1-2024-12-17,o1-2024-12-17,-,-,-,Proprietary,OpenAI,https://openai.com/index/o1-and-new-tools-for-developers/ -deepseek-v3,DeepSeek-V3,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V3 -smollm2-1.7b-instruct,SmolLM2-1.7B-Instruct,-,-,-,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct -llama-3.1-tulu-3-8b,Llama-3.1-Tulu-3-8B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B -llama-3.1-tulu-3-70b,Llama-3.1-Tulu-3-70B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-70B -step-2-16k-exp-202412,Step-2-16K-Exp,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/text -granite-3.1-8b-instruct,Granite-3.1-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-8b-instruct -granite-3.1-2b-instruct,Granite-3.1-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-2b-instruct -phi-4,Phi-4,-,-,-,MIT,Microsoft,https://huggingface.co/microsoft/phi-4 -gemini-2.0-flash-thinking-exp-01-21,Gemini-2.0-Flash-Thinking-Exp-01-21,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-01-21 -step-1o-vision-32k-highres,Step-1o-Vision-32k (highres),-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -deepseek-r1,DeepSeek-R1,-,-,-,MIT,DeepSeek,https://api-docs.deepseek.com/news/news250120 -qwen-max-2025-01-25,Qwen-Max-2025-01-25,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/blog/qwen2.5-max/ -glm-4-plus-0111,GLM-4-Plus-0111,-,-,-,Proprietary,Zhipu,https://bigmodel.cn/dev/howuse/glm-4 -flux-1.1-pro,FLUX1.1 [pro],-,-,-,Proprietary,Black Forest Labs,https://replicate.com/black-forest-labs/flux-1.1-pro -recraft-v3,Recraft V3,-,-,-,Proprietary,Recraft,https://www.recraft.ai/blog/recraft-introduces-a-revolutionary-ai-model-that-thinks-in-design-language -photon,Luma Photon,-,-,-,Proprietary,Luma AI,https://replicate.com/luma/photon -ideogram-v2,Ideogram 2.0,-,-,-,Proprietary,Ideogram,https://replicate.com/ideogram-ai/ideogram-v2 -stable-diffusion-v35-large,Stable Diffusion 3.5 Large,-,-,-,Open,Stability AI,https://fal.ai/models/fal-ai/stable-diffusion-v35-large -flux-1-dev-fp8,FLUX.1 [dev] (fp8),-,-,-,Open,Black Forest Labs,https://fireworks.ai/models/fireworks/flux-1-dev-fp8 -dall-e-3,DALLΒ·E 3,-,-,-,Proprietary,OpenAI,https://platform.openai.com/docs/models#dall-e -imagen-3.0-generate-002,Imagen-3.0-generate-002,-,-,-,Proprietary,Google,https://deepmind.google/technologies/imagen-3/ diff --git a/leaderboard_table_20250205.csv b/leaderboard_table_20250205.csv deleted file mode 100644 index f7be331b20e6f9493ac999c438ffc6a43ab80f25..0000000000000000000000000000000000000000 --- a/leaderboard_table_20250205.csv +++ /dev/null @@ -1,252 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7B-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7B,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70B-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7B-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7B-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7B-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70B-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8B-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-pro-001,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -gemini-1.5-flash-001,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet (20240620),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4K-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405B-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70B-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8B-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70B,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26B,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128K-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8B-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9B-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72B-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7B-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72B-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20B-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12B-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -llama-3.1-nemotron-70b-instruct,Llama-3.1-Nemotron-70B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct -llama-3.1-nemotron-51b-instruct,Llama-3.1-Nemotron-51B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3_1-Nemotron-51B-Instruct -claude-3-5-sonnet-20241022,Claude 3.5 Sonnet (20241022),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -molmo-72b-0924,Molmo-72B-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-72B-0924 -molmo-7b-d-0924,Molmo-7B-D-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-7B-D-0924 -reka-core-20240904,Reka-Core-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240904,Reka-Flash-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -hunyuan-standard-256k,Hunyuan-Standard-256K,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -ministral-8b-2410,Ministral-8B-2410,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Ministral-8B-Instruct-2410 -gemini-exp-1114,Gemini-Exp-1114,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1114&model=gemini-exp-1114 -chatgpt-4o-latest-20241120,ChatGPT-4o-latest (2024-11-20),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -qwen2.5-coder-32b-instruct,Qwen2.5-Coder-32B-Instruct,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct -granite-3.0-8b-instruct,Granite-3.0-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-8b-instruct -granite-3.0-2b-instruct,Granite-3.0-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-2b-instruct -gemini-exp-1121,Gemini-Exp-1121,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-exp-1121 -step-1v-32k,Step-1V-32K,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -athene-v2-chat,Athene-v2-Chat-72B,-,-,-,NexusFlow,NexusFlow,https://huggingface.co/Nexusflow/Athene-V2-Chat -c4ai-aya-expanse-32b,Aya-Expanse-32B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-32b -mistral-large-2411,Mistral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Mistral-Large-Instruct-2411 -pixtral-large-2411,Pixtral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Pixtral-Large-Instruct-2411 -gemini-exp-1206,Gemini-Exp-1206,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-exp-1206 -gemini-2.0-flash-exp,Gemini-2.0-Flash-Exp,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-exp -claude-3-5-haiku-20241022,Claude 3.5 Haiku (20241022),-,-,-,Propretary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -llama-3.3-70b-instruct,Llama-3.3-70B-Instruct,-,-,-,Llama-3.3,Meta,https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct -qwq-32b-preview,QwQ-32B-Preview,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/QwQ-32B-Preview -amazon-nova-pro-v1.0,Amazon Nova Pro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-lite-v1.0,Amazon Nova Lite 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-micro-v1.0,Amazon Nova Micro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -c4ai-aya-expanse-8b,Aya-Expanse-8B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-8b -gemini-2.0-flash-thinking-exp-1219,Gemini-2.0-Flash-Thinking-Exp-1219,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-1219 -qwen-vl-max-1119,Qwen-VL-Max-1119,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/user-guide/vision/?spm=a2c4g.11186623.0.0.33d259a8vPlZoe#f1cbd5b8a8k5w -deepseek-v2.5-1210,Deepseek-v2.5-1210,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5-1210 -qwen2.5-plus-1127,Qwen2.5-plus-1127,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models?spm=a2c4g.11186623.0.i7 -nvila-internal-15b-v1,NVILA-15B,-,-,-,-,NVIDIA,https://huggingface.co/Efficient-Large-Model/NVILA-15B -o1-2024-12-17,o1-2024-12-17,-,-,-,Proprietary,OpenAI,https://openai.com/index/o1-and-new-tools-for-developers/ -deepseek-v3,DeepSeek-V3,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V3 -smollm2-1.7b-instruct,SmolLM2-1.7B-Instruct,-,-,-,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct -llama-3.1-tulu-3-8b,Llama-3.1-Tulu-3-8B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B -llama-3.1-tulu-3-70b,Llama-3.1-Tulu-3-70B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-70B -step-2-16k-exp-202412,Step-2-16K-Exp,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/text -granite-3.1-8b-instruct,Granite-3.1-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-8b-instruct -granite-3.1-2b-instruct,Granite-3.1-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-2b-instruct -phi-4,Phi-4,-,-,-,MIT,Microsoft,https://huggingface.co/microsoft/phi-4 -gemini-2.0-flash-thinking-exp-01-21,Gemini-2.0-Flash-Thinking-Exp-01-21,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-01-21 -step-1o-vision-32k-highres,Step-1o-Vision-32k (highres),-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -deepseek-r1,DeepSeek-R1,-,-,-,MIT,DeepSeek,https://api-docs.deepseek.com/news/news250120 -qwen2.5-max,Qwen2.5-Max,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/blog/qwen2.5-max/ -gemini-2.0-pro-exp-02-05,Gemini-2.0-Pro-Exp-02-05,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-pro-exp-02-05 -gemini-2.0-flash-001,Gemini-2.0-Flash-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-2.0-flash-001 -gemini-2.0-flash-lite-preview,Gemini-2.0-Flash-Lite-Preview,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-flash-lite-preview-02-05 -glm-4-plus-0111,GLM-4-Plus-0111,-,-,-,Proprietary,Zhipu,https://bigmodel.cn/dev/howuse/glm-4 -flux-1.1-pro,FLUX1.1 [pro],-,-,-,Proprietary,Black Forest Labs,https://replicate.com/black-forest-labs/flux-1.1-pro -recraft-v3,Recraft V3,-,-,-,Proprietary,Recraft,https://www.recraft.ai/blog/recraft-introduces-a-revolutionary-ai-model-that-thinks-in-design-language -photon,Luma Photon,-,-,-,Proprietary,Luma AI,https://replicate.com/luma/photon -ideogram-v2,Ideogram 2.0,-,-,-,Proprietary,Ideogram,https://replicate.com/ideogram-ai/ideogram-v2 -stable-diffusion-v35-large,Stable Diffusion 3.5 Large,-,-,-,Open,Stability AI,https://fal.ai/models/fal-ai/stable-diffusion-v35-large -flux-1-dev-fp8,FLUX.1 [dev] (fp8),-,-,-,Open,Black Forest Labs,https://fireworks.ai/models/fireworks/flux-1-dev-fp8 -dall-e-3,DALLΒ·E 3,-,-,-,Proprietary,OpenAI,https://platform.openai.com/docs/models#dall-e -imagen-3.0-generate-002,Imagen-3.0-generate-002,-,-,-,Proprietary,Google,https://deepmind.google/technologies/imagen-3/ diff --git a/leaderboard_table_20250206.csv b/leaderboard_table_20250206.csv deleted file mode 100644 index cf1af356e02a9f7146385d4707410d8db85910e0..0000000000000000000000000000000000000000 --- a/leaderboard_table_20250206.csv +++ /dev/null @@ -1,254 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7B-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7B,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70B-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7B-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7B-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7B-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70B-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8B-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-pro-001,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -gemini-1.5-flash-001,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet (20240620),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4K-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405B-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70B-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8B-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70B,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26B,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128K-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8B-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9B-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72B-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7B-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72B-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20B-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12B-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -llama-3.1-nemotron-70b-instruct,Llama-3.1-Nemotron-70B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct -llama-3.1-nemotron-51b-instruct,Llama-3.1-Nemotron-51B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3_1-Nemotron-51B-Instruct -claude-3-5-sonnet-20241022,Claude 3.5 Sonnet (20241022),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -molmo-72b-0924,Molmo-72B-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-72B-0924 -molmo-7b-d-0924,Molmo-7B-D-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-7B-D-0924 -reka-core-20240904,Reka-Core-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240904,Reka-Flash-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -hunyuan-standard-256k,Hunyuan-Standard-256K,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -ministral-8b-2410,Ministral-8B-2410,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Ministral-8B-Instruct-2410 -gemini-exp-1114,Gemini-Exp-1114,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1114&model=gemini-exp-1114 -chatgpt-4o-latest-20241120,ChatGPT-4o-latest (2024-11-20),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -qwen2.5-coder-32b-instruct,Qwen2.5-Coder-32B-Instruct,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct -granite-3.0-8b-instruct,Granite-3.0-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-8b-instruct -granite-3.0-2b-instruct,Granite-3.0-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-2b-instruct -gemini-exp-1121,Gemini-Exp-1121,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-exp-1121 -step-1v-32k,Step-1V-32K,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -athene-v2-chat,Athene-v2-Chat-72B,-,-,-,NexusFlow,NexusFlow,https://huggingface.co/Nexusflow/Athene-V2-Chat -c4ai-aya-expanse-32b,Aya-Expanse-32B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-32b -mistral-large-2411,Mistral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Mistral-Large-Instruct-2411 -pixtral-large-2411,Pixtral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Pixtral-Large-Instruct-2411 -gemini-exp-1206,Gemini-Exp-1206,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-exp-1206 -gemini-2.0-flash-exp,Gemini-2.0-Flash-Exp,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-exp -claude-3-5-haiku-20241022,Claude 3.5 Haiku (20241022),-,-,-,Propretary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -llama-3.3-70b-instruct,Llama-3.3-70B-Instruct,-,-,-,Llama-3.3,Meta,https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct -qwq-32b-preview,QwQ-32B-Preview,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/QwQ-32B-Preview -amazon-nova-pro-v1.0,Amazon Nova Pro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-lite-v1.0,Amazon Nova Lite 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-micro-v1.0,Amazon Nova Micro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -c4ai-aya-expanse-8b,Aya-Expanse-8B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-8b -gemini-2.0-flash-thinking-exp-1219,Gemini-2.0-Flash-Thinking-Exp-1219,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-1219 -qwen-vl-max-1119,Qwen-VL-Max-1119,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/user-guide/vision/?spm=a2c4g.11186623.0.0.33d259a8vPlZoe#f1cbd5b8a8k5w -deepseek-v2.5-1210,Deepseek-v2.5-1210,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5-1210 -qwen2.5-plus-1127,Qwen2.5-plus-1127,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models?spm=a2c4g.11186623.0.i7 -nvila-internal-15b-v1,NVILA-15B,-,-,-,-,NVIDIA,https://huggingface.co/Efficient-Large-Model/NVILA-15B -o1-2024-12-17,o1-2024-12-17,-,-,-,Proprietary,OpenAI,https://openai.com/index/o1-and-new-tools-for-developers/ -deepseek-v3,DeepSeek-V3,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V3 -smollm2-1.7b-instruct,SmolLM2-1.7B-Instruct,-,-,-,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct -llama-3.1-tulu-3-8b,Llama-3.1-Tulu-3-8B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B -llama-3.1-tulu-3-70b,Llama-3.1-Tulu-3-70B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-70B -step-2-16k-exp-202412,Step-2-16K-Exp,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/text -granite-3.1-8b-instruct,Granite-3.1-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-8b-instruct -granite-3.1-2b-instruct,Granite-3.1-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-2b-instruct -phi-4,Phi-4,-,-,-,MIT,Microsoft,https://huggingface.co/microsoft/phi-4 -gemini-2.0-flash-thinking-exp-01-21,Gemini-2.0-Flash-Thinking-Exp-01-21,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-01-21 -step-1o-vision-32k-highres,Step-1o-Vision-32k (highres),-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -deepseek-r1,DeepSeek-R1,-,-,-,MIT,DeepSeek,https://api-docs.deepseek.com/news/news250120 -qwen2.5-max,Qwen2.5-Max,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/blog/qwen2.5-max/ -gemini-2.0-pro-exp-02-05,Gemini-2.0-Pro-Exp-02-05,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-pro-exp-02-05 -gemini-2.0-flash-001,Gemini-2.0-Flash-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-2.0-flash-001 -gemini-2.0-flash-lite-preview,Gemini-2.0-Flash-Lite-Preview,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-flash-lite-preview-02-05 -gemini-2.0-flash,Gemini-2.0-Flash-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-2.0-flash-001 -o3-mini,o3-mini,-,-,-,Proprietary,OpenAI,https://openai.com/index/openai-o3-mini/ -glm-4-plus-0111,GLM-4-Plus-0111,-,-,-,Proprietary,Zhipu,https://bigmodel.cn/dev/howuse/glm-4 -flux-1.1-pro,FLUX1.1 [pro],-,-,-,Proprietary,Black Forest Labs,https://replicate.com/black-forest-labs/flux-1.1-pro -recraft-v3,Recraft V3,-,-,-,Proprietary,Recraft,https://www.recraft.ai/blog/recraft-introduces-a-revolutionary-ai-model-that-thinks-in-design-language -photon,Luma Photon,-,-,-,Proprietary,Luma AI,https://replicate.com/luma/photon -ideogram-v2,Ideogram 2.0,-,-,-,Proprietary,Ideogram,https://replicate.com/ideogram-ai/ideogram-v2 -stable-diffusion-v35-large,Stable Diffusion 3.5 Large,-,-,-,Open,Stability AI,https://fal.ai/models/fal-ai/stable-diffusion-v35-large -flux-1-dev-fp8,FLUX.1 [dev] (fp8),-,-,-,Open,Black Forest Labs,https://fireworks.ai/models/fireworks/flux-1-dev-fp8 -dall-e-3,DALLΒ·E 3,-,-,-,Proprietary,OpenAI,https://platform.openai.com/docs/models#dall-e -imagen-3.0-generate-002,Imagen-3.0-generate-002,-,-,-,Proprietary,Google,https://deepmind.google/technologies/imagen-3/ diff --git a/leaderboard_table_20250209.csv b/leaderboard_table_20250209.csv deleted file mode 100644 index 310fc07fbad09681eca23ea8bbe0158501a91948..0000000000000000000000000000000000000000 --- a/leaderboard_table_20250209.csv +++ /dev/null @@ -1,255 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7B-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7B,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70B-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7B-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7B-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7B-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70B-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8B-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-pro-001,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -gemini-1.5-flash-001,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet (20240620),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4K-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405B-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70B-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8B-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70B,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26B,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128K-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8B-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9B-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72B-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7B-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72B-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20B-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12B-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -llama-3.1-nemotron-70b-instruct,Llama-3.1-Nemotron-70B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct -llama-3.1-nemotron-51b-instruct,Llama-3.1-Nemotron-51B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3_1-Nemotron-51B-Instruct -claude-3-5-sonnet-20241022,Claude 3.5 Sonnet (20241022),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -molmo-72b-0924,Molmo-72B-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-72B-0924 -molmo-7b-d-0924,Molmo-7B-D-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-7B-D-0924 -reka-core-20240904,Reka-Core-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240904,Reka-Flash-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -hunyuan-standard-256k,Hunyuan-Standard-256K,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -ministral-8b-2410,Ministral-8B-2410,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Ministral-8B-Instruct-2410 -gemini-exp-1114,Gemini-Exp-1114,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1114&model=gemini-exp-1114 -chatgpt-4o-latest-20241120,ChatGPT-4o-latest (2024-11-20),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -qwen2.5-coder-32b-instruct,Qwen2.5-Coder-32B-Instruct,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct -granite-3.0-8b-instruct,Granite-3.0-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-8b-instruct -granite-3.0-2b-instruct,Granite-3.0-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-2b-instruct -gemini-exp-1121,Gemini-Exp-1121,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-exp-1121 -step-1v-32k,Step-1V-32K,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -athene-v2-chat,Athene-v2-Chat-72B,-,-,-,NexusFlow,NexusFlow,https://huggingface.co/Nexusflow/Athene-V2-Chat -c4ai-aya-expanse-32b,Aya-Expanse-32B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-32b -mistral-large-2411,Mistral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Mistral-Large-Instruct-2411 -pixtral-large-2411,Pixtral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Pixtral-Large-Instruct-2411 -gemini-exp-1206,Gemini-Exp-1206,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-exp-1206 -gemini-2.0-flash-exp,Gemini-2.0-Flash-Exp,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-exp -claude-3-5-haiku-20241022,Claude 3.5 Haiku (20241022),-,-,-,Propretary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -llama-3.3-70b-instruct,Llama-3.3-70B-Instruct,-,-,-,Llama-3.3,Meta,https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct -qwq-32b-preview,QwQ-32B-Preview,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/QwQ-32B-Preview -amazon-nova-pro-v1.0,Amazon Nova Pro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-lite-v1.0,Amazon Nova Lite 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-micro-v1.0,Amazon Nova Micro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -c4ai-aya-expanse-8b,Aya-Expanse-8B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-8b -gemini-2.0-flash-thinking-exp-1219,Gemini-2.0-Flash-Thinking-Exp-1219,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-1219 -qwen-vl-max-1119,Qwen-VL-Max-1119,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/user-guide/vision/?spm=a2c4g.11186623.0.0.33d259a8vPlZoe#f1cbd5b8a8k5w -deepseek-v2.5-1210,Deepseek-v2.5-1210,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5-1210 -qwen2.5-plus-1127,Qwen2.5-plus-1127,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models?spm=a2c4g.11186623.0.i7 -nvila-internal-15b-v1,NVILA-15B,-,-,-,-,NVIDIA,https://huggingface.co/Efficient-Large-Model/NVILA-15B -o1-2024-12-17,o1-2024-12-17,-,-,-,Proprietary,OpenAI,https://openai.com/index/o1-and-new-tools-for-developers/ -deepseek-v3,DeepSeek-V3,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V3 -smollm2-1.7b-instruct,SmolLM2-1.7B-Instruct,-,-,-,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct -llama-3.1-tulu-3-8b,Llama-3.1-Tulu-3-8B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B -llama-3.1-tulu-3-70b,Llama-3.1-Tulu-3-70B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-70B -step-2-16k-exp-202412,Step-2-16K-Exp,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/text -granite-3.1-8b-instruct,Granite-3.1-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-8b-instruct -granite-3.1-2b-instruct,Granite-3.1-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-2b-instruct -phi-4,Phi-4,-,-,-,MIT,Microsoft,https://huggingface.co/microsoft/phi-4 -gemini-2.0-flash-thinking-exp-01-21,Gemini-2.0-Flash-Thinking-Exp-01-21,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-01-21 -step-1o-vision-32k-highres,Step-1o-Vision-32k (highres),-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -deepseek-r1,DeepSeek-R1,-,-,-,MIT,DeepSeek,https://api-docs.deepseek.com/news/news250120 -qwen2.5-max,Qwen2.5-Max,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/blog/qwen2.5-max/ -gemini-2.0-pro-exp-02-05,Gemini-2.0-Pro-Exp-02-05,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-pro-exp-02-05 -gemini-2.0-flash-001,Gemini-2.0-Flash-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-2.0-flash-001 -gemini-2.0-flash-lite-preview-02-05,Gemini-2.0-Flash-Lite-Preview-02-05,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-flash-lite-preview-02-05 -gemini-2.0-flash,Gemini-2.0-Flash-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-2.0-flash-001 -o3-mini,o3-mini,-,-,-,Proprietary,OpenAI,https://openai.com/index/openai-o3-mini/ -glm-4-plus-0111,GLM-4-Plus-0111,-,-,-,Proprietary,Zhipu,https://bigmodel.cn/dev/howuse/glm-4 -qwen2.5-vl-72b-instruct,Qwen2.5-VL-72B-Instruct,-,-,-,Qwen,Alibaba,https://huggingface.co/Qwen/Qwen2.5-VL-72B-Instruct -flux-1.1-pro,FLUX1.1 [pro],-,-,-,Proprietary,Black Forest Labs,https://replicate.com/black-forest-labs/flux-1.1-pro -recraft-v3,Recraft V3,-,-,-,Proprietary,Recraft,https://www.recraft.ai/blog/recraft-introduces-a-revolutionary-ai-model-that-thinks-in-design-language -photon,Luma Photon,-,-,-,Proprietary,Luma AI,https://replicate.com/luma/photon -ideogram-v2,Ideogram 2.0,-,-,-,Proprietary,Ideogram,https://replicate.com/ideogram-ai/ideogram-v2 -stable-diffusion-v35-large,Stable Diffusion 3.5 Large,-,-,-,Open,Stability AI,https://fal.ai/models/fal-ai/stable-diffusion-v35-large -flux-1-dev-fp8,FLUX.1 [dev] (fp8),-,-,-,Open,Black Forest Labs,https://fireworks.ai/models/fireworks/flux-1-dev-fp8 -dall-e-3,DALLΒ·E 3,-,-,-,Proprietary,OpenAI,https://platform.openai.com/docs/models#dall-e -imagen-3.0-generate-002,Imagen-3.0-generate-002,-,-,-,Proprietary,Google,https://deepmind.google/technologies/imagen-3/ diff --git a/leaderboard_table_20250211.csv b/leaderboard_table_20250211.csv deleted file mode 100644 index d2a9eae7d08db7b9ba2093ad70cbbe331efc22cf..0000000000000000000000000000000000000000 --- a/leaderboard_table_20250211.csv +++ /dev/null @@ -1,257 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7B-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7B,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70B-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7B-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7B-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7B-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70B-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8B-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-pro-001,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -gemini-1.5-flash-001,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet (20240620),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4K-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405B-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70B-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8B-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70B,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26B,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128K-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8B-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9B-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72B-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7B-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72B-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20B-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12B-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -llama-3.1-nemotron-70b-instruct,Llama-3.1-Nemotron-70B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct -llama-3.1-nemotron-51b-instruct,Llama-3.1-Nemotron-51B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3_1-Nemotron-51B-Instruct -claude-3-5-sonnet-20241022,Claude 3.5 Sonnet (20241022),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -molmo-72b-0924,Molmo-72B-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-72B-0924 -molmo-7b-d-0924,Molmo-7B-D-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-7B-D-0924 -reka-core-20240904,Reka-Core-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240904,Reka-Flash-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -hunyuan-standard-256k,Hunyuan-Standard-256K,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -ministral-8b-2410,Ministral-8B-2410,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Ministral-8B-Instruct-2410 -gemini-exp-1114,Gemini-Exp-1114,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1114&model=gemini-exp-1114 -chatgpt-4o-latest-20241120,ChatGPT-4o-latest (2024-11-20),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -qwen2.5-coder-32b-instruct,Qwen2.5-Coder-32B-Instruct,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct -granite-3.0-8b-instruct,Granite-3.0-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-8b-instruct -granite-3.0-2b-instruct,Granite-3.0-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-2b-instruct -gemini-exp-1121,Gemini-Exp-1121,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-exp-1121 -step-1v-32k,Step-1V-32K,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -athene-v2-chat,Athene-v2-Chat-72B,-,-,-,NexusFlow,NexusFlow,https://huggingface.co/Nexusflow/Athene-V2-Chat -c4ai-aya-expanse-32b,Aya-Expanse-32B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-32b -mistral-large-2411,Mistral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Mistral-Large-Instruct-2411 -pixtral-large-2411,Pixtral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Pixtral-Large-Instruct-2411 -gemini-exp-1206,Gemini-Exp-1206,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-exp-1206 -gemini-2.0-flash-exp,Gemini-2.0-Flash-Exp,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-exp -claude-3-5-haiku-20241022,Claude 3.5 Haiku (20241022),-,-,-,Propretary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -llama-3.3-70b-instruct,Llama-3.3-70B-Instruct,-,-,-,Llama-3.3,Meta,https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct -qwq-32b-preview,QwQ-32B-Preview,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/QwQ-32B-Preview -amazon-nova-pro-v1.0,Amazon Nova Pro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-lite-v1.0,Amazon Nova Lite 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-micro-v1.0,Amazon Nova Micro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -c4ai-aya-expanse-8b,Aya-Expanse-8B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-8b -gemini-2.0-flash-thinking-exp-1219,Gemini-2.0-Flash-Thinking-Exp-1219,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-1219 -qwen-vl-max-1119,Qwen-VL-Max-1119,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/user-guide/vision/?spm=a2c4g.11186623.0.0.33d259a8vPlZoe#f1cbd5b8a8k5w -deepseek-v2.5-1210,Deepseek-v2.5-1210,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5-1210 -qwen2.5-plus-1127,Qwen2.5-plus-1127,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models?spm=a2c4g.11186623.0.i7 -nvila-internal-15b-v1,NVILA-15B,-,-,-,-,NVIDIA,https://huggingface.co/Efficient-Large-Model/NVILA-15B -o1-2024-12-17,o1-2024-12-17,-,-,-,Proprietary,OpenAI,https://openai.com/index/o1-and-new-tools-for-developers/ -deepseek-v3,DeepSeek-V3,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V3 -smollm2-1.7b-instruct,SmolLM2-1.7B-Instruct,-,-,-,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct -llama-3.1-tulu-3-8b,Llama-3.1-Tulu-3-8B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B -llama-3.1-tulu-3-70b,Llama-3.1-Tulu-3-70B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-70B -step-2-16k-exp-202412,Step-2-16K-Exp,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/text -granite-3.1-8b-instruct,Granite-3.1-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-8b-instruct -granite-3.1-2b-instruct,Granite-3.1-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-2b-instruct -phi-4,Phi-4,-,-,-,MIT,Microsoft,https://huggingface.co/microsoft/phi-4 -gemini-2.0-flash-thinking-exp-01-21,Gemini-2.0-Flash-Thinking-Exp-01-21,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-01-21 -step-1o-vision-32k-highres,Step-1o-Vision-32k (highres),-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -deepseek-r1,DeepSeek-R1,-,-,-,MIT,DeepSeek,https://api-docs.deepseek.com/news/news250120 -qwen2.5-max,Qwen2.5-Max,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/blog/qwen2.5-max/ -gemini-2.0-pro-exp-02-05,Gemini-2.0-Pro-Exp-02-05,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-pro-exp-02-05 -gemini-2.0-flash-001,Gemini-2.0-Flash-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-2.0-flash-001 -gemini-2.0-flash-lite-preview-02-05,Gemini-2.0-Flash-Lite-Preview-02-05,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-flash-lite-preview-02-05 -gemini-2.0-flash,Gemini-2.0-Flash-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-2.0-flash-001 -o3-mini,o3-mini,-,-,-,Proprietary,OpenAI,https://openai.com/index/openai-o3-mini/ -glm-4-plus-0111,GLM-4-Plus-0111,-,-,-,Proprietary,Zhipu,https://bigmodel.cn/dev/howuse/glm-4 -qwen2.5-vl-72b-instruct,Qwen2.5-VL-72B-Instruct,-,-,-,Qwen,Alibaba,https://huggingface.co/Qwen/Qwen2.5-VL-72B-Instruct -chatgpt-4o-latest-20250129,ChatGPT-4o-latest (2025-01-29),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -mistral-small-24b-instruct-2501,Mistral-Small-24B-Instruct-2501 ,-,-,-,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-Small-24B-Instruct-2501 -flux-1.1-pro,FLUX1.1 [pro],-,-,-,Proprietary,Black Forest Labs,https://replicate.com/black-forest-labs/flux-1.1-pro -recraft-v3,Recraft V3,-,-,-,Proprietary,Recraft,https://www.recraft.ai/blog/recraft-introduces-a-revolutionary-ai-model-that-thinks-in-design-language -photon,Luma Photon,-,-,-,Proprietary,Luma AI,https://replicate.com/luma/photon -ideogram-v2,Ideogram 2.0,-,-,-,Proprietary,Ideogram,https://replicate.com/ideogram-ai/ideogram-v2 -stable-diffusion-v35-large,Stable Diffusion 3.5 Large,-,-,-,Open,Stability AI,https://fal.ai/models/fal-ai/stable-diffusion-v35-large -flux-1-dev-fp8,FLUX.1 [dev] (fp8),-,-,-,Open,Black Forest Labs,https://fireworks.ai/models/fireworks/flux-1-dev-fp8 -dall-e-3,DALLΒ·E 3,-,-,-,Proprietary,OpenAI,https://platform.openai.com/docs/models#dall-e -imagen-3.0-generate-002,Imagen-3.0-generate-002,-,-,-,Proprietary,Google,https://deepmind.google/technologies/imagen-3/ diff --git a/leaderboard_table_20250214.csv b/leaderboard_table_20250214.csv deleted file mode 100644 index f4a55ca54d9e20e01cccf3ae8da86755ee98e1a5..0000000000000000000000000000000000000000 --- a/leaderboard_table_20250214.csv +++ /dev/null @@ -1,258 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7B-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7B,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70B-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7B-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7B-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7B-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70B-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8B-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-pro-001,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -gemini-1.5-flash-001,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet (20240620),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4K-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405B-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70B-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8B-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70B,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26B,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128K-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8B-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9B-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72B-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7B-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72B-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20B-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12B-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -llama-3.1-nemotron-70b-instruct,Llama-3.1-Nemotron-70B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct -llama-3.1-nemotron-51b-instruct,Llama-3.1-Nemotron-51B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3_1-Nemotron-51B-Instruct -claude-3-5-sonnet-20241022,Claude 3.5 Sonnet (20241022),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -molmo-72b-0924,Molmo-72B-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-72B-0924 -molmo-7b-d-0924,Molmo-7B-D-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-7B-D-0924 -reka-core-20240904,Reka-Core-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240904,Reka-Flash-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -hunyuan-standard-256k,Hunyuan-Standard-256K,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -ministral-8b-2410,Ministral-8B-2410,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Ministral-8B-Instruct-2410 -gemini-exp-1114,Gemini-Exp-1114,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1114&model=gemini-exp-1114 -chatgpt-4o-latest-20241120,ChatGPT-4o-latest (2024-11-20),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -qwen2.5-coder-32b-instruct,Qwen2.5-Coder-32B-Instruct,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct -granite-3.0-8b-instruct,Granite-3.0-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-8b-instruct -granite-3.0-2b-instruct,Granite-3.0-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-2b-instruct -gemini-exp-1121,Gemini-Exp-1121,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-exp-1121 -step-1v-32k,Step-1V-32K,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -athene-v2-chat,Athene-v2-Chat-72B,-,-,-,NexusFlow,NexusFlow,https://huggingface.co/Nexusflow/Athene-V2-Chat -c4ai-aya-expanse-32b,Aya-Expanse-32B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-32b -mistral-large-2411,Mistral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Mistral-Large-Instruct-2411 -pixtral-large-2411,Pixtral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Pixtral-Large-Instruct-2411 -gemini-exp-1206,Gemini-Exp-1206,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-exp-1206 -gemini-2.0-flash-exp,Gemini-2.0-Flash-Exp,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-exp -claude-3-5-haiku-20241022,Claude 3.5 Haiku (20241022),-,-,-,Propretary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -llama-3.3-70b-instruct,Llama-3.3-70B-Instruct,-,-,-,Llama-3.3,Meta,https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct -qwq-32b-preview,QwQ-32B-Preview,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/QwQ-32B-Preview -amazon-nova-pro-v1.0,Amazon Nova Pro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-lite-v1.0,Amazon Nova Lite 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-micro-v1.0,Amazon Nova Micro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -c4ai-aya-expanse-8b,Aya-Expanse-8B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-8b -gemini-2.0-flash-thinking-exp-1219,Gemini-2.0-Flash-Thinking-Exp-1219,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-1219 -qwen-vl-max-1119,Qwen-VL-Max-1119,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/user-guide/vision/?spm=a2c4g.11186623.0.0.33d259a8vPlZoe#f1cbd5b8a8k5w -deepseek-v2.5-1210,Deepseek-v2.5-1210,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5-1210 -qwen2.5-plus-1127,Qwen2.5-plus-1127,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models?spm=a2c4g.11186623.0.i7 -nvila-internal-15b-v1,NVILA-15B,-,-,-,-,NVIDIA,https://huggingface.co/Efficient-Large-Model/NVILA-15B -o1-2024-12-17,o1-2024-12-17,-,-,-,Proprietary,OpenAI,https://openai.com/index/o1-and-new-tools-for-developers/ -deepseek-v3,DeepSeek-V3,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V3 -smollm2-1.7b-instruct,SmolLM2-1.7B-Instruct,-,-,-,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct -llama-3.1-tulu-3-8b,Llama-3.1-Tulu-3-8B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B -llama-3.1-tulu-3-70b,Llama-3.1-Tulu-3-70B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-70B -step-2-16k-exp-202412,Step-2-16K-Exp,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/text -granite-3.1-8b-instruct,Granite-3.1-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-8b-instruct -granite-3.1-2b-instruct,Granite-3.1-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-2b-instruct -phi-4,Phi-4,-,-,-,MIT,Microsoft,https://huggingface.co/microsoft/phi-4 -gemini-2.0-flash-thinking-exp-01-21,Gemini-2.0-Flash-Thinking-Exp-01-21,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-01-21 -step-1o-vision-32k-highres,Step-1o-Vision-32k (highres),-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -deepseek-r1,DeepSeek-R1,-,-,-,MIT,DeepSeek,https://api-docs.deepseek.com/news/news250120 -qwen2.5-max,Qwen2.5-Max,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/blog/qwen2.5-max/ -gemini-2.0-pro-exp-02-05,Gemini-2.0-Pro-Exp-02-05,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-pro-exp-02-05 -gemini-2.0-flash-001,Gemini-2.0-Flash-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-2.0-flash-001 -gemini-2.0-flash-lite-preview-02-05,Gemini-2.0-Flash-Lite-Preview-02-05,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-flash-lite-preview-02-05 -gemini-2.0-flash,Gemini-2.0-Flash-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-2.0-flash-001 -o3-mini,o3-mini,-,-,-,Proprietary,OpenAI,https://openai.com/index/openai-o3-mini/ -glm-4-plus-0111,GLM-4-Plus-0111,-,-,-,Proprietary,Zhipu,https://bigmodel.cn/dev/howuse/glm-4 -qwen2.5-vl-72b-instruct,Qwen2.5-VL-72B-Instruct,-,-,-,Qwen,Alibaba,https://huggingface.co/Qwen/Qwen2.5-VL-72B-Instruct -chatgpt-4o-latest-20250129,ChatGPT-4o-latest (2025-01-29),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -mistral-small-24b-instruct-2501,Mistral-Small-24B-Instruct-2501,-,-,-,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-Small-24B-Instruct-2501 -qwen-plus-0125,Qwen-Plus-0125,-,-,-,Proprietary,Alibaba,https://www.alibabacloud.com/help/en/model-studio/developer-reference/what-is-qwen-llm -flux-1.1-pro,FLUX1.1 [pro],-,-,-,Proprietary,Black Forest Labs,https://replicate.com/black-forest-labs/flux-1.1-pro -recraft-v3,Recraft V3,-,-,-,Proprietary,Recraft,https://www.recraft.ai/blog/recraft-introduces-a-revolutionary-ai-model-that-thinks-in-design-language -photon,Luma Photon,-,-,-,Proprietary,Luma AI,https://replicate.com/luma/photon -ideogram-v2,Ideogram 2.0,-,-,-,Proprietary,Ideogram,https://replicate.com/ideogram-ai/ideogram-v2 -stable-diffusion-v35-large,Stable Diffusion 3.5 Large,-,-,-,Open,Stability AI,https://fal.ai/models/fal-ai/stable-diffusion-v35-large -flux-1-dev-fp8,FLUX.1 [dev] (fp8),-,-,-,Open,Black Forest Labs,https://fireworks.ai/models/fireworks/flux-1-dev-fp8 -dall-e-3,DALLΒ·E 3,-,-,-,Proprietary,OpenAI,https://platform.openai.com/docs/models#dall-e -imagen-3.0-generate-002,Imagen-3.0-generate-002,-,-,-,Proprietary,Google,https://deepmind.google/technologies/imagen-3/ diff --git a/leaderboard_table_20250217.csv b/leaderboard_table_20250217.csv deleted file mode 100644 index 92b8ee7cb100ccd97d843188275469bf31a8671e..0000000000000000000000000000000000000000 --- a/leaderboard_table_20250217.csv +++ /dev/null @@ -1,259 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7B-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7B,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70B-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7B-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7B-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7B-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70B-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8B-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-pro-001,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -gemini-1.5-flash-001,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet (20240620),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4K-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405B-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70B-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8B-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70B,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26B,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128K-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8B-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9B-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72B-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7B-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72B-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20B-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12B-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -llama-3.1-nemotron-70b-instruct,Llama-3.1-Nemotron-70B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct -llama-3.1-nemotron-51b-instruct,Llama-3.1-Nemotron-51B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3_1-Nemotron-51B-Instruct -claude-3-5-sonnet-20241022,Claude 3.5 Sonnet (20241022),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -molmo-72b-0924,Molmo-72B-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-72B-0924 -molmo-7b-d-0924,Molmo-7B-D-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-7B-D-0924 -reka-core-20240904,Reka-Core-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240904,Reka-Flash-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -hunyuan-standard-256k,Hunyuan-Standard-256K,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -ministral-8b-2410,Ministral-8B-2410,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Ministral-8B-Instruct-2410 -gemini-exp-1114,Gemini-Exp-1114,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1114&model=gemini-exp-1114 -chatgpt-4o-latest-20241120,ChatGPT-4o-latest (2024-11-20),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -qwen2.5-coder-32b-instruct,Qwen2.5-Coder-32B-Instruct,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct -granite-3.0-8b-instruct,Granite-3.0-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-8b-instruct -granite-3.0-2b-instruct,Granite-3.0-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-2b-instruct -gemini-exp-1121,Gemini-Exp-1121,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-exp-1121 -step-1v-32k,Step-1V-32K,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -athene-v2-chat,Athene-v2-Chat-72B,-,-,-,NexusFlow,NexusFlow,https://huggingface.co/Nexusflow/Athene-V2-Chat -c4ai-aya-expanse-32b,Aya-Expanse-32B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-32b -mistral-large-2411,Mistral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Mistral-Large-Instruct-2411 -pixtral-large-2411,Pixtral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Pixtral-Large-Instruct-2411 -gemini-exp-1206,Gemini-Exp-1206,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-exp-1206 -gemini-2.0-flash-exp,Gemini-2.0-Flash-Exp,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-exp -claude-3-5-haiku-20241022,Claude 3.5 Haiku (20241022),-,-,-,Propretary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -llama-3.3-70b-instruct,Llama-3.3-70B-Instruct,-,-,-,Llama-3.3,Meta,https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct -qwq-32b-preview,QwQ-32B-Preview,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/QwQ-32B-Preview -amazon-nova-pro-v1.0,Amazon Nova Pro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-lite-v1.0,Amazon Nova Lite 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-micro-v1.0,Amazon Nova Micro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -c4ai-aya-expanse-8b,Aya-Expanse-8B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-8b -gemini-2.0-flash-thinking-exp-1219,Gemini-2.0-Flash-Thinking-Exp-1219,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-1219 -qwen-vl-max-1119,Qwen-VL-Max-1119,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/user-guide/vision/?spm=a2c4g.11186623.0.0.33d259a8vPlZoe#f1cbd5b8a8k5w -deepseek-v2.5-1210,Deepseek-v2.5-1210,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5-1210 -qwen2.5-plus-1127,Qwen2.5-plus-1127,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models?spm=a2c4g.11186623.0.i7 -nvila-internal-15b-v1,NVILA-15B,-,-,-,-,NVIDIA,https://huggingface.co/Efficient-Large-Model/NVILA-15B -o1-2024-12-17,o1-2024-12-17,-,-,-,Proprietary,OpenAI,https://openai.com/index/o1-and-new-tools-for-developers/ -deepseek-v3,DeepSeek-V3,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V3 -smollm2-1.7b-instruct,SmolLM2-1.7B-Instruct,-,-,-,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct -llama-3.1-tulu-3-8b,Llama-3.1-Tulu-3-8B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B -llama-3.1-tulu-3-70b,Llama-3.1-Tulu-3-70B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-70B -step-2-16k-exp-202412,Step-2-16K-Exp,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/text -granite-3.1-8b-instruct,Granite-3.1-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-8b-instruct -granite-3.1-2b-instruct,Granite-3.1-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-2b-instruct -phi-4,Phi-4,-,-,-,MIT,Microsoft,https://huggingface.co/microsoft/phi-4 -gemini-2.0-flash-thinking-exp-01-21,Gemini-2.0-Flash-Thinking-Exp-01-21,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-01-21 -step-1o-vision-32k-highres,Step-1o-Vision-32k (highres),-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -deepseek-r1,DeepSeek-R1,-,-,-,MIT,DeepSeek,https://api-docs.deepseek.com/news/news250120 -qwen2.5-max,Qwen2.5-Max,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/blog/qwen2.5-max/ -gemini-2.0-pro-exp-02-05,Gemini-2.0-Pro-Exp-02-05,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-pro-exp-02-05 -gemini-2.0-flash-001,Gemini-2.0-Flash-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-2.0-flash-001 -gemini-2.0-flash-lite-preview-02-05,Gemini-2.0-Flash-Lite-Preview-02-05,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-flash-lite-preview-02-05 -gemini-2.0-flash,Gemini-2.0-Flash-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-2.0-flash-001 -o3-mini,o3-mini,-,-,-,Proprietary,OpenAI,https://openai.com/index/openai-o3-mini/ -glm-4-plus-0111,GLM-4-Plus-0111,-,-,-,Proprietary,Zhipu,https://bigmodel.cn/dev/howuse/glm-4 -qwen2.5-vl-72b-instruct,Qwen2.5-VL-72B-Instruct,-,-,-,Qwen,Alibaba,https://huggingface.co/Qwen/Qwen2.5-VL-72B-Instruct -chatgpt-4o-latest-20250129,ChatGPT-4o-latest (2025-01-29),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -mistral-small-24b-instruct-2501,Mistral-Small-24B-Instruct-2501,-,-,-,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-Small-24B-Instruct-2501 -qwen-plus-0125,Qwen-Plus-0125,-,-,-,Proprietary,Alibaba,https://www.alibabacloud.com/help/en/model-studio/developer-reference/what-is-qwen-llm -early-grok-3,chocolate (Early Grok-3),-,-,-,Proprietary,xAI,https://x.com/lmarena_ai/status/1891706264800936307 -flux-1.1-pro,FLUX1.1 [pro],-,-,-,Proprietary,Black Forest Labs,https://replicate.com/black-forest-labs/flux-1.1-pro -recraft-v3,Recraft V3,-,-,-,Proprietary,Recraft,https://www.recraft.ai/blog/recraft-introduces-a-revolutionary-ai-model-that-thinks-in-design-language -photon,Luma Photon,-,-,-,Proprietary,Luma AI,https://replicate.com/luma/photon -ideogram-v2,Ideogram 2.0,-,-,-,Proprietary,Ideogram,https://replicate.com/ideogram-ai/ideogram-v2 -stable-diffusion-v35-large,Stable Diffusion 3.5 Large,-,-,-,Open,Stability AI,https://fal.ai/models/fal-ai/stable-diffusion-v35-large -flux-1-dev-fp8,FLUX.1 [dev] (fp8),-,-,-,Open,Black Forest Labs,https://fireworks.ai/models/fireworks/flux-1-dev-fp8 -dall-e-3,DALLΒ·E 3,-,-,-,Proprietary,OpenAI,https://platform.openai.com/docs/models#dall-e -imagen-3.0-generate-002,Imagen-3.0-generate-002,-,-,-,Proprietary,Google,https://deepmind.google/technologies/imagen-3/ diff --git a/leaderboard_table_20250221.csv b/leaderboard_table_20250221.csv deleted file mode 100644 index 21feaedddcaefd55ed71e131883b1ae989427d2c..0000000000000000000000000000000000000000 --- a/leaderboard_table_20250221.csv +++ /dev/null @@ -1,260 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7B-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7B,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70B-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7B-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7B-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7B-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70B-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8B-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-pro-001,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -gemini-1.5-flash-001,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet (20240620),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4K-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405B-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70B-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8B-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70B,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26B,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128K-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8B-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9B-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72B-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7B-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72B-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20B-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12B-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -llama-3.1-nemotron-70b-instruct,Llama-3.1-Nemotron-70B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct -llama-3.1-nemotron-51b-instruct,Llama-3.1-Nemotron-51B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3_1-Nemotron-51B-Instruct -claude-3-5-sonnet-20241022,Claude 3.5 Sonnet (20241022),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -molmo-72b-0924,Molmo-72B-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-72B-0924 -molmo-7b-d-0924,Molmo-7B-D-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-7B-D-0924 -reka-core-20240904,Reka-Core-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240904,Reka-Flash-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -hunyuan-standard-256k,Hunyuan-Standard-256K,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -ministral-8b-2410,Ministral-8B-2410,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Ministral-8B-Instruct-2410 -gemini-exp-1114,Gemini-Exp-1114,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1114&model=gemini-exp-1114 -chatgpt-4o-latest-20241120,ChatGPT-4o-latest (2024-11-20),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -qwen2.5-coder-32b-instruct,Qwen2.5-Coder-32B-Instruct,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct -granite-3.0-8b-instruct,Granite-3.0-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-8b-instruct -granite-3.0-2b-instruct,Granite-3.0-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-2b-instruct -gemini-exp-1121,Gemini-Exp-1121,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-exp-1121 -step-1v-32k,Step-1V-32K,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -athene-v2-chat,Athene-v2-Chat-72B,-,-,-,NexusFlow,NexusFlow,https://huggingface.co/Nexusflow/Athene-V2-Chat -c4ai-aya-expanse-32b,Aya-Expanse-32B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-32b -mistral-large-2411,Mistral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Mistral-Large-Instruct-2411 -pixtral-large-2411,Pixtral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Pixtral-Large-Instruct-2411 -gemini-exp-1206,Gemini-Exp-1206,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-exp-1206 -gemini-2.0-flash-exp,Gemini-2.0-Flash-Exp,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-exp -claude-3-5-haiku-20241022,Claude 3.5 Haiku (20241022),-,-,-,Propretary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -llama-3.3-70b-instruct,Llama-3.3-70B-Instruct,-,-,-,Llama-3.3,Meta,https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct -qwq-32b-preview,QwQ-32B-Preview,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/QwQ-32B-Preview -amazon-nova-pro-v1.0,Amazon Nova Pro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-lite-v1.0,Amazon Nova Lite 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-micro-v1.0,Amazon Nova Micro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -c4ai-aya-expanse-8b,Aya-Expanse-8B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-8b -gemini-2.0-flash-thinking-exp-1219,Gemini-2.0-Flash-Thinking-Exp-1219,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-1219 -qwen-vl-max-1119,Qwen-VL-Max-1119,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/user-guide/vision/?spm=a2c4g.11186623.0.0.33d259a8vPlZoe#f1cbd5b8a8k5w -deepseek-v2.5-1210,Deepseek-v2.5-1210,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5-1210 -qwen2.5-plus-1127,Qwen2.5-plus-1127,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models?spm=a2c4g.11186623.0.i7 -nvila-internal-15b-v1,NVILA-15B,-,-,-,-,NVIDIA,https://huggingface.co/Efficient-Large-Model/NVILA-15B -o1-2024-12-17,o1-2024-12-17,-,-,-,Proprietary,OpenAI,https://openai.com/index/o1-and-new-tools-for-developers/ -deepseek-v3,DeepSeek-V3,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V3 -smollm2-1.7b-instruct,SmolLM2-1.7B-Instruct,-,-,-,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct -llama-3.1-tulu-3-8b,Llama-3.1-Tulu-3-8B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B -llama-3.1-tulu-3-70b,Llama-3.1-Tulu-3-70B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-70B -step-2-16k-exp-202412,Step-2-16K-Exp,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/text -granite-3.1-8b-instruct,Granite-3.1-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-8b-instruct -granite-3.1-2b-instruct,Granite-3.1-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-2b-instruct -phi-4,Phi-4,-,-,-,MIT,Microsoft,https://huggingface.co/microsoft/phi-4 -gemini-2.0-flash-thinking-exp-01-21,Gemini-2.0-Flash-Thinking-Exp-01-21,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-01-21 -step-1o-vision-32k-highres,Step-1o-Vision-32k (highres),-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -deepseek-r1,DeepSeek-R1,-,-,-,MIT,DeepSeek,https://api-docs.deepseek.com/news/news250120 -qwen2.5-max,Qwen2.5-Max,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/blog/qwen2.5-max/ -gemini-2.0-pro-exp-02-05,Gemini-2.0-Pro-Exp-02-05,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-pro-exp-02-05 -gemini-2.0-flash-001,Gemini-2.0-Flash-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-2.0-flash-001 -gemini-2.0-flash-lite-preview-02-05,Gemini-2.0-Flash-Lite-Preview-02-05,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-flash-lite-preview-02-05 -gemini-2.0-flash,Gemini-2.0-Flash-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-2.0-flash-001 -o3-mini,o3-mini,-,-,-,Proprietary,OpenAI,https://openai.com/index/openai-o3-mini/ -glm-4-plus-0111,GLM-4-Plus-0111,-,-,-,Proprietary,Zhipu,https://bigmodel.cn/dev/howuse/glm-4 -qwen2.5-vl-72b-instruct,Qwen2.5-VL-72B-Instruct,-,-,-,Qwen,Alibaba,https://huggingface.co/Qwen/Qwen2.5-VL-72B-Instruct -chatgpt-4o-latest-20250129,ChatGPT-4o-latest (2025-01-29),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -mistral-small-24b-instruct-2501,Mistral-Small-24B-Instruct-2501,-,-,-,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-Small-24B-Instruct-2501 -qwen-plus-0125,Qwen-Plus-0125,-,-,-,Proprietary,Alibaba,https://www.alibabacloud.com/help/en/model-studio/developer-reference/what-is-qwen-llm -early-grok-3,chocolate (Early Grok-3),-,-,-,Proprietary,xAI,https://x.com/lmarena_ai/status/1891706264800936307 -o3-mini-high,o3-mini-high,-,-,-,Proprietary,OpenAI,https://platform.openai.com/docs/guides/reasoning#reasoning-effort -flux-1.1-pro,FLUX1.1 [pro],-,-,-,Proprietary,Black Forest Labs,https://replicate.com/black-forest-labs/flux-1.1-pro -recraft-v3,Recraft V3,-,-,-,Proprietary,Recraft,https://www.recraft.ai/blog/recraft-introduces-a-revolutionary-ai-model-that-thinks-in-design-language -photon,Luma Photon,-,-,-,Proprietary,Luma AI,https://replicate.com/luma/photon -ideogram-v2,Ideogram 2.0,-,-,-,Proprietary,Ideogram,https://replicate.com/ideogram-ai/ideogram-v2 -stable-diffusion-v35-large,Stable Diffusion 3.5 Large,-,-,-,Open,Stability AI,https://fal.ai/models/fal-ai/stable-diffusion-v35-large -flux-1-dev-fp8,FLUX.1 [dev] (fp8),-,-,-,Open,Black Forest Labs,https://fireworks.ai/models/fireworks/flux-1-dev-fp8 -dall-e-3,DALLΒ·E 3,-,-,-,Proprietary,OpenAI,https://platform.openai.com/docs/models#dall-e -imagen-3.0-generate-002,Imagen-3.0-generate-002,-,-,-,Proprietary,Google,https://deepmind.google/technologies/imagen-3/ diff --git a/leaderboard_table_20250227.csv b/leaderboard_table_20250227.csv deleted file mode 100644 index 59c2802a56b57e87a954f952f863800445bc15f2..0000000000000000000000000000000000000000 --- a/leaderboard_table_20250227.csv +++ /dev/null @@ -1,263 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7B-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7B,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70B-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7B-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7B-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7B-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70B-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8B-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-pro-001,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -gemini-1.5-flash-001,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet (20240620),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4K-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405B-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70B-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8B-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70B,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26B,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128K-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8B-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9B-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72B-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7B-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72B-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20B-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12B-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -llama-3.1-nemotron-70b-instruct,Llama-3.1-Nemotron-70B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct -llama-3.1-nemotron-51b-instruct,Llama-3.1-Nemotron-51B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3_1-Nemotron-51B-Instruct -claude-3-5-sonnet-20241022,Claude 3.5 Sonnet (20241022),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -molmo-72b-0924,Molmo-72B-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-72B-0924 -molmo-7b-d-0924,Molmo-7B-D-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-7B-D-0924 -reka-core-20240904,Reka-Core-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240904,Reka-Flash-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -hunyuan-standard-256k,Hunyuan-Standard-256K,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -ministral-8b-2410,Ministral-8B-2410,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Ministral-8B-Instruct-2410 -gemini-exp-1114,Gemini-Exp-1114,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1114&model=gemini-exp-1114 -chatgpt-4o-latest-20241120,ChatGPT-4o-latest (2024-11-20),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -qwen2.5-coder-32b-instruct,Qwen2.5-Coder-32B-Instruct,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct -granite-3.0-8b-instruct,Granite-3.0-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-8b-instruct -granite-3.0-2b-instruct,Granite-3.0-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-2b-instruct -gemini-exp-1121,Gemini-Exp-1121,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-exp-1121 -step-1v-32k,Step-1V-32K,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -athene-v2-chat,Athene-v2-Chat-72B,-,-,-,NexusFlow,NexusFlow,https://huggingface.co/Nexusflow/Athene-V2-Chat -c4ai-aya-expanse-32b,Aya-Expanse-32B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-32b -mistral-large-2411,Mistral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Mistral-Large-Instruct-2411 -pixtral-large-2411,Pixtral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Pixtral-Large-Instruct-2411 -gemini-exp-1206,Gemini-Exp-1206,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-exp-1206 -gemini-2.0-flash-exp,Gemini-2.0-Flash-Exp,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-exp -claude-3-5-haiku-20241022,Claude 3.5 Haiku (20241022),-,-,-,Propretary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -llama-3.3-70b-instruct,Llama-3.3-70B-Instruct,-,-,-,Llama-3.3,Meta,https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct -qwq-32b-preview,QwQ-32B-Preview,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/QwQ-32B-Preview -amazon-nova-pro-v1.0,Amazon Nova Pro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-lite-v1.0,Amazon Nova Lite 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-micro-v1.0,Amazon Nova Micro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -c4ai-aya-expanse-8b,Aya-Expanse-8B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-8b -gemini-2.0-flash-thinking-exp-1219,Gemini-2.0-Flash-Thinking-Exp-1219,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-1219 -qwen-vl-max-1119,Qwen-VL-Max-1119,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/user-guide/vision/?spm=a2c4g.11186623.0.0.33d259a8vPlZoe#f1cbd5b8a8k5w -deepseek-v2.5-1210,Deepseek-v2.5-1210,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5-1210 -qwen2.5-plus-1127,Qwen2.5-plus-1127,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models?spm=a2c4g.11186623.0.i7 -nvila-internal-15b-v1,NVILA-15B,-,-,-,-,NVIDIA,https://huggingface.co/Efficient-Large-Model/NVILA-15B -o1-2024-12-17,o1-2024-12-17,-,-,-,Proprietary,OpenAI,https://openai.com/index/o1-and-new-tools-for-developers/ -deepseek-v3,DeepSeek-V3,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V3 -smollm2-1.7b-instruct,SmolLM2-1.7B-Instruct,-,-,-,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct -llama-3.1-tulu-3-8b,Llama-3.1-Tulu-3-8B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B -llama-3.1-tulu-3-70b,Llama-3.1-Tulu-3-70B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-70B -step-2-16k-exp-202412,Step-2-16K-Exp,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/text -granite-3.1-8b-instruct,Granite-3.1-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-8b-instruct -granite-3.1-2b-instruct,Granite-3.1-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-2b-instruct -phi-4,Phi-4,-,-,-,MIT,Microsoft,https://huggingface.co/microsoft/phi-4 -gemini-2.0-flash-thinking-exp-01-21,Gemini-2.0-Flash-Thinking-Exp-01-21,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-01-21 -step-1o-vision-32k-highres,Step-1o-Vision-32k (highres),-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -deepseek-r1,DeepSeek-R1,-,-,-,MIT,DeepSeek,https://api-docs.deepseek.com/news/news250120 -qwen2.5-max,Qwen2.5-Max,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/blog/qwen2.5-max/ -gemini-2.0-pro-exp-02-05,Gemini-2.0-Pro-Exp-02-05,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-pro-exp-02-05 -gemini-2.0-flash-001,Gemini-2.0-Flash-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-2.0-flash-001 -gemini-2.0-flash-lite-preview-02-05,Gemini-2.0-Flash-Lite-Preview-02-05,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-flash-lite-preview-02-05 -gemini-2.0-flash,Gemini-2.0-Flash-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-2.0-flash-001 -o3-mini,o3-mini,-,-,-,Proprietary,OpenAI,https://openai.com/index/openai-o3-mini/ -glm-4-plus-0111,GLM-4-Plus-0111,-,-,-,Proprietary,Zhipu,https://bigmodel.cn/dev/howuse/glm-4 -qwen2.5-vl-72b-instruct,Qwen2.5-VL-72B-Instruct,-,-,-,Qwen,Alibaba,https://huggingface.co/Qwen/Qwen2.5-VL-72B-Instruct -chatgpt-4o-latest-20250129,ChatGPT-4o-latest (2025-01-29),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -mistral-small-24b-instruct-2501,Mistral-Small-24B-Instruct-2501,-,-,-,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-Small-24B-Instruct-2501 -qwen-plus-0125,Qwen-Plus-0125,-,-,-,Proprietary,Alibaba,https://www.alibabacloud.com/help/en/model-studio/developer-reference/what-is-qwen-llm -early-grok-3,chocolate (Early Grok-3),-,-,-,Proprietary,xAI,https://x.com/lmarena_ai/status/1891706264800936307 -o3-mini-high,o3-mini-high,-,-,-,Proprietary,OpenAI,https://platform.openai.com/docs/guides/reasoning#reasoning-effort -claude-3-7-sonnet-20250219,Claude 3.7 Sonnet,-,-,-,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-7-sonnet -hunyuan-large-2025-02-10,Hunyuan-Large-2025-02-10,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -hunyuan-standard-2025-02-10,Hunyuan-Standard-2025-02-10,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -flux-1.1-pro,FLUX1.1 [pro],-,-,-,Proprietary,Black Forest Labs,https://replicate.com/black-forest-labs/flux-1.1-pro -recraft-v3,Recraft V3,-,-,-,Proprietary,Recraft,https://www.recraft.ai/blog/recraft-introduces-a-revolutionary-ai-model-that-thinks-in-design-language -photon,Luma Photon,-,-,-,Proprietary,Luma AI,https://replicate.com/luma/photon -ideogram-v2,Ideogram 2.0,-,-,-,Proprietary,Ideogram,https://replicate.com/ideogram-ai/ideogram-v2 -stable-diffusion-v35-large,Stable Diffusion 3.5 Large,-,-,-,Open,Stability AI,https://fal.ai/models/fal-ai/stable-diffusion-v35-large -flux-1-dev-fp8,FLUX.1 [dev] (fp8),-,-,-,Open,Black Forest Labs,https://fireworks.ai/models/fireworks/flux-1-dev-fp8 -dall-e-3,DALLΒ·E 3,-,-,-,Proprietary,OpenAI,https://platform.openai.com/docs/models#dall-e -imagen-3.0-generate-002,Imagen-3.0-generate-002,-,-,-,Proprietary,Google,https://deepmind.google/technologies/imagen-3/ diff --git a/leaderboard_table_20250303.csv b/leaderboard_table_20250303.csv deleted file mode 100644 index 39510478e44fea816ded60d5962cc6b21c081494..0000000000000000000000000000000000000000 --- a/leaderboard_table_20250303.csv +++ /dev/null @@ -1,266 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7B-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7B,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70B-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7B-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7B-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7B-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70B-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8B-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-pro-001,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -gemini-1.5-flash-001,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet (20240620),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4K-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405B-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70B-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8B-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70B,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26B,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128K-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8B-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9B-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72B-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7B-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72B-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20B-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12B-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -llama-3.1-nemotron-70b-instruct,Llama-3.1-Nemotron-70B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct -llama-3.1-nemotron-51b-instruct,Llama-3.1-Nemotron-51B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3_1-Nemotron-51B-Instruct -claude-3-5-sonnet-20241022,Claude 3.5 Sonnet (20241022),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -molmo-72b-0924,Molmo-72B-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-72B-0924 -molmo-7b-d-0924,Molmo-7B-D-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-7B-D-0924 -reka-core-20240904,Reka-Core-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240904,Reka-Flash-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -hunyuan-standard-256k,Hunyuan-Standard-256K,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -ministral-8b-2410,Ministral-8B-2410,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Ministral-8B-Instruct-2410 -gemini-exp-1114,Gemini-Exp-1114,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1114&model=gemini-exp-1114 -chatgpt-4o-latest-20241120,ChatGPT-4o-latest (2024-11-20),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -qwen2.5-coder-32b-instruct,Qwen2.5-Coder-32B-Instruct,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct -granite-3.0-8b-instruct,Granite-3.0-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-8b-instruct -granite-3.0-2b-instruct,Granite-3.0-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-2b-instruct -gemini-exp-1121,Gemini-Exp-1121,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-exp-1121 -step-1v-32k,Step-1V-32K,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -athene-v2-chat,Athene-v2-Chat-72B,-,-,-,NexusFlow,NexusFlow,https://huggingface.co/Nexusflow/Athene-V2-Chat -c4ai-aya-expanse-32b,Aya-Expanse-32B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-32b -mistral-large-2411,Mistral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Mistral-Large-Instruct-2411 -pixtral-large-2411,Pixtral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Pixtral-Large-Instruct-2411 -gemini-exp-1206,Gemini-Exp-1206,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-exp-1206 -gemini-2.0-flash-exp,Gemini-2.0-Flash-Exp,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-exp -claude-3-5-haiku-20241022,Claude 3.5 Haiku (20241022),-,-,-,Propretary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -llama-3.3-70b-instruct,Llama-3.3-70B-Instruct,-,-,-,Llama-3.3,Meta,https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct -qwq-32b-preview,QwQ-32B-Preview,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/QwQ-32B-Preview -amazon-nova-pro-v1.0,Amazon Nova Pro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-lite-v1.0,Amazon Nova Lite 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-micro-v1.0,Amazon Nova Micro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -c4ai-aya-expanse-8b,Aya-Expanse-8B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-8b -gemini-2.0-flash-thinking-exp-1219,Gemini-2.0-Flash-Thinking-Exp-1219,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-1219 -qwen-vl-max-1119,Qwen-VL-Max-1119,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/user-guide/vision/?spm=a2c4g.11186623.0.0.33d259a8vPlZoe#f1cbd5b8a8k5w -deepseek-v2.5-1210,Deepseek-v2.5-1210,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5-1210 -qwen2.5-plus-1127,Qwen2.5-plus-1127,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models?spm=a2c4g.11186623.0.i7 -nvila-internal-15b-v1,NVILA-15B,-,-,-,-,NVIDIA,https://huggingface.co/Efficient-Large-Model/NVILA-15B -o1-2024-12-17,o1-2024-12-17,-,-,-,Proprietary,OpenAI,https://openai.com/index/o1-and-new-tools-for-developers/ -deepseek-v3,DeepSeek-V3,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V3 -smollm2-1.7b-instruct,SmolLM2-1.7B-Instruct,-,-,-,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct -llama-3.1-tulu-3-8b,Llama-3.1-Tulu-3-8B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B -llama-3.1-tulu-3-70b,Llama-3.1-Tulu-3-70B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-70B -step-2-16k-exp-202412,Step-2-16K-Exp,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/text -granite-3.1-8b-instruct,Granite-3.1-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-8b-instruct -granite-3.1-2b-instruct,Granite-3.1-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-2b-instruct -phi-4,Phi-4,-,-,-,MIT,Microsoft,https://huggingface.co/microsoft/phi-4 -gemini-2.0-flash-thinking-exp-01-21,Gemini-2.0-Flash-Thinking-Exp-01-21,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-01-21 -step-1o-vision-32k-highres,Step-1o-Vision-32k (highres),-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -deepseek-r1,DeepSeek-R1,-,-,-,MIT,DeepSeek,https://api-docs.deepseek.com/news/news250120 -qwen2.5-max,Qwen2.5-Max,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/blog/qwen2.5-max/ -gemini-2.0-pro-exp-02-05,Gemini-2.0-Pro-Exp-02-05,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-pro-exp-02-05 -gemini-2.0-flash-001,Gemini-2.0-Flash-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-2.0-flash-001 -gemini-2.0-flash-lite-preview-02-05,Gemini-2.0-Flash-Lite,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-flash-lite -gemini-2.0-flash,Gemini-2.0-Flash-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-2.0-flash-001 -o3-mini,o3-mini,-,-,-,Proprietary,OpenAI,https://openai.com/index/openai-o3-mini/ -glm-4-plus-0111,GLM-4-Plus-0111,-,-,-,Proprietary,Zhipu,https://bigmodel.cn/dev/howuse/glm-4 -qwen2.5-vl-72b-instruct,Qwen2.5-VL-72B-Instruct,-,-,-,Qwen,Alibaba,https://huggingface.co/Qwen/Qwen2.5-VL-72B-Instruct -chatgpt-4o-latest-20250129,ChatGPT-4o-latest (2025-01-29),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -mistral-small-24b-instruct-2501,Mistral-Small-24B-Instruct-2501,-,-,-,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-Small-24B-Instruct-2501 -qwen-plus-0125,Qwen-Plus-0125,-,-,-,Proprietary,Alibaba,https://www.alibabacloud.com/help/en/model-studio/developer-reference/what-is-qwen-llm -early-grok-3,chocolate (Early Grok-3),-,-,-,Proprietary,xAI,https://x.com/lmarena_ai/status/1891706264800936307 -o3-mini-high,o3-mini-high,-,-,-,Proprietary,OpenAI,https://platform.openai.com/docs/guides/reasoning#reasoning-effort -claude-3-7-sonnet-20250219,Claude 3.7 Sonnet,-,-,-,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-7-sonnet -hunyuan-large-2025-02-10,Hunyuan-Large-2025-02-10,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -hunyuan-standard-2025-02-10,Hunyuan-Standard-2025-02-10,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -gpt-4.5-preview-2025-02-27,GPT-4.5-Preview,-,-,-,Proprietary,OpenAI,https://openai.com/index/introducing-gpt-4-5/ -hunyuan-standard-vision-2024-12-31,Hunyuan-Standard-Vision-2024-12-31,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -grok-3-preview-02-24,Grok-3-Preview-02-24,-,-,-,Proprietary,xAI,https://x.ai/blog/grok-3 -flux-1.1-pro,FLUX1.1 [pro],-,-,-,Proprietary,Black Forest Labs,https://replicate.com/black-forest-labs/flux-1.1-pro -recraft-v3,Recraft V3,-,-,-,Proprietary,Recraft,https://www.recraft.ai/blog/recraft-introduces-a-revolutionary-ai-model-that-thinks-in-design-language -photon,Luma Photon,-,-,-,Proprietary,Luma AI,https://replicate.com/luma/photon -ideogram-v2,Ideogram 2.0,-,-,-,Proprietary,Ideogram,https://replicate.com/ideogram-ai/ideogram-v2 -stable-diffusion-v35-large,Stable Diffusion 3.5 Large,-,-,-,Open,Stability AI,https://fal.ai/models/fal-ai/stable-diffusion-v35-large -flux-1-dev-fp8,FLUX.1 [dev] (fp8),-,-,-,Open,Black Forest Labs,https://fireworks.ai/models/fireworks/flux-1-dev-fp8 -dall-e-3,DALLΒ·E 3,-,-,-,Proprietary,OpenAI,https://platform.openai.com/docs/models#dall-e -imagen-3.0-generate-002,Imagen-3.0-generate-002,-,-,-,Proprietary,Google,https://deepmind.google/technologies/imagen-3/ diff --git a/leaderboard_table_20250311.csv b/leaderboard_table_20250311.csv deleted file mode 100644 index 9027fed42ef69e73a8a467217b1994797140e425..0000000000000000000000000000000000000000 --- a/leaderboard_table_20250311.csv +++ /dev/null @@ -1,267 +0,0 @@ -key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link -wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0 -vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k -wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1 -tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b -guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged -openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor -wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0 -vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k -baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b -xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst -nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b -mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct -falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct -h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b -gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4 -claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo -claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2 -claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1 -gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -mixtral-8x7b-instruct-v0.1,Mixtral-8x7B-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/ -claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude -gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/ -gemini-pro-dev-api,Gemini-1.0-Pro-001,-,0.718,2023/4,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.0-pro -bard-jan-24-gemini-pro,Gemini App (2024-01-24),-,-,Online,Proprietary,Google,https://gemini.google.com/app -wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0 -vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3 -starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha -pplx-70b-online,pplx-70B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5 -openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7B,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B -gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5 -llama-2-70b-chat,Llama-2-70B-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf -solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0 -dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b -wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2 -zephyr-7b-beta,Zephyr-7B-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta -mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat -vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5 -qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat -zephyr-7b-alpha,Zephyr-7B-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha -codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf -falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat -guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged -llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf -mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 -pplx-7b-online,pplx-7B-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms -llama-2-7b-chat,Llama-2-7B-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf -vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5 -palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models -koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/ -chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b -gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy -mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat -chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b -RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven -alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html -oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 -chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b -fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0 -stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b -dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b -llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971 -mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/ -llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat -stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B -deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat -gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday -qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106 -nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO -gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo -mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat -mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/ -gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it -gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it -mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 -claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf -olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct -claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family -starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta -command-r,Command R (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r -qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/ -qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/ -command-r-plus,Command R+ (04-2024),-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/ -gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it -dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm -gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4 -gemma-1.1-2b-it,Gemma-1.1-2b-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-2b-it -reka-flash-21b-20240226,Reka-Flash-21B,-,0.735,2023/11,Proprietary,Reka AI,https://www.reka.ai/news/reka-flash-efficient-and-capable-multimodal-language-models -reka-flash-21b-20240226-online,Reka-Flash-21B-online,-,-,Online,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -zephyr-orpo-141b-A35b-v0.1,Zephyr-ORPO-141b-A35b-v0.1,-,-,2024/4,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1 -mixtral-8x22b-instruct-v0.1,Mixtral-8x22b-Instruct-v0.1,-,0.778,2024/4,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-8x22b/ -llama-3-70b-instruct,Llama-3-70B-Instruct,-,0.820,2023/12,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -llama-3-8b-instruct,Llama-3-8B-Instruct,-,0.684,2023/3,Llama 3 Community,Meta,https://llama.meta.com/llama3/ -gemini-1.5-pro-api-0409-preview,Gemini-1.5-Pro-Preview-0409,-,0.819,2023/11,Proprietary,Google,https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/ -phi-3-mini-128k-instruct,Phi-3-Mini-128k-Instruct,-,0.681,2023/10,MIT,Microsoft,https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/ -snowflake-arctic-instruct,Snowflake Arctic Instruct,-,0.673,2024/4,Apache 2.0,Snowflake,https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/ -qwen-max-0428,Qwen-Max-0428,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/api-details -qwen1.5-110b-chat,Qwen1.5-110B-Chat,8.88,0.804,2024/4,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-110b/ -reka-core-20240501,Reka-Core-20240501,-,0.832,-,Proprietary,Reka AI,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model -gpt-4o-2024-05-13,GPT-4o-2024-05-13,-,0.887,2023/10,Proprietary,OpenAI,https://openai.com/index/hello-gpt-4o/ -phi-3-mini-4k-instruct,Phi-3-Mini-4k-Instruct,-,0.688,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -yi-large-preview,Yi-Large-preview,-,-,Unknown,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B -glm-4-0116,GLM-4-0116,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -gemini-advanced-0514,Gemini Advanced App (2024-05-14),-,-,Online,Proprietary,Google,https://gemini.google.com/advanced -gemini-1.5-pro-api-0514,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-pro-001,Gemini-1.5-Pro-001,-,0.859,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro -gemini-1.5-flash-api-0514,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -gemini-1.5-flash-001,Gemini-1.5-Flash-001,-,0.789,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash -yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat -yi-1.5-34b-chat,Yi-1.5-34B-Chat,-,0.768,2024/5,Apache-2.0,01 AI,https://huggingface.co/01-ai/Yi-1.5-34B-Chat -phi-3-small-8k-instruct,Phi-3-Small-8k-Instruct,-,0.757,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-small-8k-instruct -phi-3-medium-4k-instruct,Phi-3-Medium-4k-Instruct,-,0.780,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-medium-4k-instruct -qwen2-72b-instruct,Qwen2-72B-Instruct,9.12,0.842,2024/6,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen2/ -yi-large,Yi-Large,-,-,Unknown,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -nemotron-4-340b-instruct,Nemotron-4-340B-Instruct,-,-,2023/6,NVIDIA Open Model,Nvidia,https://huggingface.co/nvidia/Nemotron-4-340B-Instruct -reka-flash-preview-20240611,Reka-Flash-Preview-20240611,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/http-api.html#generation -glm-4-0520,GLM-4-0520,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/dev/api#language -deepseek-coder-v2,DeepSeek-Coder-V2-Instruct,-,-,2024/6,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct -claude-3-5-sonnet-20240620,Claude 3.5 Sonnet (20240620),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-5-sonnet -gemma-2-27b-it,Gemma-2-27B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-27b-it -gemma-2-9b-it,Gemma-2-9B-it,-,-,2024/6,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-9b-it -llava-v1.6-34b,LLaVA-v1.6-34B,-,-,2024/1,Apache 2.0,LLaVA,https://llava-vl.github.io/blog/2024-01-30-llava-next/ -phi-3-mini-4k-instruct-june-2024,Phi-3-Mini-4K-Instruct-June-24,-,0.709,2023/10,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-mini-4k-instruct -deepseek-v2-api-0628,Deepseek-v2-API-0628,-,-,-,DeepSeek,DeepSeek AI,https://platform.deepseek.com/api-docs/updates#deepseek-chat -cogvlm2-llama3-chat-19b,CogVLM2-llama3-chat-19b,-,-,2024/7,CogVLM2,Zhipu AI,https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B -gpt-4o-mini-2024-07-18,GPT-4o-mini-2024-07-18,-,0.820,2023/10,Proprietary,OpenAI,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/ -llama-3.1-405b-instruct-fp8,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct-bf16,Meta-Llama-3.1-405B-Instruct-bf16,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-405b-instruct,Meta-Llama-3.1-405B-Instruct-fp8,-,0.886,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-70b-instruct,Meta-Llama-3.1-70B-Instruct,-,0.860,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -llama-3.1-8b-instruct,Meta-Llama-3.1-8B-Instruct,-,0.730,2023/12,Llama 3.1 Community,Meta,https://ai.meta.com/blog/meta-llama-3-1/ -athene-70b-0725,Athene-70B,-,-,2024/7,CC-BY-NC-4.0,NexusFlow,https://huggingface.co/Nexusflow/Athene-70B -internvl2-26b,InternVL2-26B,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -gemma-2-2b-it,Gemma-2-2b-it,-,0.513,2024/7,Gemma license,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemma-2-2b-it -glm-4-air,GLM-4-AIR,-,-,Unknown,Proprietary,Zhipu AI,https://open.bigmodel.cn/ -snorkel-mistral-pairrm-dpo,Snorkel-Mistral-PairRM-DPO,-,-,2024/5,Apache 2.0,Snorkel AI,https://huggingface.co/snorkelai/Snorkel-Mistral-PairRM-DPO -mistral-large-2407,Mistral-Large-2407,-,-,2024/7,Mistral Research,Mistral,https://mistral.ai/news/mistral-large-2407/ -gemini-1.5-pro-exp-0801,Gemini-1.5-Pro-Exp-0801,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0801 -reka-core-20240722,Reka-Core-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240722,Reka-Flash-20240722,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -deepseek-coder-v2-0724,Deepseek-Coder-v2-0724,-,-,-,Proprietary,DeepSeek,https://platform.deepseek.com/api-docs/updates/#version-2024-07-24 -chatgpt-4o-latest,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -chatgpt-4o-latest-20240808,ChatGPT-4o-latest (2024-08-08),-,-,2023/10,Proprietary,OpenAI,https://x.com/OpenAIDevs/status/1823510395619000525 -gpt-4o-2024-08-06,GPT-4o-2024-08-06,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4o -jamba-1.5-large,Jamba-1.5-Large,-,0.812,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -jamba-1.5-mini,Jamba-1.5-Mini,-,0.697,2024/3,Jamba Open,AI21 Labs,https://www.ai21.com/jamba -minicpm-v-2_6,MiniCPM-v 2_6,-,-,2024/7,Apache 2.0,OpenBMB,https://huggingface.co/openbmb/MiniCPM-V-2_6 -phi-3-vision-128k-instruct,Phi-3-Vision-128K-Instruct,-,-,2024/3,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3-vision-128k-instruct -grok-2-2024-08-13,Grok-2-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -grok-2-mini-2024-08-13,Grok-2-Mini-08-13,-,-,2024/3,Proprietary,xAI,https://x.ai/blog/grok-2 -gemini-1.5-pro-exp-0827,Gemini-1.5-Pro-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-exp-0827 -gemini-1.5-flash-exp-0827,Gemini-1.5-Flash-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-exp-0827 -gemini-1.5-flash-8b-exp-0827,Gemini-1.5-Flash-8B-Exp-0827,-,-,2023/11,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-8b-exp-0827 -internvl2-4b,InternVL2-4b,-,-,2024/7,MIT,OpenGVLab,https://internvl.github.io/blog/2024-07-02-InternVL-2.0/ -command-r-plus-08-2024,Command R+ (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -command-r-08-2024,Command R (08-2024),-,-,2024/8,CC-BY-NC-4.0,Cohere,https://docs.cohere.com/docs/command-r-plus#model-details -gemma-2-9b-it-simpo,Gemma-2-9B-it-SimPO,-,-,2024/7,MIT,Princeton,https://huggingface.co/princeton-nlp/gemma-2-9b-it-SimPO -yi-vision,Yi-Vision,-,-,2024/7,Proprietary,01 AI,https://platform.01.ai/docs#models-and-pricing -llava-onevision-qwen2-72b-ov,LLaVA-OneVision-qwen2-72B-ov-sft,-,-,2024/8,Apache 2.0,LLaVA,https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov -phi-3.5-vision-instruct,Phi-3.5-vision-instruct,-,-,2024/8,MIT,Microsoft,https://huggingface.co/microsoft/Phi-3.5-vision-instruct -deepseek-v2.5,Deepseek-v2.5,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5 -qwen-plus-0828,Qwen-Plus-0828,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models -qwen2-vl-7b-instruct,Qwen2-VL-7B-Instruct,-,-,-,Apache 2.0,Aliaba,https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct -qwen-vl-max-0809,Qwen2-VL-72B,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -chatgpt-4o-latest-20240903,ChatGPT-4o-latest (2024-09-03),-,-,2023/10,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -o1-preview,o1-preview,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -o1-mini,o1-mini,-,-,2023/10,Proprietary,OpenAI,https://platform.openai.com/docs/models/o1 -qwen2.5-72b-instruct,Qwen2.5-72B-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/blog/qwen2.5/ -internlm2_5-20b-chat,InternLM2.5-20B-chat,-,-,2024/8,Other,InternLM,https://huggingface.co/internlm/internlm2_5-20b-chat -llama-3.2-3b-instruct,Meta-Llama-3.2-3B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-1b-instruct,Meta-Llama-3.2-1B-Instruct,-,-,2023/12,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-90b-instruct,Llama-3.2-90B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -llama-3.2-vision-11b-instruct,Llama-3.2-11B-Vision-Instruct,-,-,2023/11,Llama 3.2,Meta,https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/ -pixtral-12b-2409,Pixtral-12B-2409,-,-,2024/9,Apache 2.0,Mistral,https://mistral.ai/news/pixtral-12b/ -qwen2-vl-72b,Qwen2-VL-72b-Instruct,-,-,2024/9,Qwen,Alibaba,https://qwenlm.github.io/zh/blog/qwen2-vl/ -gemini-1.5-pro-002,Gemini-1.5-Pro-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-pro-002 -gemini-1.5-flash-002,Gemini-1.5-Flash-002,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-002 -gemini-1.5-flash-8b-001,Gemini-1.5-Flash-8B-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys&model=gemini-1.5-flash-8b -glm-4-plus,GLM-4-Plus,-,-,-,Proprietary,Zhipu AI,https://bigmodel.cn/dev/howuse/glm-4 -yi-lightning,Yi-Lightning,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -yi-lightning-lite,Yi-Lightning-lite,-,-,-,Proprietary,01 AI,https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 -qwen-max-0919,Qwen-Max-0919,-,-,-,Qwen,Alibaba,https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -llama-3.1-nemotron-70b-instruct,Llama-3.1-Nemotron-70B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct -llama-3.1-nemotron-51b-instruct,Llama-3.1-Nemotron-51B-Instruct,-,-,2023/12,Llama 3.1,Nvidia,https://huggingface.co/nvidia/Llama-3_1-Nemotron-51B-Instruct -claude-3-5-sonnet-20241022,Claude 3.5 Sonnet (20241022),-,0.887,2024/4,Proprietary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -molmo-72b-0924,Molmo-72B-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-72B-0924 -molmo-7b-d-0924,Molmo-7B-D-0924,-,-,-,Apache 2.0,AI2,https://huggingface.co/allenai/Molmo-7B-D-0924 -reka-core-20240904,Reka-Core-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -reka-flash-20240904,Reka-Flash-20240904,-,-,-,Proprietary,Reka AI,https://docs.reka.ai/available-models -hunyuan-standard-256k,Hunyuan-Standard-256K,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -ministral-8b-2410,Ministral-8B-2410,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Ministral-8B-Instruct-2410 -gemini-exp-1114,Gemini-Exp-1114,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1114&model=gemini-exp-1114 -chatgpt-4o-latest-20241120,ChatGPT-4o-latest (2024-11-20),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -qwen2.5-coder-32b-instruct,Qwen2.5-Coder-32B-Instruct,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct -granite-3.0-8b-instruct,Granite-3.0-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-8b-instruct -granite-3.0-2b-instruct,Granite-3.0-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.0-2b-instruct -gemini-exp-1121,Gemini-Exp-1121,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-exp-1121 -step-1v-32k,Step-1V-32K,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -athene-v2-chat,Athene-v2-Chat-72B,-,-,-,NexusFlow,NexusFlow,https://huggingface.co/Nexusflow/Athene-V2-Chat -c4ai-aya-expanse-32b,Aya-Expanse-32B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-32b -mistral-large-2411,Mistral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Mistral-Large-Instruct-2411 -pixtral-large-2411,Pixtral-Large-2411,-,-,-,MRL,Mistral,https://huggingface.co/mistralai/Pixtral-Large-Instruct-2411 -gemini-exp-1206,Gemini-Exp-1206,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-exp-1206 -gemini-2.0-flash-exp,Gemini-2.0-Flash-Exp,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-exp -claude-3-5-haiku-20241022,Claude 3.5 Haiku (20241022),-,-,-,Propretary,Anthropic,https://www.anthropic.com/news/3-5-models-and-computer-use -llama-3.3-70b-instruct,Llama-3.3-70B-Instruct,-,-,-,Llama-3.3,Meta,https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct -qwq-32b-preview,QwQ-32B-Preview,-,-,-,Apache 2.0,Alibaba,https://huggingface.co/Qwen/QwQ-32B-Preview -amazon-nova-pro-v1.0,Amazon Nova Pro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-lite-v1.0,Amazon Nova Lite 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -amazon-nova-micro-v1.0,Amazon Nova Micro 1.0,-,-,-,Proprietary,Amazon,https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html -c4ai-aya-expanse-8b,Aya-Expanse-8B,-,-,-,CC-BY-NC-4.0,Cohere,https://huggingface.co/CohereForAI/aya-expanse-8b -gemini-2.0-flash-thinking-exp-1219,Gemini-2.0-Flash-Thinking-Exp-1219,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-1219 -qwen-vl-max-1119,Qwen-VL-Max-1119,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/user-guide/vision/?spm=a2c4g.11186623.0.0.33d259a8vPlZoe#f1cbd5b8a8k5w -deepseek-v2.5-1210,Deepseek-v2.5-1210,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V2.5-1210 -qwen2.5-plus-1127,Qwen2.5-plus-1127,-,-,-,Proprietary,Alibaba,https://help.aliyun.com/zh/model-studio/getting-started/models?spm=a2c4g.11186623.0.i7 -nvila-internal-15b-v1,NVILA-15B,-,-,-,-,NVIDIA,https://huggingface.co/Efficient-Large-Model/NVILA-15B -o1-2024-12-17,o1-2024-12-17,-,-,-,Proprietary,OpenAI,https://openai.com/index/o1-and-new-tools-for-developers/ -deepseek-v3,DeepSeek-V3,-,-,-,DeepSeek,DeepSeek,https://huggingface.co/deepseek-ai/DeepSeek-V3 -smollm2-1.7b-instruct,SmolLM2-1.7B-Instruct,-,-,-,Apache 2.0,HuggingFace,https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct -llama-3.1-tulu-3-8b,Llama-3.1-Tulu-3-8B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B -llama-3.1-tulu-3-70b,Llama-3.1-Tulu-3-70B,-,-,-,Llama 3.1,Ai2,https://huggingface.co/allenai/Llama-3.1-Tulu-3-70B -step-2-16k-exp-202412,Step-2-16K-Exp,-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/text -granite-3.1-8b-instruct,Granite-3.1-8B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-8b-instruct -granite-3.1-2b-instruct,Granite-3.1-2B-Instruct,-,-,-,Apache 2.0,IBM,https://huggingface.co/ibm-granite/granite-3.1-2b-instruct -phi-4,Phi-4,-,-,-,MIT,Microsoft,https://huggingface.co/microsoft/phi-4 -gemini-2.0-flash-thinking-exp-01-21,Gemini-2.0-Flash-Thinking-Exp-01-21,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-flash-thinking-exp-01-21 -step-1o-vision-32k-highres,Step-1o-Vision-32k (highres),-,-,-,Proprietary,StepFun,https://platform.stepfun.com/docs/llm/vision -deepseek-r1,DeepSeek-R1,-,-,-,MIT,DeepSeek,https://api-docs.deepseek.com/news/news250120 -qwen2.5-max,Qwen2.5-Max,-,-,-,Proprietary,Alibaba,https://qwenlm.github.io/blog/qwen2.5-max/ -gemini-2.0-pro-exp-02-05,Gemini-2.0-Pro-Exp-02-05,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-pro-exp-02-05 -gemini-2.0-flash-001,Gemini-2.0-Flash-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-2.0-flash-001 -gemini-2.0-flash-lite-preview-02-05,Gemini-2.0-Flash-Lite,-,-,-,Proprietary,Google,https://aistudio.google.com/prompts/new_chat?model=gemini-2.0-flash-lite -gemini-2.0-flash,Gemini-2.0-Flash-001,-,-,-,Proprietary,Google,https://aistudio.google.com/app/prompts/new_chat?instructions=lmsys-1121&model=gemini-2.0-flash-001 -o3-mini,o3-mini,-,-,-,Proprietary,OpenAI,https://openai.com/index/openai-o3-mini/ -glm-4-plus-0111,GLM-4-Plus-0111,-,-,-,Proprietary,Zhipu,https://bigmodel.cn/dev/howuse/glm-4 -qwen2.5-vl-72b-instruct,Qwen2.5-VL-72B-Instruct,-,-,-,Qwen,Alibaba,https://huggingface.co/Qwen/Qwen2.5-VL-72B-Instruct -chatgpt-4o-latest-20250129,ChatGPT-4o-latest (2025-01-29),-,-,-,Proprietary,OpenAI,https://help.openai.com/en/articles/9624314-model-release-notes -mistral-small-24b-instruct-2501,Mistral-Small-24B-Instruct-2501,-,-,-,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-Small-24B-Instruct-2501 -qwen-plus-0125,Qwen-Plus-0125,-,-,-,Proprietary,Alibaba,https://www.alibabacloud.com/help/en/model-studio/developer-reference/what-is-qwen-llm -early-grok-3,chocolate (Early Grok-3),-,-,-,Proprietary,xAI,https://x.com/lmarena_ai/status/1891706264800936307 -o3-mini-high,o3-mini-high,-,-,-,Proprietary,OpenAI,https://platform.openai.com/docs/guides/reasoning#reasoning-effort -claude-3-7-sonnet-20250219,Claude 3.7 Sonnet,-,-,-,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-7-sonnet -hunyuan-large-2025-02-10,Hunyuan-Large-2025-02-10,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -hunyuan-standard-2025-02-10,Hunyuan-Standard-2025-02-10,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -gpt-4.5-preview-2025-02-27,GPT-4.5-Preview,-,-,-,Proprietary,OpenAI,https://openai.com/index/introducing-gpt-4-5/ -gemma-3-27b-it,Gemma-3-27B-it,-,-,-,Gemma,Google,http://aistudio.google.com/app/prompts/new_chat?model=gemma-3-27b-it -hunyuan-standard-vision-2024-12-31,Hunyuan-Standard-Vision-2024-12-31,-,-,-,Proprietary,Tencent,https://cloud.tencent.com/document/product/1729/104753 -grok-3-preview-02-24,Grok-3-Preview-02-24,-,-,-,Proprietary,xAI,https://x.ai/blog/grok-3 -flux-1.1-pro,FLUX1.1 [pro],-,-,-,Proprietary,Black Forest Labs,https://replicate.com/black-forest-labs/flux-1.1-pro -recraft-v3,Recraft V3,-,-,-,Proprietary,Recraft,https://www.recraft.ai/blog/recraft-introduces-a-revolutionary-ai-model-that-thinks-in-design-language -photon,Luma Photon,-,-,-,Proprietary,Luma AI,https://replicate.com/luma/photon -ideogram-v2,Ideogram 2.0,-,-,-,Proprietary,Ideogram,https://replicate.com/ideogram-ai/ideogram-v2 -stable-diffusion-v35-large,Stable Diffusion 3.5 Large,-,-,-,Open,Stability AI,https://fal.ai/models/fal-ai/stable-diffusion-v35-large -flux-1-dev-fp8,FLUX.1 [dev] (fp8),-,-,-,Open,Black Forest Labs,https://fireworks.ai/models/fireworks/flux-1-dev-fp8 -dall-e-3,DALLΒ·E 3,-,-,-,Proprietary,OpenAI,https://platform.openai.com/docs/models#dall-e -imagen-3.0-generate-002,Imagen-3.0-generate-002,-,-,-,Proprietary,Google,https://deepmind.google/technologies/imagen-3/ diff --git a/requirements.txt b/requirements.txt index c697e19933bbaf47656a32e0484e1b002890a257..d42d0ad03bdf8ecf9756a38df5cedf8fe431db79 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1 @@ -plotly<=5.24.1 -git+https://github.com/lm-sys/FastChat.git@main#egg=fschat[model_worker,webui] -accelerate>=0.21 -peft -sentencepiece -torch -transformers>=4.31.0 -protobuf -scipy \ No newline at end of file +plotly \ No newline at end of file diff --git a/style.css b/style.css deleted file mode 100644 index 3455ab1e72cbc7a87efc66294f26dc70d8fc9dae..0000000000000000000000000000000000000000 --- a/style.css +++ /dev/null @@ -1,39 +0,0 @@ -body { - padding: 2rem; - font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif; - display: flex; - justify-content: center; - align-items: center; - height: 100vh; /* Make the body take the full viewport height */ - font-size: 30px; /* Increase the font size */ - text-align: center; /* Center the text */ -} - -h1 { - font-size: 16px; - margin-top: 0; -} - -p { - color: rgb(107, 114, 128); - font-size: 15px; - margin-bottom: 10px; - margin-top: 5px; -} - -.card { - max-width: 620px; - margin: 0 auto; - padding: 16px; - border: 1px solid lightgray; - border-radius: 16px; -} - -.card p:last-child { - margin-bottom: 0; -} - -/* Additional style to ensure the link is also affected by the font size */ -a { - font-size: inherit; /* Ensures that links inherit the body's font size */ -} \ No newline at end of file diff --git a/theme.json b/theme.json deleted file mode 100644 index 866df9e3d4d2432f715dd93710aec5f146231a67..0000000000000000000000000000000000000000 --- a/theme.json +++ /dev/null @@ -1 +0,0 @@ -{"theme": {"text_size": "20px", "background_fill_primary": "white", "background_fill_primary_dark": "*neutral_950", "background_fill_secondary": "*neutral_50", "background_fill_secondary_dark": "*neutral_900", "block_background_fill": "*background_fill_primary", "block_background_fill_dark": "*neutral_800", "block_border_color": "*border_color_primary", "block_border_color_dark": "*border_color_primary", "block_border_width": "1px", "block_border_width_dark": "1px", "block_info_text_color": "*body_text_color_subdued", "block_info_text_color_dark": "*body_text_color_subdued", "block_info_text_size": "*text_sm", "block_info_text_weight": "400", "block_label_background_fill": "*background_fill_primary", "block_label_background_fill_dark": "*background_fill_secondary", "block_label_border_color": "*border_color_primary", "block_label_border_color_dark": "*border_color_primary", "block_label_border_width": "1px", "block_label_border_width_dark": "1px", "block_label_margin": "0", "block_label_padding": "*spacing_sm *spacing_lg", "block_label_radius": "calc(*radius_lg - 1px) 0 calc(*radius_lg - 1px) 0", "block_label_right_radius": "0 calc(*radius_lg - 1px) 0 calc(*radius_lg - 1px)", "block_label_shadow": "*block_shadow", "block_label_text_color": "*neutral_500", "block_label_text_color_dark": "*neutral_200", "block_label_text_size": "*text_sm", "block_label_text_weight": "400", "block_padding": "*spacing_xl calc(*spacing_xl + 2px)", "block_radius": "*radius_lg", "block_shadow": "none", "block_shadow_dark": "none", "block_title_background_fill": "none", "block_title_background_fill_dark": "none", "block_title_border_color": "none", "block_title_border_color_dark": "none", "block_title_border_width": "0px", "block_title_border_width_dark": "0px", "block_title_padding": "0", "block_title_radius": "none", "block_title_text_color": "*neutral_500", "block_title_text_color_dark": "*neutral_200", "block_title_text_size": "*text_md", "block_title_text_weight": "400", "body_background_fill": "*background_fill_primary", "body_background_fill_dark": "*background_fill_primary", "body_text_color": "*neutral_700", "body_text_color_dark": "*neutral_200", "body_text_color_subdued": "*neutral_400", "body_text_color_subdued_dark": "*neutral_500", "body_text_size": "*text_md", "body_text_weight": "400", "border_color_accent": "*primary_300", "border_color_accent_dark": "*neutral_600", "border_color_primary": "*neutral_200", "border_color_primary_dark": "*neutral_700", "button_border_width": "*input_border_width", "button_border_width_dark": "*input_border_width", "button_cancel_background_fill": "*button_secondary_background_fill", "button_cancel_background_fill_dark": "*button_secondary_background_fill", "button_cancel_background_fill_hover": "*button_cancel_background_fill", "button_cancel_background_fill_hover_dark": "*button_cancel_background_fill", "button_cancel_border_color": "*button_secondary_border_color", "button_cancel_border_color_dark": "*button_secondary_border_color", "button_cancel_border_color_hover": "*button_cancel_border_color", "button_cancel_border_color_hover_dark": "*button_cancel_border_color", "button_cancel_text_color": "*button_secondary_text_color", "button_cancel_text_color_dark": "*button_secondary_text_color", "button_cancel_text_color_hover": "*button_cancel_text_color", "button_cancel_text_color_hover_dark": "*button_cancel_text_color", "button_large_padding": "*spacing_lg calc(2 * *spacing_lg)", "button_large_radius": "*radius_lg", "button_large_text_size": "*text_lg", "button_large_text_weight": "500", "button_primary_background_fill": "*primary_200", "button_primary_background_fill_dark": "*primary_700", "button_primary_background_fill_hover": "*button_primary_background_fill", "button_primary_background_fill_hover_dark": "*button_primary_background_fill", "button_primary_border_color": "*primary_200", "button_primary_border_color_dark": "*primary_600", "button_primary_border_color_hover": "*button_primary_border_color", "button_primary_border_color_hover_dark": "*button_primary_border_color", "button_primary_text_color": "*primary_600", "button_primary_text_color_dark": "white", "button_primary_text_color_hover": "*button_primary_text_color", "button_primary_text_color_hover_dark": "*button_primary_text_color", "button_secondary_background_fill": "*neutral_200", "button_secondary_background_fill_dark": "*neutral_600", "button_secondary_background_fill_hover": "*neutral_300", "button_secondary_background_fill_hover_dark": "*neutral_500", "button_secondary_border_color": "*neutral_200", "button_secondary_border_color_dark": "*neutral_600", "button_secondary_border_color_hover": "*button_secondary_border_color", "button_secondary_border_color_hover_dark": "*button_secondary_border_color", "button_secondary_text_color": "*neutral_700", "button_secondary_text_color_dark": "white", "button_secondary_text_color_hover": "*button_secondary_text_color", "button_secondary_text_color_hover_dark": "*button_secondary_text_color", "button_shadow": "none", "button_shadow_active": "none", "button_shadow_hover": "none", "button_small_padding": "*spacing_sm calc(2 * *spacing_sm)", "button_small_radius": "*radius_lg", "button_small_text_size": "*text_md", "button_small_text_weight": "400", "button_transition": "background-color 0.2s ease", "checkbox_background_color": "*background_fill_primary", "checkbox_background_color_dark": "*neutral_800", "checkbox_background_color_focus": "*checkbox_background_color", "checkbox_background_color_focus_dark": "*checkbox_background_color", "checkbox_background_color_hover": "*checkbox_background_color", "checkbox_background_color_hover_dark": "*checkbox_background_color", "checkbox_background_color_selected": "*secondary_600", "checkbox_background_color_selected_dark": "*secondary_600", "checkbox_border_color": "*neutral_300", "checkbox_border_color_dark": "*neutral_700", "checkbox_border_color_focus": "*secondary_500", "checkbox_border_color_focus_dark": "*secondary_500", "checkbox_border_color_hover": "*neutral_300", "checkbox_border_color_hover_dark": "*neutral_600", "checkbox_border_color_selected": "*secondary_600", "checkbox_border_color_selected_dark": "*secondary_600", "checkbox_border_radius": "*radius_sm", "checkbox_border_width": "*input_border_width", "checkbox_border_width_dark": "*input_border_width", "checkbox_check": "url(\"data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='white' xmlns='http://www.w3.org/2000/svg'%3e%3cpath d='M12.207 4.793a1 1 0 010 1.414l-5 5a1 1 0 01-1.414 0l-2-2a1 1 0 011.414-1.414L6.5 9.086l4.293-4.293a1 1 0 011.414 0z'/%3e%3c/svg%3e\")", "checkbox_label_background_fill": "*button_secondary_background_fill", "checkbox_label_background_fill_dark": "*button_secondary_background_fill", "checkbox_label_background_fill_hover": "*button_secondary_background_fill_hover", "checkbox_label_background_fill_hover_dark": "*button_secondary_background_fill_hover", "checkbox_label_background_fill_selected": "*checkbox_label_background_fill", "checkbox_label_background_fill_selected_dark": "*checkbox_label_background_fill", "checkbox_label_border_color": "*border_color_primary", "checkbox_label_border_color_dark": "*border_color_primary", "checkbox_label_border_color_hover": "*checkbox_label_border_color", "checkbox_label_border_color_hover_dark": "*checkbox_label_border_color", "checkbox_label_border_width": "*input_border_width", "checkbox_label_border_width_dark": "*input_border_width", "checkbox_label_gap": "*spacing_lg", "checkbox_label_padding": "*spacing_md calc(2 * *spacing_md)", "checkbox_label_shadow": "none", "checkbox_label_text_color": "*body_text_color", "checkbox_label_text_color_dark": "*body_text_color", "checkbox_label_text_color_selected": "*checkbox_label_text_color", "checkbox_label_text_color_selected_dark": "*checkbox_label_text_color", "checkbox_label_text_size": "*text_md", "checkbox_label_text_weight": "400", "checkbox_shadow": "*input_shadow", "color_accent": "*primary_500", "color_accent_soft": "*primary_50", "color_accent_soft_dark": "*neutral_700", "container_radius": "*radius_lg", "embed_radius": "*radius_md", "error_background_fill": "#fee2e2", "error_background_fill_dark": "*background_fill_primary", "error_border_color": "#fecaca", "error_border_color_dark": "*border_color_primary", "error_border_width": "1px", "error_border_width_dark": "1px", "error_text_color": "#ef4444", "error_text_color_dark": "#ef4444", "form_gap_width": "0px", "input_background_fill": "*neutral_100", "input_background_fill_dark": "*neutral_700", "input_background_fill_focus": "*secondary_500", "input_background_fill_focus_dark": "*secondary_600", "input_background_fill_hover": "*input_background_fill", "input_background_fill_hover_dark": "*input_background_fill", "input_border_color": "*border_color_primary", "input_border_color_dark": "*border_color_primary", "input_border_color_focus": "*secondary_300", "input_border_color_focus_dark": "*neutral_700", "input_border_color_hover": "*input_border_color", "input_border_color_hover_dark": "*input_border_color", "input_border_width": "0px", "input_border_width_dark": "0px", "input_padding": "*spacing_xl", "input_placeholder_color": "*neutral_400", "input_placeholder_color_dark": "*neutral_500", "input_radius": "*radius_lg", "input_shadow": "none", "input_shadow_dark": "none", "input_shadow_focus": "*input_shadow", "input_shadow_focus_dark": "*input_shadow", "input_text_size": "*text_md", "input_text_weight": "400", "layout_gap": "*spacing_xxl", "link_text_color": "*secondary_600", "link_text_color_active": "*secondary_600", "link_text_color_active_dark": "*secondary_500", "link_text_color_dark": "*secondary_500", "link_text_color_hover": "*secondary_700", "link_text_color_hover_dark": "*secondary_400", "link_text_color_visited": "*secondary_500", "link_text_color_visited_dark": "*secondary_600", "loader_color": "*color_accent", "loader_color_dark": "*color_accent", "name": "base", "neutral_100": "#f5f5f4", "neutral_200": "#e7e5e4", "neutral_300": "#d6d3d1", "neutral_400": "#a8a29e", "neutral_50": "#fafaf9", "neutral_500": "#78716c", "neutral_600": "#57534e", "neutral_700": "#44403c", "neutral_800": "#292524", "neutral_900": "#1c1917", "neutral_950": "#0f0e0d", "panel_background_fill": "*background_fill_secondary", "panel_background_fill_dark": "*background_fill_secondary", "panel_border_color": "*border_color_primary", "panel_border_color_dark": "*border_color_primary", "panel_border_width": "0", "panel_border_width_dark": "0", "primary_100": "#e0f2fe", "primary_200": "#bae6fd", "primary_300": "#7dd3fc", "primary_400": "#38bdf8", "primary_50": "#f0f9ff", "primary_500": "#0ea5e9", "primary_600": "#0284c7", "primary_700": "#0369a1", "primary_800": "#075985", "primary_900": "#0c4a6e", "primary_950": "#0b4165", "prose_header_text_weight": "500", "prose_text_size": "*text_md", "prose_text_weight": "400", "radio_circle": "url(\"data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='white' xmlns='http://www.w3.org/2000/svg'%3e%3ccircle cx='8' cy='8' r='3'/%3e%3c/svg%3e\")", "radius_lg": "3px", "radius_md": "3px", "radius_sm": "3px", "radius_xl": "3px", "radius_xs": "3px", "radius_xxl": "3px", "radius_xxs": "3px", "secondary_100": "#e0f2fe", "secondary_200": "#bae6fd", "secondary_300": "#7dd3fc", "secondary_400": "#38bdf8", "secondary_50": "#f0f9ff", "secondary_500": "#0ea5e9", "secondary_600": "#0284c7", "secondary_700": "#0369a1", "secondary_800": "#075985", "secondary_900": "#0c4a6e", "secondary_950": "#0b4165", "section_header_text_size": "*text_md", "section_header_text_weight": "400", "shadow_drop": "rgba(0,0,0,0.05) 0px 1px 2px 0px", "shadow_drop_lg": "0 1px 3px 0 rgb(0 0 0 / 0.1), 0 1px 2px -1px rgb(0 0 0 / 0.1)", "shadow_inset": "rgba(0,0,0,0.05) 0px 2px 4px 0px inset", "shadow_spread": "3px", "shadow_spread_dark": "1px", "slider_color": "*primary_600", "slider_color_dark": "*primary_600", "spacing_lg": "8px", "spacing_md": "6px", "spacing_sm": "4px", "spacing_xl": "10px", "spacing_xs": "2px", "spacing_xxl": "16px", "spacing_xxs": "1px", "stat_background_fill": "*primary_300", "stat_background_fill_dark": "*primary_500", "table_border_color": "*neutral_300", "table_border_color_dark": "*neutral_700", "table_even_background_fill": "white", "table_even_background_fill_dark": "*neutral_950", "table_odd_background_fill": "*neutral_50", "table_odd_background_fill_dark": "*neutral_900", "table_radius": "*radius_lg", "table_row_focus": "*color_accent_soft", "table_row_focus_dark": "*color_accent_soft", "text_lg": "20px", "text_md": "16px", "text_sm": "14px", "text_xl": "24px", "text_xs": "12px", "text_xxl": "28px", "text_xxs": "10px"}, "version": "0.0.1"} \ No newline at end of file