"""Gradio app for the Subquadratic LLM Leaderboard.

Loads the results table from ``data.csv``, renders it with interactive
filters, and lets visitors submit Hugging Face model repos to an evaluation
queue through a webhook whose URL is read from the ``WEBHOOK_URL``
environment variable.
"""
import os

import pandas as pd
import requests
import huggingface_hub
import gradio as gr

# Every column is read as a string so filtering and display never depend on
# pandas' dtype inference.
data = pd.read_csv("data.csv", dtype="str")
webhook_url = os.environ.get("WEBHOOK_URL")

# Reference link for each known architecture name.
archlinks = {
    "Mamba": "https://arxiv.org/abs/2312.00752",
    "Jamba": "https://arxiv.org/abs/2403.19887",
    "Based": "https://arxiv.org/abs/2402.18668",
    "RWKV-4": "https://arxiv.org/abs/2305.13048",
    "RWKV-5": "https://substack.recursal.ai/p/rwkv-v5-15b-achieves-sota-multi-lingual",  # paper soon!
    "StripedHyena": "https://www.together.ai/blog/stripedhyena-7b",  # no paper?
}

# Seconds to wait for the submission webhook before giving up.
WEBHOOK_TIMEOUT = 10

# Defaults shared by the filter widgets and the initial table render.
DEFAULT_HIDDEN_COLUMNS = ["MT-Bench (coming soon!)", "Architecture", "Model Size", "Base Model"]
ALL_TYPES = list(data["Type"].unique())
DEFAULT_TYPES = [n for n in ALL_TYPES if n not in ["⏳ Pending"]]
ALL_ARCHITECTURES = list(data["Architecture"].unique())
ALL_SIZES = list(data["Model Size"].unique())

# Kept at module level (unindented) so the Markdown is never mistaken for an
# indented code block.
ABOUT_MARKDOWN = """
The **Subquadratic LLM Leaderboard** evaluates LLMs with subquadratic/attention-free architectures (i.e. RWKV & Mamba) with the goal of providing open evaluation results while the architectures themselves are pending inclusion/release in the 🤗 Transformers library.
The metrics are the same as the Open LLM Leaderboard: ARC 25-shot, HellaSwag 10-shot, MMLU 5-shot, TruthfulQA zeroshot, Winogrande 5-shot, and GSM8K 5-shot. This leaderboard is maintained by Devin Gulliver and is perpetually under construction, check back regularly for further improvements! Compute for evaluating RWKV models is generously provided by [Recursal AI](https://recursal.ai).
"""


def _architecture_cell(arch):
    """Return the table cell for an architecture, linked when a URL is known."""
    # NOTE(review): `archlinks` was defined but unused and the original
    # formatter was a no-op, which suggests link markup was lost. The Markdown
    # link form is a reconstruction -- confirm against the intended rendering.
    url = archlinks.get(arch)
    return f"[{arch}]({url})" if url else f"{arch}"


def filter_table(cols, name, type, arch, size):
    """Return the leaderboard rows matching the filters, formatted for display.

    Args:
        cols: Column names to hide (dropped from the result).
        name: Case-insensitive substring to look for in the "Name" column.
        type: Allowed values of the "Type" column.
        arch: Allowed values of the "Architecture" column.
        size: Allowed values of the "Model Size" column.

    Returns:
        A new DataFrame; the module-level ``data`` frame is left untouched.
    """
    # filter
    # regex=False: the search box holds literal text, so characters such as
    # "(" or "+" must not be parsed as a regular expression. na=False keeps
    # the mask boolean when a name is missing.
    mask = (
        data["Name"].str.contains(name, case=False, regex=False, na=False)
        & data["Type"].isin(type)
        & data["Architecture"].isin(arch)
        & data["Model Size"].isin(size)
    )
    # Explicit copy so the column assignments below never write into a view.
    tmp = data[mask].copy()

    # prettify
    # Only the first character of the type is shown (a leading emoji in the
    # values visible here, e.g. "⏳ Pending").
    tmp["Type"] = tmp["Type"].str[0]
    tmp = tmp.rename(columns={"Type": "T"})
    # NOTE(review): the Name and Base Model formatters look like they once
    # wrapped the value in link markup; only the bare value is visible here.
    tmp["Name"] = tmp["Name"].apply(lambda x: f"{x}")
    tmp["Architecture"] = tmp["Architecture"].apply(_architecture_cell)
    tmp["Base Model"] = tmp["Base Model"].apply(lambda x: f"{x}" if x != "base" else "")

    # show/hide
    return tmp.drop(columns=cols)


def submit_model(name):
    """Validate a Hub repo id and push it to the evaluation queue webhook.

    Args:
        name: Repo id typed by the user.

    Returns:
        A Markdown status message describing success or the failure reason.
    """
    name = (name or "").strip()
    if not name:
        return "# ERROR: Please enter a model name!"

    # Sanity check input: the repo must exist and ship a config.json.
    try:
        huggingface_hub.hf_hub_download(repo_id=name, filename="config.json")
    except huggingface_hub.utils.EntryNotFoundError:
        return "# ERROR: Model does not have a config.json file!"
    except huggingface_hub.utils.RepositoryNotFoundError:
        return "# ERROR: Model could not be found on the Hugging Face Hub!"
    except (huggingface_hub.utils.HfHubHTTPError, requests.exceptions.RequestException):
        # Covers HTTP error statuses as well as connection failures/timeouts.
        return "# ERROR: Network error while validating model. Please try again later."
    except Exception as e:
        print(e)
        return "ERROR: Unexpected error. Please try again later."

    if not webhook_url:
        # A missing URL is a deployment problem, not a network hiccup.
        print("WEBHOOK_URL is not set; cannot queue submission")
        return "ERROR: Unexpected error. Please try again later."

    try:
        response = requests.post(webhook_url, json={"content": name}, timeout=WEBHOOK_TIMEOUT)
        # requests does not raise on 4xx/5xx by itself; without this check a
        # rejected submission would be reported as a success.
        response.raise_for_status()
    except requests.exceptions.RequestException:
        return "# ERROR: Network error while contacting queue. Please try again in a few minutes."
    except Exception as e:
        print(e)
        return "ERROR: Unexpected error. Please try again later."

    return "# SUCCESS: Please wait up to 24 hours for your model to be added to the queue."


# NOTE(review): the layout nesting below was reconstructed from a source whose
# indentation was lost -- confirm the placement of `table` and `output`.
with gr.Blocks(css=".gradio-container{max-width:95%!important} .tab-buttons button{font-size:1.3em}") as demo:
    # NOTE(review): the heading markup appears to have been stripped from this
    # literal; only the text is preserved here.
    gr.HTML("\n\nSubquadratic LLM Leaderboard\n\n")
    gr.Markdown("**REMEMBER:** If you don't see an eligible model here, make sure to submit it! We hope to incentivize subquadratic/attention-free LLM development through friendly competition.")
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.Tab("🏅 LLM Benchmark"):
            with gr.Row():
                with gr.Column():
                    namefilter = gr.Textbox(max_lines=1, placeholder="Search by model name and hit Enter...", show_label=False)
                    typefilter = gr.CheckboxGroup(show_label=False, choices=ALL_TYPES, value=DEFAULT_TYPES)
                with gr.Column():
                    archfilter = gr.CheckboxGroup(label="Filter by model architecture", choices=ALL_ARCHITECTURES, value=ALL_ARCHITECTURES)
                    sizefilter = gr.CheckboxGroup(label="Filter by model size", choices=ALL_SIZES, value=ALL_SIZES)
                with gr.Column():
                    # The first two columns are not offered for hiding.
                    colfilter = gr.CheckboxGroup(label="Hide columns", choices=list(data.columns)[2:], value=DEFAULT_HIDDEN_COLUMNS)
            table = gr.Dataframe(
                filter_table(DEFAULT_HIDDEN_COLUMNS, "", DEFAULT_TYPES, ALL_ARCHITECTURES, ALL_SIZES),
                datatype="markdown",
            )

            # actions
            filter_inputs = [colfilter, namefilter, typefilter, archfilter, sizefilter]
            namefilter.submit(filter_table, filter_inputs, table)
            for checkbox_group in [colfilter, typefilter, archfilter, sizefilter]:
                checkbox_group.input(filter_table, filter_inputs, table)
        with gr.Tab("📝 About"):
            gr.Markdown(ABOUT_MARKDOWN)
        with gr.Tab("🚀 Submit here!"):
            with gr.Group():
                with gr.Row():
                    model_name = gr.Textbox(max_lines=1, placeholder="Enter model name...", show_label=False, scale=4)
                    submit = gr.Button("Submit", variant="primary", scale=0)
                output = gr.Markdown("Enter a public HF repo id, then hit Submit to add it to the evaluation queue.")
            submit.click(fn=submit_model, inputs=model_name, outputs=output)

demo.launch(show_api=False, allowed_paths=["data.csv"])