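"""Gradio app for the Subquadratic LLM Leaderboard submission queue.

Sketch of the flow (inferred from the code below): a user enters a public
Hugging Face repo id, the app sanity-checks that the repo exists on the Hub
and contains a config.json, then forwards the name to a webhook (WEBHOOK_URL)
that feeds the evaluation queue.
"""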
import os
import requests
import huggingface_hub
import gradio as gr
webhook_url = os.environ.get("WEBHOOK_URL")
def submit_model(name):
    # Sanity-check the input: the repo must exist on the Hub and contain a config.json.
    try:
        huggingface_hub.hf_hub_download(repo_id=name, filename="config.json")
    except huggingface_hub.utils.EntryNotFoundError:
        return "# ERROR: Model does not have a config.json file!"
    except huggingface_hub.utils.RepositoryNotFoundError:
        return "# ERROR: Model could not be found on the Hugging Face Hub!"
    except requests.exceptions.HTTPError:
        return "# ERROR: Network error while validating model. Please try again later."
    except Exception as e:
        print(e)
        return "# ERROR: Unexpected error. Please try again later."

    # Forward the model name to the evaluation queue via the webhook.
    try:
        result = requests.post(webhook_url, json={"content": name})
        result.raise_for_status()  # surface HTTP error responses so the handler below catches them
    except requests.exceptions.HTTPError:
        return "# ERROR: Network error while contacting queue. Please try again in a few minutes."
    except Exception as e:
        print(e)
        return "# ERROR: Unexpected error. Please try again later."

    return "# SUCCESS: Please wait up to 24 hours for your model to be added to the queue."
with gr.Blocks() as demo:
    gr.HTML('<style>.tab-buttons button{font-size:1.3em}</style><h1 style="text-align:center">Subquadratic LLM Leaderboard</h1>')
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.Tab("LLM Benchmark"):
            gr.Markdown("Table filters under construction")
            gr.Dataframe("data.csv")
        with gr.Tab("About"):
            gr.Markdown("""
The **Subquadratic LLM Leaderboard** evaluates LLMs with subquadratic architectures (e.g. RWKV and Mamba), with the goal of providing open evaluation results while the architectures themselves are pending inclusion in 🤗 Transformers.

The metrics are the same as the Open LLM Leaderboard: ARC 25-shot, HellaSwag 10-shot, MMLU 5-shot, TruthfulQA zero-shot, Winogrande 5-shot, and GSM8K 5-shot.

This leaderboard is maintained by Devin Gulliver and is still under construction; check back regularly for further improvements!
""")
        with gr.Tab("Submit here!"):
            with gr.Group():
                with gr.Row():
                    model_name = gr.Textbox(max_lines=1, label="Model Name", scale=4)
                    submit = gr.Button("Submit", variant="primary", scale=0)
                output = gr.Markdown("Enter a public HF repo id, then hit Submit to add it to the evaluation queue.")
            submit.click(fn=submit_model, inputs=model_name, outputs=output)

demo.launch()
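# To run locally (assumes WEBHOOK_URL points at an endpoint that accepts a JSON
# body of the form {"content": "<repo id>"}; "app.py" is a hypothetical filename):
#   WEBHOOK_URL=https://example.invalid/webhook python app.py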