open_llm_leaderboard

Running on CPU Upgrade

App Files Files Community

sheonhan committed on May 31, 2023

Commit

2a73469

•

1 Parent(s): c131125

Add citation button

Browse files

Files changed (2) hide show

app.py +39 -8
content.py +25 -11

app.py CHANGED Viewed

@@ -17,6 +17,8 @@ LMEH_REPO = "HuggingFaceH4/lmeh_evaluations"
 IS_PUBLIC = bool(os.environ.get("IS_PUBLIC", None))
 api = HfApi()
 def restart_space():
     api.restart_space(repo_id="HuggingFaceH4/open_llm_leaderboard", token=H4_TOKEN)
@@ -32,10 +34,11 @@ def get_all_requested_models(requested_models_dir):
     return set([file_name.lower().split("./evals/")[1] for file_name in file_names])
 repo = None
 requested_models = None
 if H4_TOKEN:
-    print("pulling repo")
     # try:
     #     shutil.rmtree("./evals/")
     # except:
@@ -111,9 +114,10 @@ def has_no_nan_values(df, columns):
 def has_nan_values(df, columns):
     return df[columns].isna().any(axis=1)
 def get_leaderboard():
     if repo:
-        print("pulling changes")
         repo.git_pull()
     all_data = get_eval_results_dicts(IS_PUBLIC)
@@ -166,8 +170,9 @@ def get_leaderboard():
 def get_eval_table():
     if repo:
-        print("pulling changes for eval")
         repo.git_pull()
     entries = [
         entry
         for entry in os.listdir("evals/eval_requests")
@@ -221,7 +226,7 @@ def is_model_on_hub(model_name, revision) -> bool:
         return True
     except Exception as e:
-        print("Could not get the model config from the hub")
         print(e)
         return False
@@ -293,24 +298,50 @@ def refresh():
     finished_eval_queue, running_eval_queue, pending_eval_queue = get_eval_table()
     return leaderboard, finished_eval_queue, running_eval_queue, pending_eval_queue
 custom_css = """
 #changelog-text {
     font-size: 18px !important;
 }
 .markdown-text {
     font-size: 16px !important;
 }
 """
 demo = gr.Blocks(css=custom_css)
 with demo:
     gr.HTML(TITLE)
-    with gr.Row():
-        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
-    with gr.Accordion("CHANGELOG", open=False):
-        changelog = gr.Markdown(CHANGELOG_TEXT, elem_id="changelog-text")
     leaderboard_table = gr.components.Dataframe(
         value=leaderboard, headers=COLS, datatype=TYPES, max_rows=5

 IS_PUBLIC = bool(os.environ.get("IS_PUBLIC", None))
 api = HfApi()
 def restart_space():
     api.restart_space(repo_id="HuggingFaceH4/open_llm_leaderboard", token=H4_TOKEN)
     return set([file_name.lower().split("./evals/")[1] for file_name in file_names])
 repo = None
 requested_models = None
 if H4_TOKEN:
+    print("Pulling evaluation requests and results.")
     # try:
     #     shutil.rmtree("./evals/")
     # except:
 def has_nan_values(df, columns):
     return df[columns].isna().any(axis=1)
 def get_leaderboard():
     if repo:
+        print("Pulling evaluation results for the leaderboard.")
         repo.git_pull()
     all_data = get_eval_results_dicts(IS_PUBLIC)
 def get_eval_table():
     if repo:
+        print("Pulling changes for the evaluation queue.")
         repo.git_pull()
     entries = [
         entry
         for entry in os.listdir("evals/eval_requests")
         return True
     except Exception as e:
+        print("Could not get the model config from the hub.")
         print(e)
         return False
     finished_eval_queue, running_eval_queue, pending_eval_queue = get_eval_table()
     return leaderboard, finished_eval_queue, running_eval_queue, pending_eval_queue
 custom_css = """
 #changelog-text {
+    font-size: 16px !important;
+}
+#changelog-text h2 {
     font-size: 18px !important;
 }
 .markdown-text {
     font-size: 16px !important;
 }
+#citation-button span {
+    font-size: 16px !important;
+}
+#citation-button textarea {
+    font-size: 16px !important;
+}
+#citation-button > label > button {
+    margin: 6px;
+    transform: scale(1.3);
+}
 """
 demo = gr.Blocks(css=custom_css)
 with demo:
     gr.HTML(TITLE)
+    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+    with gr.Row():
+        with gr.Column():
+            with gr.Accordion("📙 Citation", open=False):
+                citation_button = gr.Textbox(
+                    value=CITATION_BUTTON_TEXT,
+                    label=CITATION_BUTTON_LABEL,
+                    elem_id="citation-button",
+                ).style(show_copy_button=True)
+        with gr.Column():
+            with gr.Accordion("✨ CHANGELOG", open=False):
+                changelog = gr.Markdown(CHANGELOG_TEXT, elem_id="changelog-text")
     leaderboard_table = gr.components.Dataframe(
         value=leaderboard, headers=COLS, datatype=TYPES, max_rows=5

content.py CHANGED Viewed

@@ -1,29 +1,33 @@
 CHANGELOG_TEXT = f"""
 ## [2023-05-29]
-- Auto-restart every hour
 - Sync with the internal version (minor style changes)
 ## [2023-05-24]
-- Added a baseline that has 25.0 for all values.
-- Added CHANGELOG
 ## [2023-05-23]
-- Fixed a CSS issue that made the leaderboard hard to read in dark mode.
 ## [2023-05-22]
-- Display a success/error message after submitting evaluation requests.
-- Reject duplicate submission.
-- Do not display results that have incomplete results.
-- Display different queues for jobs that are RUNNING, PENDING, FINISHED status.
 ## [2023-05-15]
-- Fixed a typo: from "TruthQA" to "TruthfulQA"
 ## [2023-05-10]
-- Fixed a bug that prevented auto-refresh.
 ## [2023-05-10]
-- Released the leaderboard to public.
 """
 TITLE = """<h1 align="center" id="space-title">🤗 Open LLM Leaderboard</h1>"""
@@ -47,3 +51,13 @@ We chose these benchmarks as they test a variety of reasoning and general knowle
 EVALUATION_QUEUE_TEXT = f"""
 # Evaluation Queue for the 🤗 Open LLM Leaderboard, these models will be automatically evaluated on the 🤗 cluster
 """

 CHANGELOG_TEXT = f"""
+## [2023-05-30]
+- Add a citation button
+- Simplify Gradio layout
 ## [2023-05-29]
+- Auto-restart every hour for the latest results
 - Sync with the internal version (minor style changes)
 ## [2023-05-24]
+- Add a baseline that has 25.0 for all values
+- Add CHANGELOG
 ## [2023-05-23]
+- Fix a CSS issue that made the leaderboard hard to read in dark mode
 ## [2023-05-22]
+- Display a success/error message after submitting evaluation requests
+- Reject duplicate submission
+- Do not display results that have incomplete results
+- Display different queues for jobs that are RUNNING, PENDING, FINISHED status
 ## [2023-05-15]
+- Fix a typo: from "TruthQA" to "TruthfulQA"
 ## [2023-05-10]
+- Fix a bug that prevented auto-refresh
 ## [2023-05-10]
+- Release the leaderboard to public
 """
 TITLE = """<h1 align="center" id="space-title">🤗 Open LLM Leaderboard</h1>"""
 EVALUATION_QUEUE_TEXT = f"""
 # Evaluation Queue for the 🤗 Open LLM Leaderboard, these models will be automatically evaluated on the 🤗 cluster
 """
+CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
+CITATION_BUTTON_TEXT = r"""@misc{open-llm-leaderboard,
+  author = {Edward Beeching and Sheon Han and Nathan Lambert and Nazneen Rajani and Omar Sanseviero and Lewis Tunstall and Thomas Wolf},
+  title = {Open LLM Leaderboard},
+  year = {2023},
+  publisher = {Hugging Face},
+  howpublished = "\url{https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard}"
+}"""