data_only_hallucination_leaderboard

Runtime error

pminervini committed on Nov 21, 2023

Commit

24eddae

1 Parent(s): 142beab

update

Files changed (6) hide show

app.py CHANGED Viewed

@@ -26,30 +26,25 @@ from src.display.utils import (
 from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, H4_TOKEN, IS_PUBLIC, QUEUE_REPO, REPO_ID, RESULTS_REPO
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
 from src.submission.submit import add_new_eval
-from src.submission.check_validity import already_submitted_models
-from src.tools.collections import update_collections
 from src.tools.plots import (
     create_metric_plot_obj,
     create_plot_df,
     create_scores_df,
 )
 def restart_space():
     API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
 try:
     print(EVAL_REQUESTS_PATH)
-    snapshot_download(
-        repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
-    )
 except Exception:
     restart_space()
 try:
     print(EVAL_RESULTS_PATH)
-    snapshot_download(
-        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
-    )
 except Exception:
     restart_space()
@@ -60,11 +55,7 @@ leaderboard_df = original_df.copy()
 plot_df = create_plot_df(create_scores_df(raw_data))
-(
-    finished_eval_queue_df,
-    running_eval_queue_df,
-    pending_eval_queue_df,
-) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
 # Searching and filtering

 from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, H4_TOKEN, IS_PUBLIC, QUEUE_REPO, REPO_ID, RESULTS_REPO
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
 from src.submission.submit import add_new_eval
+# from src.submission.check_validity import already_submitted_models
+# from src.tools.collections import update_collections
 from src.tools.plots import (
     create_metric_plot_obj,
     create_plot_df,
     create_scores_df,
 )
 def restart_space():
     API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
 try:
     print(EVAL_REQUESTS_PATH)
+    snapshot_download(repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30)
 except Exception:
     restart_space()
 try:
     print(EVAL_RESULTS_PATH)
+    snapshot_download(repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30)
 except Exception:
     restart_space()
 plot_df = create_plot_df(create_scores_df(raw_data))
+(finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
 # Searching and filtering

requirements.txt CHANGED Viewed

@@ -1,4 +1,5 @@
 torch
 APScheduler==3.10.1
 black==23.11.0
 click==8.1.3

 torch
+colorama
 APScheduler==3.10.1
 black==23.11.0
 click==8.1.3

scripts/create_request_file.py CHANGED Viewed

@@ -9,7 +9,7 @@ from colorama import Fore
 from huggingface_hub import HfApi, snapshot_download
 EVAL_REQUESTS_PATH = "eval-queue"
-QUEUE_REPO = "open-llm-leaderboard/requests"
 precisions = ("float16", "bfloat16", "8bit (LLM.int8)", "4bit (QLoRA / FP4)", "GPTQ")
 model_types = ("pretrained", "fine-tuned", "RL-tuned", "instruction-tuned")

 from huggingface_hub import HfApi, snapshot_download
 EVAL_REQUESTS_PATH = "eval-queue"
+QUEUE_REPO = "hallucinations-leaderboard/requests"
 precisions = ("float16", "bfloat16", "8bit (LLM.int8)", "4bit (QLoRA / FP4)", "GPTQ")
 model_types = ("pretrained", "fine-tuned", "RL-tuned", "instruction-tuned")

src/display/about.py CHANGED Viewed

@@ -1,9 +1,9 @@
 from src.display.utils import ModelType
-TITLE = """<h1 align="center" id="space-title">🤗 Open LLM Leaderboard</h1>"""
 INTRODUCTION_TEXT = """
-📐 The 🤗 Open LLM Leaderboard aims to track, rank and evaluate open LLMs and chatbots.
 🤗 Submit a model for automated evaluation on the 🤗 GPU cluster on the "Submit" page!
 The leaderboard's backend runs the great [Eleuther AI Language Model Evaluation Harness](https://github.com/EleutherAI/lm-evaluation-harness) - read more details in the "About" page!

 from src.display.utils import ModelType
+TITLE = """<h1 align="center" id="space-title">🤗 Open Hallucinations Leaderboard</h1>"""
 INTRODUCTION_TEXT = """
+📐 The 🤗 Open Hallucinations Leaderboard aims to track, rank and evaluate hallucinations in LLMs and chatbots.
 🤗 Submit a model for automated evaluation on the 🤗 GPU cluster on the "Submit" page!
 The leaderboard's backend runs the great [Eleuther AI Language Model Evaluation Harness](https://github.com/EleutherAI/lm-evaluation-harness) - read more details in the "About" page!

src/envs.py CHANGED Viewed

@@ -14,7 +14,7 @@ PRIVATE_RESULTS_REPO = "hallucinations-leaderboard/private-results"
 IS_PUBLIC = bool(os.environ.get("IS_PUBLIC", True))
-CACHE_PATH=os.getenv("HF_HOME", ".")
 EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
 EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")

 IS_PUBLIC = bool(os.environ.get("IS_PUBLIC", True))
+CACHE_PATH = os.getenv("HF_HOME", ".")
 EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
 EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")

src/leaderboard/read_evals.py CHANGED Viewed

@@ -5,8 +5,8 @@ import os
 from dataclasses import dataclass
 import dateutil
-from datetime import datetime
-from transformers import AutoConfig
 import numpy as np
 from src.display.formatting import make_clickable_model

 from dataclasses import dataclass
 import dateutil
+# from datetime import datetime
+# from transformers import AutoConfig
 import numpy as np
 from src.display.formatting import make_clickable_model