Clémentine committed · Commit af9288c · Parent(s): 3e6770c

add more info

Files changed:
- README.md +5 -24
- app.py +2 -0
- requirements.txt +0 -10
README.md CHANGED

````diff
@@ -10,29 +10,10 @@ pinned: true
 license: apache-2.0
 ---
 
-
+Depending on whether you want to use lighteval or lm_eval for your evaluations, you might need to complete the
+requirements.txt file to contain the relevant dependencies.
 
-
-```json
-{
-    "config": {
-        "model_dtype": "torch.float16", # or torch.bfloat16 or 8bit or 4bit
-        "model_name": "path of the model on the hub: org/model",
-        "model_sha": "revision on the hub",
-    },
-    "results": {
-        "task_name": {
-            "metric_name": score,
-        },
-        "task_name2": {
-            "metric_name": score,
-        }
-    }
-}
-```
+You'll also need to select, in app.py, whether you want to use lighteval or lm_eval, by selecting the correct
+import and commenting out the other.
 
-
-
-If you encounter problem on the space, don't hesitate to restart it to remove the create eval-queue, eval-queue-bk, eval-results and eval-results-bk created folder.
-
-If you want to run your own backend, you only need to change the logic in src/backend/run_eval_suite_..., which at the moment launches the Eleuther AI Harness or Lighteval, and edit the app.py to point to the correct file.
+All env variables that you should need to edit to launch the evaluations should be in `envs`.
````
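The `envs` module itself is not part of this commit, but app.py imports `REFRESH_RATE`, `REPO_ID`, `QUEUE_REPO`, and `RESULTS_REPO` from `src.envs`. A minimal sketch of what `src/envs.py` might expose, assuming only those four names; the `TOKEN`/`OWNER` helpers and every concrete value are illustrative assumptions, not part of this commit:

```python
# src/envs.py -- hypothetical sketch. Only the four names that app.py imports
# (REFRESH_RATE, REPO_ID, QUEUE_REPO, RESULTS_REPO) are confirmed by this
# commit; every value below is an assumption to adapt.
import os

# Hub token with write access to the queue/results repos (assumed secret name).
TOKEN = os.environ.get("HF_TOKEN")

# Org or user that owns the Space and its datasets (assumed).
OWNER = "your-org"

REPO_ID = f"{OWNER}/backend"      # this Space, used in the leaderboard links table
QUEUE_REPO = f"{OWNER}/requests"  # dataset holding pending eval requests (assumed name)
RESULTS_REPO = f"{OWNER}/results" # dataset holding finished eval results (assumed name)

# Interval between two runs of the auto-eval loop, in seconds (assumed unit).
REFRESH_RATE = 15 * 60
```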
app.py CHANGED

```diff
@@ -9,6 +9,7 @@ from functools import partial
 
 import gradio as gr
 from main_backend_lighteval import run_auto_eval
+# from main_backend_harness import run_auto_eval
 from src.display.log_visualizer import log_file_to_html_string
 from src.display.css_html_js import dark_mode_gradio_js
 from src.envs import REFRESH_RATE, REPO_ID, QUEUE_REPO, RESULTS_REPO
@@ -25,6 +26,7 @@ This is a visual for the auto evaluator.
 
 links_md = f"""
 # Important links
+
 | Description | Link |
 |-----------------|------|
 | Leaderboard | [{REPO_ID}](https://huggingface.co/spaces/{REPO_ID}) |
```
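The newly commented import is the backend toggle the README describes: exactly one of the two `run_auto_eval` imports should be active. Flipped to the harness backend it would read as below; the `main_backend_harness` module name comes from the comment in this commit, and that it exposes the same `run_auto_eval` interface is an assumption:

```python
# Backend selection at the top of app.py: keep one import, comment out the other.
# from main_backend_lighteval import run_auto_eval
from main_backend_harness import run_auto_eval  # lm_eval / Eleuther AI Harness backend
```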
requirements.txt CHANGED

```diff
@@ -1,20 +1,10 @@
 APScheduler==3.10.1
 black==23.11.0
 click==8.1.3
-datasets==2.14.5
-gradio==4.4.0 # will have to move to 4.19.2
-gradio_client
 huggingface-hub>=0.18.0
-matplotlib==3.7.1
-numpy==1.24.2
-pandas==2.0.0
 python-dateutil==2.8.2
 requests==2.28.2
 tqdm==4.65.0
-transformers
-tokenizers>=0.15.0
-git+https://github.com/EleutherAI/lm-evaluation-harness.git@b281b0921b636bc36ad05c0b0b0763bd6dd43463#egg=lm-eval
-git+https://github.com/huggingface/lighteval.git#egg=lighteval
 accelerate==0.24.1
 sentencepiece
 
```
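Since the evaluation-backend dependencies were dropped from requirements.txt, they now have to be added back by hand, as the new README says. A sketch of the lines to restore, copied from the versions this commit removes (whether newer revisions of either package still work with the backend code is not established here):

```
# Append to requirements.txt depending on the backend enabled in app.py.

# lm_eval backend (Eleuther AI Harness), pinned to the removed revision:
git+https://github.com/EleutherAI/lm-evaluation-harness.git@b281b0921b636bc36ad05c0b0b0763bd6dd43463#egg=lm-eval

# lighteval backend:
git+https://github.com/huggingface/lighteval.git#egg=lighteval
```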