Clémentine committed
Commit 6bc96ff • 1 Parent(s): 8b88d2c

debug inference endpoint launch and requirements

Files changed:
- app.py +1 -1
- requirements.txt +5 -1
- src/backend/run_eval_suite_lighteval.py +22 -7

app.py CHANGED

@@ -19,8 +19,8 @@ This is just a visual for the auto evaluator. Note that the lines of the log vis
 with gr.Blocks(js=dark_mode_gradio_js) as demo:
     with gr.Tab("Application"):
         gr.Markdown(intro_md)
-        dummy = gr.Markdown(run_auto_eval, every=REFRESH_RATE, visible=False)
         output = gr.HTML(log_file_to_html_string, every=10)
+        dummy = gr.Markdown(run_auto_eval, every=REFRESH_RATE, visible=False)
 
 if __name__ == '__main__':
     demo.queue(default_concurrency_limit=40).launch(server_name="0.0.0.0", show_error=True, server_port=7860)
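Both components above rely on Gradio's polling hook: when a component's value is a callable and every= is set, Gradio re-runs that callable on a timer once the queue is enabled. Below is a minimal standalone sketch of that pattern under stated assumptions: read_log_tail and trigger_auto_eval are illustrative placeholders (not names from this repo), and the 300-second interval stands in for REFRESH_RATE.

import datetime
import gradio as gr

def read_log_tail() -> str:
    # Placeholder for log_file_to_html_string: returns the HTML re-rendered on each poll.
    return f"<pre>last refresh: {datetime.datetime.now():%H:%M:%S}</pre>"

def trigger_auto_eval() -> str:
    # Placeholder for run_auto_eval: periodic side-effectful work; its component stays hidden.
    return ""

with gr.Blocks() as demo:
    with gr.Tab("Application"):
        # A callable value plus every=<seconds> makes Gradio poll the function on a timer.
        output = gr.HTML(read_log_tail, every=10)
        dummy = gr.Markdown(trigger_auto_eval, every=300, visible=False)

if __name__ == "__main__":
    demo.queue().launch()  # the queue must be running for every= polling to fire
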
requirements.txt CHANGED

@@ -18,7 +18,11 @@ git+https://github.com/huggingface/lighteval.git#egg=lighteval
 accelerate==0.24.1
 sentencepiece
 
+# Evaluation suites
+lighteval
+lm_eval
+
 # Log Visualizer
-
+BeautifulSoup4==4.12.2
 lxml==4.9.3
 rich==13.3.4

src/backend/run_eval_suite_lighteval.py CHANGED

@@ -1,5 +1,5 @@
 import json
-import
+import argparse
 import logging
 from datetime import datetime
 
@@ -16,15 +16,18 @@ def run_evaluation(eval_request: EvalRequest, task_names: str, batch_size: int,
     if limit:
         logger.info("WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.")
 
-
-
+    args_dict = {
+        # Endpoint parameters
+        "endpoint_model_name":eval_request.model,
         "accelerator": accelerator,
         "vendor": vendor,
         "region": region,
         "instance_size": instance_size,
         "instance_type": instance_type,
-        "
-        "
+        "reuse_existing": False,
+        "model_dtype": eval_request.precision,
+        "revision": eval_request.revision,
+        # Save parameters
         "push_results_to_hub": True,
         "save_details": True,
         "push_details_to_hub": True,
@@ -32,10 +35,22 @@ def run_evaluation(eval_request: EvalRequest, task_names: str, batch_size: int,
         "cache_dir": CACHE_PATH,
         "results_org": RESULTS_REPO,
         "output_dir": local_dir,
+        "job_id": str(datetime.now()),
+        # Experiment parameters
         "override_batch_size": batch_size,
         "custom_tasks": "custom_tasks.py",
-        "tasks": task_names
+        "tasks": task_names,
+        "max_samples": limit,
+        "use_chat_template": False,
+        "system_prompt": None,
+        # Parameters which would be set to things by the kwargs if actually using argparse
+        "inference_server_address": None,
+        "model_args": None,
+        "num_fewshot_seeds": None,
+        "delta_weights": False,
+        "adapter_weights": False
     }
+    args = argparse.Namespace(**args_dict)
 
     try:
         results = main(args)
@@ -47,7 +62,7 @@ def run_evaluation(eval_request: EvalRequest, task_names: str, batch_size: int,
         dumped = json.dumps(results, indent=2)
         logger.info(dumped)
     except Exception: # if eval failed, we force a cleanup
-        env_config = EnvConfig(token=TOKEN, cache_dir=args
+        env_config = EnvConfig(token=TOKEN, cache_dir=args.cache_dir)
 
         model_config = create_model_config(args=args, accelerator=accelerator)
         model, _ = load_model(config=model_config, env_config=env_config)
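The core trick in this backend change is building an argparse.Namespace by hand so a CLI-style entry point can be called in-process instead of via the command line. Below is a minimal, self-contained sketch of that pattern; cli_main and the dict keys are illustrative stand-ins, not lighteval's actual interface.

import argparse

def cli_main(args: argparse.Namespace) -> dict:
    # Stand-in for an entry point written against ArgumentParser.parse_args() output.
    # It reads attributes, so every field it touches must exist on the namespace.
    return {"tasks": args.tasks, "max_samples": args.max_samples}

# Mirror what argparse would have produced, including fields that would normally
# come from CLI defaults (hence the explicit None/False values in the commit above).
args_dict = {
    "tasks": "example_suite|example_task|0|0",  # illustrative task string
    "max_samples": 10,
    "use_chat_template": False,
    "system_prompt": None,
}
args = argparse.Namespace(**args_dict)

print(cli_main(args))  # -> {'tasks': 'example_suite|example_task|0|0', 'max_samples': 10}

Keeping the dict explicit, rather than feeding a synthetic argv through the parser, makes every assumed default visible, which is what the added "Parameters which would be set to things by the kwargs if actually using argparse" block does.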