Updated json dump + fix evaluation
- main_backend_lighteval.py +0 -46
- requirements.txt +2 -1
- src/backend/run_eval_suite_lighteval.py +5 -4
main_backend_lighteval.py
CHANGED
@@ -1,52 +1,6 @@
 import logging
 import pprint
 
-import lighteval.models.endpoint_model
-class GoodInferenceEndpointModel(lighteval.models.endpoint_model.InferenceEndpointModel):
-
-    @property
-    def add_special_tokens(self):
-        return True
-
-    def greedy_until(self, requests: list, *args, **kwargs):
-        for request in requests:
-            request.tokenized_context = self.tok_encode(request.context)
-        return super().greedy_until(requests, *args, **kwargs)
-
-    def _InferenceEndpointModel__process_batch_generate(self, requests: list, returns_logits: bool):
-        return super()._InferenceEndpointModel__process_batch_generate(requests)
-
-    @property
-    def disable_tqdm(self) -> bool:
-        return False
-
-lighteval.models.endpoint_model.InferenceEndpointModel = GoodInferenceEndpointModel
-
-orig_inf_endpoint_cfg_init = lighteval.models.endpoint_model.InferenceEndpointModelConfig.__init__
-def patched_init(self, *args, **kwargs):
-    orig_inf_endpoint_cfg_init(self, *args, **kwargs)
-    self.name = self.name.replace('.', '-')
-lighteval.models.endpoint_model.InferenceEndpointModelConfig.__init__ = patched_init
-
-import lighteval.tasks.lighteval_task
-orig_task_cfg_post_init = lighteval.tasks.lighteval_task.LightevalTaskConfig.__post_init__
-def patched_post_init(self):
-    orig_task_cfg_post_init(self)
-    self.stop_sequence = list(self.stop_sequence) if self.stop_sequence is not None else None
-lighteval.tasks.lighteval_task.LightevalTaskConfig.__post_init__ = patched_post_init
-
-
-
-# import huggingface_hub
-# orig_create_endpoint = huggingface_hub.create_inference_endpoint
-# def new_create_endpoint(*args, **kwargs):
-#     print('$$$$$$$$$$$$$$$$$ here 1')
-#     if 'custom_image' in kwargs and kwargs['custom_image']['url'] == "ghcr.io/huggingface/text-generation-inference:1.1.0":
-#         print('$$$$$$$$$$$$$ here 2')
-#         kwargs['custom_image']['url'] = "registry.internal.huggingface.tech/api-inference/community/text-generation-inference:gemma-ie"
-#     return orig_create_endpoint(*args, **kwargs)
-# huggingface_hub.create_inference_endpoint = new_create_endpoint
-
 from huggingface_hub import snapshot_download
 
 logging.getLogger("openai").setLevel(logging.WARNING)
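For context on one of the deleted workarounds: the `LightevalTaskConfig.__post_init__` patch coerced `stop_sequence` to a list because a tuple-typed stop sequence breaks list operations further down the pipeline. A minimal sketch of that failure mode, assuming downstream code concatenates `stop_sequence` with a list (the values here are illustrative):

# Illustrative only: why the removed patch coerced stop_sequence to a list.
stop_sequence = ("###",)                # tuple, as a task config might define it
try:
    stop_sequence + ["\n"]              # TypeError: can't concatenate tuple and list
except TypeError:
    stop_sequence = list(stop_sequence) # the coercion the patch applied
print(stop_sequence + ["\n"])           # ['###', '\n']

These fixes now live in the forked lighteval pinned in requirements.txt below, so the runtime monkey-patching can be dropped.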
requirements.txt
CHANGED
@@ -13,7 +13,8 @@ requests==2.28.2
 tqdm==4.65.0
 transformers
 tokenizers>=0.15.0
-git+https://github.com/huggingface/lighteval.git#egg=lighteval
+# git+https://github.com/huggingface/lighteval.git#egg=lighteval
+git+https://github.com/shaltielshmid/lighteval.git@temp-for-heb-leaderboard#egg=lighteval
 accelerate==0.24.1
 sentencepiece
 Levenshtein
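Pip's VCS requirement syntax `git+<url>@<ref>#egg=<name>` pins the install to the fork's `temp-for-heb-leaderboard` branch, while the commented-out line keeps the upstream source on record. A quick sanity check that the fork resolved, assuming it keeps the `lighteval` package name:

# Sketch: confirm lighteval imports and show where it was installed from.
import lighteval
print(lighteval.__file__)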
src/backend/run_eval_suite_lighteval.py
CHANGED
@@ -7,6 +7,7 @@ from argparse import Namespace
 from lighteval.main_accelerate import main, EnvConfig, create_model_config, load_model
 from src.envs import RESULTS_REPO, CACHE_PATH, TOKEN, OWNER
 from src.backend.manage_requests import EvalRequest
+from lighteval.logging.evaluation_tracker import EnhancedJSONEncoder
 
 logging.getLogger("openai").setLevel(logging.WARNING)
 
@@ -45,11 +46,11 @@ def run_evaluation(eval_request: EvalRequest, task_names: str, batch_size: int,
     try:
         results = main(args)
 
-        results["
-        results["
-        results["
+        results["config_general"]["model_dtype"] = eval_request.precision
+        results["config_general"]["model_name"] = eval_request.model
+        results["config_general"]["model_sha"] = eval_request.revision
 
-        dumped = json.dumps(results, indent=2)
+        dumped = json.dumps(results, cls=EnhancedJSONEncoder, indent=2)
         print(dumped)
     except Exception as ex:  # if eval failed, we force a cleanup
         import traceback
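The switch to `cls=EnhancedJSONEncoder` matters because lighteval's results dict can contain objects the default encoder rejects, such as dataclasses. The sketch below shows the general pattern with a stand-in encoder; it is not EnhancedJSONEncoder's actual source, and the dataclass handling is an assumption about what it does:

import dataclasses
import json

# Stand-in for EnhancedJSONEncoder (hypothetical implementation): convert
# dataclass values via dataclasses.asdict, fall back to str() for the rest.
class SketchJSONEncoder(json.JSONEncoder):
    def default(self, o):
        if dataclasses.is_dataclass(o):
            return dataclasses.asdict(o)
        return str(o)

@dataclasses.dataclass
class GenerationParams:
    max_new_tokens: int = 256

results = {"config_general": {"generation": GenerationParams()}}
print(json.dumps(results, cls=SketchJSONEncoder, indent=2))  # serializes cleanly
# json.dumps(results, indent=2) would raise "TypeError: ... not JSON serializable"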