Shaltiel committed
Commit eb2a0ba
1 Parent(s): 7798457

Updated json dump + fix evaluation

main_backend_lighteval.py CHANGED
@@ -1,52 +1,6 @@
import logging
import pprint

- import lighteval.models.endpoint_model
- class GoodInferenceEndpointModel(lighteval.models.endpoint_model.InferenceEndpointModel):
-
-     @property
-     def add_special_tokens(self):
-         return True
-
-     def greedy_until(self, requests: list, *args, **kwargs):
-         for request in requests:
-             request.tokenized_context = self.tok_encode(request.context)
-         return super().greedy_until(requests, *args, **kwargs)
-
-     def _InferenceEndpointModel__process_batch_generate(self, requests: list, returns_logits: bool):
-         return super()._InferenceEndpointModel__process_batch_generate(requests)
-
-     @property
-     def disable_tqdm(self) -> bool:
-         return False
-
- lighteval.models.endpoint_model.InferenceEndpointModel = GoodInferenceEndpointModel
-
- orig_inf_endpoint_cfg_init = lighteval.models.endpoint_model.InferenceEndpointModelConfig.__init__
- def patched_init(self, *args, **kwargs):
-     orig_inf_endpoint_cfg_init(self, *args, **kwargs)
-     self.name = self.name.replace('.', '-')
- lighteval.models.endpoint_model.InferenceEndpointModelConfig.__init__ = patched_init
-
- import lighteval.tasks.lighteval_task
- orig_task_cfg_post_init = lighteval.tasks.lighteval_task.LightevalTaskConfig.__post_init__
- def patched_post_init(self):
-     orig_task_cfg_post_init(self)
-     self.stop_sequence = list(self.stop_sequence) if self.stop_sequence is not None else None
- lighteval.tasks.lighteval_task.LightevalTaskConfig.__post_init__ = patched_post_init
-
-
-
- # import huggingface_hub
- # orig_create_endpoint = huggingface_hub.create_inference_endpoint
- # def new_create_endpoint(*args, **kwargs):
- #     print('$$$$$$$$$$$$$$$$$ here 1')
- #     if 'custom_image' in kwargs and kwargs['custom_image']['url'] == "ghcr.io/huggingface/text-generation-inference:1.1.0":
- #         print('$$$$$$$$$$$$$ here 2')
- #         kwargs['custom_image']['url'] = "registry.internal.huggingface.tech/api-inference/community/text-generation-inference:gemma-ie"
- #     return orig_create_endpoint(*args, **kwargs)
- # huggingface_hub.create_inference_endpoint = new_create_endpoint
-
from huggingface_hub import snapshot_download

logging.getLogger("openai").setLevel(logging.WARNING)
 
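Note on the deletions above: the import-time monkey-patches (forcing special tokens for endpoint generation, replacing dots in endpoint names, and coercing stop sequences to lists) are dropped here, presumably because the pinned lighteval fork added in requirements.txt below carries equivalent fixes. For reference, the removed block relied on the wrap-and-delegate patching pattern sketched below; EndpointConfig and its name rule are illustrative stand-ins, not lighteval API.

# Minimal sketch of the wrap-and-delegate monkey-patch pattern used by the removed code.
# EndpointConfig is a hypothetical stand-in for lighteval's InferenceEndpointModelConfig.
class EndpointConfig:
    def __init__(self, name: str):
        self.name = name

_orig_init = EndpointConfig.__init__

def _patched_init(self, *args, **kwargs):
    _orig_init(self, *args, **kwargs)         # run the original constructor first
    self.name = self.name.replace('.', '-')   # then normalize the name, as the removed patch did

EndpointConfig.__init__ = _patched_init

print(EndpointConfig("my.model.v1").name)  # -> my-model-v1
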
requirements.txt CHANGED
@@ -13,7 +13,8 @@ requests==2.28.2
tqdm==4.65.0
transformers
tokenizers>=0.15.0
- git+https://github.com/huggingface/lighteval.git#egg=lighteval
+ # git+https://github.com/huggingface/lighteval.git#egg=lighteval
+ git+https://github.com/shaltielshmid/lighteval.git@temp-for-heb-leaderboard#egg=lighteval
accelerate==0.24.1
sentencepiece
Levenshtein
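
The dependency change above swaps upstream lighteval for the temp-for-heb-leaderboard branch of a fork, presumably so the backend no longer needs the local monkey-patches removed in main_backend_lighteval.py. A hypothetical post-install check, not part of this repository, could confirm that the VCS requirement resolved to an installed lighteval distribution:

# Hypothetical sanity check: verify that pip resolved a lighteval distribution.
from importlib.metadata import version, PackageNotFoundError

try:
    print("lighteval resolved, version:", version("lighteval"))
except PackageNotFoundError:
    print("lighteval missing; run: pip install -r requirements.txt")
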
src/backend/run_eval_suite_lighteval.py CHANGED
@@ -7,6 +7,7 @@ from argparse import Namespace
from lighteval.main_accelerate import main, EnvConfig, create_model_config, load_model
from src.envs import RESULTS_REPO, CACHE_PATH, TOKEN, OWNER
from src.backend.manage_requests import EvalRequest
+ from lighteval.logging.evaluation_tracker import EnhancedJSONEncoder

logging.getLogger("openai").setLevel(logging.WARNING)

@@ -45,11 +46,11 @@ def run_evaluation(eval_request: EvalRequest, task_names: str, batch_size: int,
    try:
        results = main(args)

-         results["config"]["model_dtype"] = eval_request.precision
-         results["config"]["model_name"] = eval_request.model
-         results["config"]["model_sha"] = eval_request.revision
+         results["config_general"]["model_dtype"] = eval_request.precision
+         results["config_general"]["model_name"] = eval_request.model
+         results["config_general"]["model_sha"] = eval_request.revision

-         dumped = json.dumps(results, indent=2)
+         dumped = json.dumps(results, cls=EnhancedJSONEncoder, indent=2)
        print(dumped)
    except Exception as ex:  # if eval failed, we force a cleanup
        import traceback
 
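Context for the json.dumps change above: the results returned by lighteval likely contain objects (such as dataclass-based configs) that the standard JSON encoder rejects with a TypeError, and the metadata keys now live under "config_general" rather than "config". Passing a dataclass-aware encoder fixes the dump. The sketch below shows roughly what such an encoder does, assuming EnhancedJSONEncoder follows the common dataclass-to-dict recipe; the names in the example are illustrative, not lighteval's actual objects.

import json
import dataclasses

# Minimal sketch of a dataclass-aware JSON encoder; lighteval's EnhancedJSONEncoder
# is assumed to behave roughly like this, but its real implementation may differ.
class DataclassJSONEncoder(json.JSONEncoder):
    def default(self, o):
        if dataclasses.is_dataclass(o):
            return dataclasses.asdict(o)   # serialize dataclass instances as plain dicts
        return super().default(o)          # otherwise fall back to the standard TypeError

@dataclasses.dataclass
class GenerationParameters:                # hypothetical stand-in for a config object in the results
    temperature: float = 0.0
    max_new_tokens: int = 256

results = {"config_general": {"generation": GenerationParameters()}}
print(json.dumps(results, cls=DataclassJSONEncoder, indent=2))  # plain json.dumps would raise TypeError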