pminervini committed on
Commit
95cc038
·
1 Parent(s): 62679c8
cli/averitec-upload-cli.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ from datasets import load_dataset
4
+
5
+ path = 'pminervini/averitec'
6
+
7
+ ds = load_dataset("json",
8
+ data_files={
9
+ 'train': '/Users/pasquale/workspace/AVeriTeC/data/train.json',
10
+ 'dev': '/Users/pasquale/workspace/AVeriTeC/data/dev.json'
11
+ })
12
+ ds.push_to_hub(path)
cli/halueval-cli.py CHANGED
@@ -33,7 +33,13 @@ def main():
33
  eval_request = [r for r in eval_requests if 'bloom-560m' in r.model][0]
34
 
35
  # my_task = Task("memo-trap", "acc", "memo-trap", 0)
36
- my_task = Task("selfcheckgpt", "avg-selfcheckgpt", "SGPT", 2)
 
 
 
 
 
 
37
 
38
  TASKS_HARNESS = [my_task]
39
  # task_names = ['triviaqa']
@@ -48,7 +54,10 @@ def main():
48
 
49
  for task in TASKS_HARNESS:
50
  print(f"Selected Tasks: [{task}]")
51
- results = evaluator.simple_evaluate(model="hf", model_args=eval_request.get_model_args(), tasks=[task.benchmark], num_fewshot=4,
 
 
 
52
  batch_size=1, device="mps", use_cache=None, limit=10, write_out=True)
53
  print('AAA', results["results"])
54
 
 
33
  eval_request = [r for r in eval_requests if 'bloom-560m' in r.model][0]
34
 
35
  # my_task = Task("memo-trap", "acc", "memo-trap", 0)
36
+ # my_task = Task("selfcheckgpt", "avg-selfcheckgpt", "SGPT", 2)
37
+ # my_task = Task("ifeval", "prompt_level_strict_acc", "IFEval", 0)
38
+ my_task = Task("fever10", "acc", "FEVER", 5)
39
+
40
+ eval_logger = utils.eval_logger
41
+ import logging
42
+ eval_logger.setLevel(getattr(logging, "DEBUG"))
43
 
44
  TASKS_HARNESS = [my_task]
45
  # task_names = ['triviaqa']
 
54
 
55
  for task in TASKS_HARNESS:
56
  print(f"Selected Tasks: [{task}]")
57
+ import torch
58
+
59
+ # breakpoint()
60
+ results = evaluator.simple_evaluate(model="hf", model_args=eval_request.get_model_args(), tasks=[task.benchmark], num_fewshot=task.num_fewshot,
61
  batch_size=1, device="mps", use_cache=None, limit=10, write_out=True)
62
  print('AAA', results["results"])
63
 
cli/submit-cli.py CHANGED
@@ -120,7 +120,10 @@ def main():
120
  model_lst = [m for m in model_lst]
121
 
122
  def custom_filter(m) -> bool:
123
- return m.pipeline_tag in {'text-generation'} and 'en' in m.tags and m.private is False
 
 
 
124
 
125
  filtered_model_lst = sorted([m for m in model_lst if custom_filter(m)], key=lambda m: m.downloads, reverse=True)
126
 
@@ -138,6 +141,8 @@ def main():
138
 
139
  requested_model_names = {e.model for e in eval_requests}
140
 
 
 
141
  for i in range(min(200, len(filtered_model_lst))):
142
  model = filtered_model_lst[i]
143
 
@@ -157,7 +162,7 @@ def main():
157
 
158
  if 'mage' not in model.id:
159
  add_new_eval(model=model.id, base_model='', revision='main', precision='float32', private=False, weight_type='Original', model_type=model_type)
160
- time.sleep(60)
161
  else:
162
  print(f'Model {model.id} already added, not adding it to the queue again.')
163
 
 
120
  model_lst = [m for m in model_lst]
121
 
122
  def custom_filter(m) -> bool:
123
+ # res = m.pipeline_tag in {'text-generation'} and 'en' in m.tags and m.private is False
124
+ # res = m.pipeline_tag in {'text-generation'} and 'en' in m.tags and m.private is False and 'mistralai/' in m.id
125
+ res = 'mistralai/' in m.id
126
+ return res
127
 
128
  filtered_model_lst = sorted([m for m in model_lst if custom_filter(m)], key=lambda m: m.downloads, reverse=True)
129
 
 
141
 
142
  requested_model_names = {e.model for e in eval_requests}
143
 
144
+ breakpoint()
145
+
146
  for i in range(min(200, len(filtered_model_lst))):
147
  model = filtered_model_lst[i]
148
 
 
162
 
163
  if 'mage' not in model.id:
164
  add_new_eval(model=model.id, base_model='', revision='main', precision='float32', private=False, weight_type='Original', model_type=model_type)
165
+ time.sleep(10)
166
  else:
167
  print(f'Model {model.id} already added, not adding it to the queue again.')
168