Miaoran000 committed
Commit 7ef82ad
1 Parent(s): dcf13df

update backend

src/backend/model_operations.py CHANGED
@@ -23,11 +23,14 @@ import anthropic
 import replicate
 # pip install -U google-generativeai
 import google.generativeai as genai
+from mistralai.client import MistralClient
+from mistralai.models.chat_completion import ChatMessage
+
 
 import src.backend.util as util
 import src.envs as envs
 
-litellm.set_verbose=False
+litellm.set_verbose=True
 
 # Set up basic configuration for logging
 logging.basicConfig(level=logging.INFO,
@@ -171,9 +174,11 @@ class SummaryGenerator:
     def generate_summary(self, system_prompt: str, user_prompt: str):
         # Using Together AI API
         using_together_api = False
-        together_ai_api_models = ['mixtral', 'dbrx', 'wizardlm', 'llama-3-', 'qwen'] #, 'mistralai'
+        together_ai_api_models = ['mixtral', 'dbrx', 'wizardlm', 'llama-3-', 'qwen', 'zero-one-ai'] #, 'mistralai'
         using_replicate_api = False
         replicate_api_models = ['snowflake', 'llama-3.1-405b']
+        using_pipeline = False
+        pipeline_models = ['llama-3.1', 'phi-3-mini','falcon-7b']
 
         for replicate_api_model in replicate_api_models:
             if replicate_api_model in self.model_id.lower():
@@ -186,7 +191,12 @@ class SummaryGenerator:
                 using_together_api = True
                 break
 
-
+        if not using_replicate_api and not using_together_api:
+            for pipeline_model in pipeline_models:
+                if pipeline_model in self.model_id.lower():
+                    using_pipeline = True
+                    break
+
         # if 'mixtral' in self.model_id.lower() or 'dbrx' in self.model_id.lower() or 'wizardlm' in self.model_id.lower(): # For mixtral and dbrx models, use Together AI API
         if using_together_api:
             # print('using together api')
@@ -221,10 +231,11 @@ class SummaryGenerator:
                 result = result["text"]
                 result_candidates = [result_cancdidate for result_cancdidate in result.split('\n\n') if len(result_cancdidate) > 0]
                 result = result_candidates[0]
-                print(result)
+                # print(result)
             except:
-                print(response)
+                # print(response)
                 result = ''
+            print(result)
             return result
 
         # Using OpenAI API
@@ -334,6 +345,24 @@ class SummaryGenerator:
             print(result)
             return result
 
+        elif 'mistral-large' in self.model_id.lower():
+            api_key = os.environ["MISTRAL_API_KEY"]
+            client = MistralClient(api_key=api_key)
+
+            messages = [
+                ChatMessage(role="system", content=system_prompt),
+                ChatMessage(role="user", content=user_prompt)
+            ]
+
+            # No streaming
+            chat_response = client.chat(
+                model=self.model_id,
+                messages=messages,
+            )
+            result = chat_response.choices[0].message.content
+            print(result)
+            return result
+
         # Using HF API or download checkpoints
         elif self.local_model is None and self.local_pipeline is None:
             try: # try use HuggingFace API
@@ -347,23 +376,24 @@ class SummaryGenerator:
                     api_base=self.api_base,
                 )
                 result = response['choices'][0]['message']['content']
+                result = result.split('<|im_end|>')[0]
                 print(result)
                 return result
             except Exception as e:
-                if 'Rate limit reached' in str(e):
+                if 'Rate limit reached' in str(e) and 'yi-1.5' not in self.model_id.lower():
                     wait_time = 300
                     current_time = datetime.now().strftime('%H:%M:%S')
                     print(f"Rate limit hit at {current_time}. Waiting for 5 minutes before retrying...")
                     time.sleep(wait_time)
                 else:
-                    try:
+                    if using_pipeline:
                         self.local_pipeline = pipeline(
                             "text-generation",
                             model=self.model_id,
                             model_kwargs={"torch_dtype": torch.bfloat16},
                             device_map="auto",
                         )
-                    except:
+                    else:
                         self.tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf" if 'openelm' in self.model_id.lower() else self.model_id, trust_remote_code=True)
                         print("Tokenizer loaded")
                         self.local_model = AutoModelForCausalLM.from_pretrained(self.model_id, trust_remote_code=True, device_map="auto", torch_dtype="auto")
@@ -372,6 +402,7 @@ class SummaryGenerator:
 
         # Using local model/pipeline
         if self.local_pipeline:
+            print('Using Transformers pipeline')
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
@@ -385,6 +416,7 @@ class SummaryGenerator:
            return result
 
        elif self.local_model: # cannot call API. using local model / pipeline
+            print('Using local model')
            if 'gemma' in self.model_id.lower() or 'mistral-7b' in self.model_id.lower():
                messages=[
                    # gemma-1.1, mistral-7b does not accept system role
@@ -395,20 +427,29 @@ class SummaryGenerator:
            elif 'phi-2' in self.model_id.lower():
                prompt = system_prompt + '\n' + user_prompt
 
+            elif 'intel' in self.model_id.lower():
+                prompt = f"### System:\n{system_prompt}\n### User:\n{user_prompt}\n### Assistant:\n"
+
            else:
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ]
                prompt = self.tokenizer.apply_chat_template(messages,add_generation_prompt=True, tokenize=False)
-            print(prompt)
-            print('-'*50)
+            # print(prompt)
+            # print('-'*50)
            input_ids = self.tokenizer(prompt, return_tensors="pt").to('cuda')
            with torch.no_grad():
                outputs = self.local_model.generate(**input_ids, max_new_tokens=250, do_sample=True, temperature=0.01, pad_token_id=self.tokenizer.eos_token_id)
+            if 'glm' in self.model_id.lower():
+                outputs = outputs[:, input_ids['input_ids'].shape[1]:]
            result = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            if 'gemma-2' in self.model_id.lower():
                result = result.split(user_prompt + '\nmodel')[-1].strip()
+
+            elif 'intel' in self.model_id.lower():
+                result = result.split("### Assistant:\n")[-1]
+
            else:
                result = result.replace(prompt.strip(), '')
 
@@ -486,7 +527,7 @@ class EvaluationModel:
        try:
            # summary_pieces = summary.split('\n')
            # summary = summary_pieces[0] if len(summary_pieces[0].strip()) > 0 else summary_pieces[1]
-            summary = summary.replace('<bos>','').replace('<eos>','')
+            summary = summary.replace('<bos>','').replace('<eos>','').strip()
            score = self.model.predict([doc, summary])# [0]
            if not isinstance(score, float):
                try:
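With this change, generate_summary routes a model to Together AI, Replicate, a local Transformers pipeline, or the remaining API/local-checkpoint paths by substring-matching self.model_id, with the pipeline list only consulted when neither hosted API matched. The sketch below approximates that selection order as a standalone helper for reference only; select_backend is a hypothetical name and is not part of this commit, while the lists mirror the ones in the diff.

# Hypothetical helper illustrating the backend selection introduced above.
# The lists mirror the diff; the function does not exist in the repository.
TOGETHER_AI_API_MODELS = ['mixtral', 'dbrx', 'wizardlm', 'llama-3-', 'qwen', 'zero-one-ai']
REPLICATE_API_MODELS = ['snowflake', 'llama-3.1-405b']
PIPELINE_MODELS = ['llama-3.1', 'phi-3-mini', 'falcon-7b']

def select_backend(model_id: str) -> str:
    """Approximate the flag logic in SummaryGenerator.generate_summary."""
    mid = model_id.lower()
    if any(name in mid for name in REPLICATE_API_MODELS):
        return 'replicate'
    if any(name in mid for name in TOGETHER_AI_API_MODELS):
        return 'together'
    # using_pipeline is only set when neither hosted API matched.
    if any(name in mid for name in PIPELINE_MODELS):
        return 'transformers-pipeline'
    return 'other'  # OpenAI / Anthropic / Mistral / HF API or local checkpoint paths

assert select_backend('meta-llama/Meta-Llama-3.1-405B-Instruct') == 'replicate'
assert select_backend('microsoft/Phi-3-mini-4k-instruct') == 'transformers-pipeline'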
src/backend/run_eval_suite.py CHANGED
@@ -56,8 +56,10 @@ def run_evaluation(eval_request: EvalRequest, batch_size, device,
             path_in_repo=envs.LEADERBOARD_DATASET_PATH.split('/')[-1],
             repo_id=envs.LEADERBOARD_DATASET_REPO,
             repo_type="dataset",
+            commit_message=f"Update results for {eval_request.model}"
         )
-
+        logging.info(f"Leaderboard result dataset has been updated to {envs.LEADERBOARD_DATASET_PATH}/{envs.LEADERBOARD_DATASET_PATH.split('/')[-1]}")
+
     except Exception as e:
         logging.error(f"Error during evaluation: {e}")
         raise
@@ -70,10 +72,10 @@ def run_evaluation(eval_request: EvalRequest, batch_size, device,
     os.makedirs(output_folder, exist_ok=True)
     with open(output_path, "w") as f:
         f.write(dumped)
-    print(f"Results have been saved to{output_path}")
+    logging.info(f"Results have been saved to{output_path}")
 
     if not need_check:
-        print("Path in the repo:", f"{eval_request.model}/results_{datetime.now()}.json")
+        logging.info(f"Path in the repo: {eval_request.model}/results_{datetime.now()}.json")
         envs.API.upload_file(
             path_or_fileobj=output_path,
             path_in_repo=f"{eval_request.model}/results_{datetime.now()}.json",
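The commit_message argument added to the leaderboard upload is passed through to the Hub commit that upload_file creates. Assuming envs.API is a huggingface_hub.HfApi instance, as the call pattern in the diff suggests, the upload reduces to roughly the sketch below; the file name and repo id are placeholders rather than values from this repository.

# Minimal sketch, assuming envs.API is a huggingface_hub.HfApi instance.
# Repo id and file names are placeholders, not values from the repo.
from huggingface_hub import HfApi

api = HfApi()  # token resolved from HF_TOKEN or the cached login
api.upload_file(
    path_or_fileobj="leaderboard_results.csv",   # local file to push
    path_in_repo="leaderboard_results.csv",      # destination inside the dataset repo
    repo_id="my-org/leaderboard-dataset",        # placeholder dataset repo
    repo_type="dataset",
    commit_message="Update results for my-org/my-model",  # shows up in the repo's commit history
)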