IliaLarchenko committed
Commit feab4b2
1 Parent(s): bb0f942

gpt-4o as default

Files changed (3)
  1. tests/analysis.py +4 -4
  2. tests/grader.py +1 -1
  3. tests/test_e2e.py +1 -1
tests/analysis.py CHANGED
@@ -119,7 +119,7 @@ def run_evaluation(
     if interview_types is None:
         interview_types = ["ml_design", "math", "ml_theory", "system_design", "sql", "coding"]
     if grader_models is None:
-        grader_models = ["gpt-4-turbo"]
+        grader_models = ["gpt-4o"]
     if llm_configs is None:
         llm_configs = [None]
 
@@ -281,7 +281,7 @@ def filter_df(df, prefixes=["problem", "interviewer", "feedback"]):
     return valid_df
 
 
-def generate_analysis_report(df, folder, focus=None, model="gpt-4-turbo"):
+def generate_analysis_report(df, folder, focus=None, model="gpt-4o"):
 
     client = OpenAI(base_url="https://api.openai.com/v1")
 
@@ -341,7 +341,7 @@ def analyze_and_improve_segment(df, segment_to_improve=None):
     filtered_df = filtered_df[filtered_df[prefix_columns].mean(axis=1) < th_score]
 
     # Generating an analysis report
-    comments_analysis = generate_analysis_report(filtered_df, None, focus=segment_to_improve, model="gpt-4-turbo")
+    comments_analysis = generate_analysis_report(filtered_df, None, focus=segment_to_improve, model="gpt-4o")
 
     # Constructing improvement prompt
     improvement_prompt = """You want to improve the prompts for LLM interviewer.
@@ -364,7 +364,7 @@ You can add 1-3 lines to each of prompts if needed, but you can't change or remove
 
     # Making API call to OpenAI
     client = OpenAI(base_url="https://api.openai.com/v1")
-    model = "gpt-4-turbo"
+    model = "gpt-4o"
     messages = [
         {"role": "system", "content": improvement_prompt},
         {"role": "user", "content": current_prompts},
tests/grader.py CHANGED
@@ -5,7 +5,7 @@ from openai import OpenAI
 from tests.testing_prompts import grader_prompt
 
 
-def grade(json_file_path, model="gpt-4-turbo", suffix=""):
+def grade(json_file_path, model="gpt-4o", suffix=""):
     client = OpenAI(base_url="https://api.openai.com/v1")
 
     with open(json_file_path) as file:
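A usage sketch of the updated default, assuming an OPENAI_API_KEY in the environment; only the grade signature and the overall_score field come from this diff, and the JSON path below is a hypothetical placeholder.

from tests.grader import grade

# Hypothetical saved interview record; the path is illustrative only.
json_file_path = "records/interview_example.json"

feedback = grade(json_file_path)                         # new default grader: gpt-4o
# feedback = grade(json_file_path, model="gpt-4-turbo")  # previous default, still selectable
print(feedback["overall_score"])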
tests/test_e2e.py CHANGED
@@ -5,7 +5,7 @@ from concurrent.futures import ThreadPoolExecutor
 
 def complete_and_grade_interview(interview_type):
     file_path, _ = complete_interview(interview_type, "test", model="gpt-3.5-turbo")
-    feedback = grade(file_path, model="gpt-4o")
+    feedback = grade(file_path, model="gpt-4o")
     assert feedback["overall_score"] > 0.4
     return feedback["overall_score"]
 
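The hunk header shows the test module imports ThreadPoolExecutor, so the interviews are presumably fanned out in parallel. The driver below is an assumption, not part of this commit: it reuses the complete_and_grade_interview function from the hunk, the interview types listed in tests/analysis.py, and the same 0.4 score threshold.

from concurrent.futures import ThreadPoolExecutor

from tests.test_e2e import complete_and_grade_interview

# Interview types copied from the run_evaluation defaults in tests/analysis.py.
interview_types = ["ml_design", "math", "ml_theory", "system_design", "sql", "coding"]

# Assumed driver: run each interview and its gpt-4o grading in parallel,
# then check every score against the threshold used in the test above.
with ThreadPoolExecutor(max_workers=3) as executor:
    scores = list(executor.map(complete_and_grade_interview, interview_types))

assert all(score > 0.4 for score in scores)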