Spaces:
Sleeping
Sleeping
IliaLarchenko
committed on
Commit
•
feab4b2
1
Parent(s):
bb0f942
gpt-4o as default
Browse files
- tests/analysis.py +4 -4
- tests/grader.py +1 -1
- tests/test_e2e.py +1 -1
tests/analysis.py
CHANGED
@@ -119,7 +119,7 @@ def run_evaluation(
|
|
119 |
if interview_types is None:
|
120 |
interview_types = ["ml_design", "math", "ml_theory", "system_design", "sql", "coding"]
|
121 |
if grader_models is None:
|
122 |
-
grader_models = ["gpt-
|
123 |
if llm_configs is None:
|
124 |
llm_configs = [None]
|
125 |
|
@@ -281,7 +281,7 @@ def filter_df(df, prefixes=["problem", "interviewer", "feedback"]):
|
|
281 |
return valid_df
|
282 |
|
283 |
|
284 |
-
def generate_analysis_report(df, folder, focus=None, model="gpt-
|
285 |
|
286 |
client = OpenAI(base_url="https://api.openai.com/v1")
|
287 |
|
@@ -341,7 +341,7 @@ def analyze_and_improve_segment(df, segment_to_improve=None):
|
|
341 |
filtered_df = filtered_df[filtered_df[prefix_columns].mean(axis=1) < th_score]
|
342 |
|
343 |
# Generating an analysis report
|
344 |
-
comments_analysis = generate_analysis_report(filtered_df, None, focus=segment_to_improve, model="gpt-
|
345 |
|
346 |
# Constructing improvement prompt
|
347 |
improvement_prompt = """You want to improve the prompts for LLM interviewer.
|
@@ -364,7 +364,7 @@ You can add 1-3 lines to each of prompts if needed, but you can't change or remo
|
|
364 |
|
365 |
# Making API call to OpenAI
|
366 |
client = OpenAI(base_url="https://api.openai.com/v1")
|
367 |
-
model = "gpt-
|
368 |
messages = [
|
369 |
{"role": "system", "content": improvement_prompt},
|
370 |
{"role": "user", "content": current_prompts},
|
|
|
119 |
if interview_types is None:
|
120 |
interview_types = ["ml_design", "math", "ml_theory", "system_design", "sql", "coding"]
|
121 |
if grader_models is None:
|
122 |
+
grader_models = ["gpt-4o"]
|
123 |
if llm_configs is None:
|
124 |
llm_configs = [None]
|
125 |
|
|
|
281 |
return valid_df
|
282 |
|
283 |
|
284 |
+
def generate_analysis_report(df, folder, focus=None, model="gpt-4o"):
|
285 |
|
286 |
client = OpenAI(base_url="https://api.openai.com/v1")
|
287 |
|
|
|
341 |
filtered_df = filtered_df[filtered_df[prefix_columns].mean(axis=1) < th_score]
|
342 |
|
343 |
# Generating an analysis report
|
344 |
+
comments_analysis = generate_analysis_report(filtered_df, None, focus=segment_to_improve, model="gpt-4o")
|
345 |
|
346 |
# Constructing improvement prompt
|
347 |
improvement_prompt = """You want to improve the prompts for LLM interviewer.
|
|
|
364 |
|
365 |
# Making API call to OpenAI
|
366 |
client = OpenAI(base_url="https://api.openai.com/v1")
|
367 |
+
model = "gpt-4o"
|
368 |
messages = [
|
369 |
{"role": "system", "content": improvement_prompt},
|
370 |
{"role": "user", "content": current_prompts},
|
tests/grader.py
CHANGED
@@ -5,7 +5,7 @@ from openai import OpenAI
|
|
5 |
from tests.testing_prompts import grader_prompt
|
6 |
|
7 |
|
8 |
-
def grade(json_file_path, model="gpt-
|
9 |
client = OpenAI(base_url="https://api.openai.com/v1")
|
10 |
|
11 |
with open(json_file_path) as file:
|
|
|
5 |
from tests.testing_prompts import grader_prompt
|
6 |
|
7 |
|
8 |
+
def grade(json_file_path, model="gpt-4o", suffix=""):
|
9 |
client = OpenAI(base_url="https://api.openai.com/v1")
|
10 |
|
11 |
with open(json_file_path) as file:
|
tests/test_e2e.py
CHANGED
@@ -5,7 +5,7 @@ from concurrent.futures import ThreadPoolExecutor
|
|
5 |
|
6 |
def complete_and_grade_interview(interview_type):
|
7 |
file_path, _ = complete_interview(interview_type, "test", model="gpt-3.5-turbo")
|
8 |
-
feedback = grade(file_path, model="gpt-
|
9 |
assert feedback["overall_score"] > 0.4
|
10 |
return feedback["overall_score"]
|
11 |
|
|
|
5 |
|
6 |
def complete_and_grade_interview(interview_type):
|
7 |
file_path, _ = complete_interview(interview_type, "test", model="gpt-3.5-turbo")
|
8 |
+
feedback = grade(file_path, model="gpt-4o")
|
9 |
assert feedback["overall_score"] > 0.4
|
10 |
return feedback["overall_score"]
|
11 |
|