IliaLarchenko committed
Commit: 437e3cd
Parent(s): 6ea7ebc

Added retries for models with low rate limits

Files changed:
- tests/analysis.py +17 -3
- tests/candidate.py +3 -1
tests/analysis.py
CHANGED

@@ -80,9 +80,23 @@ def complete_and_grade(interview_params, exp_name, grader_models, candidate_model
     interview_type, attempt_num, llm_config = interview_params

     feedback_list = []
+    attempt_successful = False
+    for attempt in range(3):  # Retry up to 3 times
+        try:
+            file_path, _ = complete_interview(interview_type, exp_name, llm_config, model=candidate_model, pause=attempt * 5)
+            print(
+                f"Attempt {attempt_num + 1}, retry {attempt + 1} interview simulation of {interview_type} by {llm_config.name} completed successfully"
+            )
+            attempt_successful = True
+            break
+        except Exception as e:
+            print(f"Retry {attempt + 1} for attempt {attempt_num + 1} of {interview_type} by {llm_config.name} failed with error: {e}")
+
+    if not attempt_successful:
+        print(f"All retries failed for attempt {attempt_num + 1} of {interview_type} by {llm_config.name}")
+        return feedback_list
+
     try:
-        file_path, _ = complete_interview(interview_type, exp_name, llm_config, model=candidate_model)
-        print(f"Attempt {attempt_num + 1} interview simulation of {interview_type} by {llm_config.name} completed successfully")
         for i, grader_model in enumerate(grader_models):
             feedback = grade_attempt(file_path, grader_model, i)
             if feedback:
@@ -91,7 +105,7 @@ def complete_and_grade(interview_params, exp_name, grader_models, candidate_model
             print(f"Overall score: {feedback['overall_score']}")

     except Exception as e:
-        print(f"
+        print(f"Grading for attempt {attempt_num + 1} of {interview_type} by {llm_config.name} failed with error: {e}")

     if len(feedback_list) == 0:
         print(f"Attempt {attempt_num + 1} of {interview_type} by {llm_config.name} returned an empty list")
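The retry added above is a plain linear-backoff loop: each of up to three tries passes a growing pause (0s, then 5s, then 10s) into complete_interview, and the loop exits on the first success. A minimal standalone sketch of the same pattern; retry_with_pause and flaky_api_call are hypothetical stand-ins, not names from the repo:

import time

def retry_with_pause(fn, max_retries=3, pause_step=5):
    # Pass a growing pause into each try: 0s, then 5s, then 10s.
    for attempt in range(max_retries):
        try:
            return fn(pause=attempt * pause_step)
        except Exception as e:
            print(f"Retry {attempt + 1} failed with error: {e}")
    return None  # all retries exhausted

def flaky_api_call(pause=0):
    time.sleep(pause)  # throttle before hitting a rate-limited API
    return "ok"

print(retry_with_pause(flaky_api_call))  # -> ok

Unlike exponential backoff, the pause here grows linearly, so the total sleep across all three tries is bounded at 15 seconds.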
tests/candidate.py
CHANGED

@@ -15,7 +15,7 @@ from resources.prompts import prompts
 from tests.testing_prompts import candidate_prompt


-def complete_interview(interview_type, exp_name, llm_config=None, requirements="", difficulty="", topic="", model="gpt-3.5-turbo"):
+def complete_interview(interview_type, exp_name, llm_config=None, requirements="", difficulty="", topic="", model="gpt-3.5-turbo", pause=0):
     client = OpenAI(base_url="https://api.openai.com/v1")
     config = Config()
     if llm_config:
@@ -98,6 +98,8 @@ def complete_interview(interview_type, exp_name, llm_config=None, requirements="
     if len(message_split) > 1:
         interview_data["transcript"].append(f"INTERVIEWER HIDDEN NOTE: {message_split[1]}")

+    time.sleep(pause)  # to prevent exceeding rate limits
+
     interview_data["feedback"] = llm.end_interview_full(problem_statement_text, messages_interviewer, interview_type)
     interview_data["average_response_time_seconds"] = round(sum(response_times) / len(response_times), 2) if response_times else 0

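On the callee side, the new pause parameter sleeps immediately before end_interview_full, so only the final feedback-generating request is throttled rather than every turn of the interview. A hedged usage sketch; the interview_type and exp_name values below are made up for illustration:

from tests.candidate import complete_interview

# Only the pause keyword comes from this commit; argument values are assumed.
file_path, _ = complete_interview(
    "coding",        # interview_type (example value)
    "retry_demo",    # exp_name (example value)
    model="gpt-3.5-turbo",
    pause=10,        # sleep 10 seconds before requesting the final feedback
)

Placing the sleep just before the feedback call targets the request presumably most likely to trip a rate limit, without slowing the turn-by-turn exchange.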