Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 1,840 Bytes
fb03edc 324d83a 1aaf3fc a49fb5a e12b285 1aaf3fc e12b285 a49fb5a e12b285 a49fb5a e12b285 a49fb5a 1aaf3fc fb03edc e12b285 a49fb5a e12b285 1aaf3fc e12b285 1aaf3fc a49fb5a 1aaf3fc a49fb5a 1aaf3fc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
from tests.candidate import complete_interview
from tests.grader import grade
from concurrent.futures import ThreadPoolExecutor
import random
from typing import List
def complete_and_grade_interview(interview_type: str, mode: str = "normal", min_score=0.4) -> float:
    """
    Complete one interview, grade it, and return the overall score.

    The interview is produced by ``complete_interview`` (using "gpt-3.5-turbo")
    and the resulting file is scored by ``grade`` (using "gpt-4-turbo").

    :param interview_type: Type of the interview.
    :param mode: Mode of the interview ("normal", "empty", "gibberish", "repeat").
    :param min_score: Threshold that the overall score must strictly exceed.
    :return: Overall score of the interview.
    :raises AssertionError: If the overall score is not greater than ``min_score``.
    """
    # complete_interview yields a pair; only the first element is handed to the grader.
    transcript_path, _unused = complete_interview(
        interview_type,
        "test",
        model="gpt-3.5-turbo",
        mode=mode,
    )

    report = grade(transcript_path, model="gpt-4-turbo")
    overall = report["overall_score"]

    # Strict comparison: a score equal to the threshold counts as a failure.
    assert overall > min_score
    return overall
def test_complete_interview() -> None:
    """
    Run the full interview-and-grade flow for every interview type plus edge cases.

    Every interview type is run once in the default mode. In addition, each of
    the "empty", "gibberish" and "repeat" modes is run once against a randomly
    chosen interview type with the per-interview threshold lowered to 0.0.
    All runs are submitted to a pool of three worker threads, and the mean of
    all collected scores must exceed 0.6.
    """
    interview_types = ["ml_design", "math", "ml_theory", "system_design", "sql", "coding"]
    edge_case_modes = ("empty", "gibberish", "repeat")

    with ThreadPoolExecutor(max_workers=3) as pool:
        # Normal interviews: one job per interview type, default mode and threshold.
        pending = [pool.submit(complete_and_grade_interview, kind) for kind in interview_types]

        # Edge cases: one randomly chosen interview type per degenerate mode.
        for edge_mode in edge_case_modes:
            pending.append(
                pool.submit(
                    complete_and_grade_interview,
                    random.choice(interview_types),
                    mode=edge_mode,
                    min_score=0.0,
                )
            )

        # Gather in submission order; result() blocks until the job finishes and
        # re-raises any exception (including a failed assertion) from the worker.
        scores: List[float] = [job.result() for job in pending]

    assert sum(scores) / len(scores) > 0.6
|