IliaLarchenko committed on
Commit
a5bc3b5
1 Parent(s): 89d9c22

Added notes to the grading transcript

Browse files
Files changed (2) hide show
  1. tests/candidate.py +8 -1
  2. tests/testing_prompts.py +3 -1
tests/candidate.py CHANGED
@@ -92,7 +92,14 @@ def complete_interview(interview_type, exp_name, requirements="", difficulty="",
92
  response_times.append(time.time() - send_time)
93
 
94
  messages_candidate.append({"role": "user", "content": chat_display[-1][1]})
95
- interview_data["transcript"].append(f"INTERVIEWER MESSAGE: {chat_display[-1][1]}")
 
 
 
 
 
 
 
96
 
97
  interview_data["feedback"] = llm.end_interview_full(problem_statement_text, messages_interviewer, interview_type)
98
  interview_data["average_response_time_seconds"] = round(sum(response_times) / len(response_times), 2) if response_times else 0
 
92
  response_times.append(time.time() - send_time)
93
 
94
  messages_candidate.append({"role": "user", "content": chat_display[-1][1]})
95
+
96
+ message_split = messages_interviewer[-1]["content"].split("#NOTES#")
97
+ interviewer_answer = message_split[0]
98
+ interview_data["transcript"].append(f"INTERVIEWER MESSAGE: {interviewer_answer}")
99
+
100
+ if len(message_split) > 1:
101
+ interviewer_note = message_split[1]
102
+ interview_data["transcript"].append(f"INTERVIEWER HIDDEN NOTE: {interviewer_note}")
103
 
104
  interview_data["feedback"] = llm.end_interview_full(problem_statement_text, messages_interviewer, interview_type)
105
  interview_data["average_response_time_seconds"] = round(sum(response_times) / len(response_times), 2) if response_times else 0
tests/testing_prompts.py CHANGED
@@ -18,6 +18,7 @@ You are reviewing an interview. Your goal is to evaluate the performance of the
18
  Be extremely critical and strict, you have highest quality standards.
19
  Even a slight mistake should lead to a negative evaluation. If in doubt about any criteria, give a negative evaluation.
20
  Analyze the JSON file with the interview transcript and provide your feedback.
 
21
 
22
  You should evaluate the following aspects and return a JSON with these keys:
23
 
@@ -39,8 +40,9 @@ You should evaluate the following aspects and return a JSON with these keys:
39
  "interviewer_hallucinations": "The interviewer didn't say anything non-relevant or strange.",
40
  "interviewer_summary": "The interviewer doesn't repeat or summarize what the candidate just said.",
41
  "interviewer_gaslighting": "The interviewer refrained from gaslighting the candidate: didn't claim any candidate's errors or missed facts that he didn't make.",
42
- "interviewer_leaks": "The interviewer didn't leak any hidden notes to candidate.",
43
  "interviewer_empty": "The interviewer didn't send any empty messages.",
 
44
 
45
  "feedback_quality": "The feedback was constructive and offered actionable insights.",
46
  "feedback_overview": "The feedback contains the recap of main mistakes and good ideas of the candidate.",
 
18
  Be extremely critical and strict, you have highest quality standards.
19
  Even a slight mistake should lead to a negative evaluation. If in doubt about any criteria, give a negative evaluation.
20
  Analyze the JSON file with the interview transcript and provide your feedback.
21
+ The JSON contains the problem description, the interview transcript (messages, code, and hidden notes not visible to the candidate), and the feedback.
22
 
23
  You should evaluate the following aspects and return a JSON with these keys:
24
 
 
40
  "interviewer_hallucinations": "The interviewer didn't say anything non-relevant or strange.",
41
  "interviewer_summary": "The interviewer doesn't repeat or summarize what the candidate just said.",
42
  "interviewer_gaslighting": "The interviewer refrained from gaslighting the candidate: didn't claim any candidate's errors or missed facts that he didn't make.",
43
+ "interviewer_leaks": "The interviewer didn't leak any hidden notes to candidate during the main part of the interview.",
44
  "interviewer_empty": "The interviewer didn't send any empty messages.",
45
+ "interviewer_notes": "The interviewer made reasonable notes catching the candidate's mistakes and important facts.",
46
 
47
  "feedback_quality": "The feedback was constructive and offered actionable insights.",
48
  "feedback_overview": "The feedback contains the recap of main mistakes and good ideas of the candidate.",