import asyncio
import json
import re
from datetime import datetime

from .utils_evaluate import evaluate_answers, evaluate_objections
from .utils_prep import offer_initial_actions

async def display_llm_responses(cl, session_state):
    """Replay each recorded query/response pair to the chat UI."""
    await cl.Message(content="**Responses**").send()
    for query, response in zip(session_state.queries, session_state.llm_responses):
        query_display = {
            "command": query["command"],
            "message": query["message"],
            "mood_score": query["mood_score"],
            "previous_question": query["previous_question"],
            "rep_answer": query["rep_answer"],
            "next_question": query["next_question"],
        }
        query_json = json.dumps(query_display, indent=2)
        await cl.Message(content="Query:").send()
        await cl.Message(content=query_json).send()
        await cl.Message(content="Response:").send()
        await cl.Message(content=response).send()

    # zip() above stops at the shorter list, so surface unpaired items from either side.
    remaining_queries = session_state.queries[len(session_state.llm_responses):]
    remaining_responses = session_state.llm_responses[len(session_state.queries):]
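
    # e.g. with 3 queries but 2 responses, remaining_queries holds the single
    # unpaired query and remaining_responses is empty (illustrative counts).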

    for query in remaining_queries:
        await cl.Message(content=f"**Query:** {query}").send()

    for response in remaining_responses:
        await cl.Message(content=f"**Response:** {response}").send()


def format_score(score):
    """Render a 0-1 numeric score as a percentage; pass other values through."""
    if isinstance(score, (int, float)):
        return f"{score * 100:.1f}%"
    return score
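
# Illustrative behaviour (hypothetical values):
#   format_score(0.874) -> "87.4%"
#   format_score("N/A") -> "N/A"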


def format_rouge_score(score):
    """Format a ROUGE score from either its string repr or a score object.

    Accepts the "precision=..., recall=..., fmeasure=..." string form or any
    object exposing .precision/.recall/.fmeasure attributes; anything else
    (e.g. 'N/A') is returned unchanged.
    """
    if isinstance(score, str):
        match = re.search(r'precision=([\d.]+), recall=([\d.]+), fmeasure=([\d.]+)', score)
        if match:
            precision = float(match.group(1))
            recall = float(match.group(2))
            fmeasure = float(match.group(3))
            return (f"Precision: {precision * 100:.1f}%, "
                    f"Recall: {recall * 100:.1f}%, "
                    f"FMeasure: {fmeasure * 100:.1f}%")
    elif hasattr(score, "precision"):
        return (f"Precision: {score.precision * 100:.1f}%, "
                f"Recall: {score.recall * 100:.1f}%, "
                f"FMeasure: {score.fmeasure * 100:.1f}%")
    return score
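
# Illustrative, assuming the string form comes from rouge_score's Score repr,
# which contains "precision=..., recall=..., fmeasure=..." (hypothetical values):
#   format_rouge_score("precision=0.5, recall=0.25, fmeasure=0.3333")
#   -> "Precision: 50.0%, Recall: 25.0%, FMeasure: 33.3%"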


def format_datetime(dt):
    """Format a datetime as "YYYY-MM-DD HH:MM"; fall back to str() otherwise."""
    if isinstance(dt, datetime):
        return dt.strftime("%Y-%m-%d %H:%M")
    return str(dt)
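
# Illustrative: format_datetime(datetime(2024, 5, 1, 14, 30)) -> "2024-05-01 14:30";
# non-datetime inputs are coerced with str().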


async def display_evaluation_results(cl, session_state):
    """Summarize the session: timings, hit/miss ratio, and optional RAGAS metrics."""
    await cl.Message(content="*Preparing evaluation results ...*").send()
    print(
        f"Evaluation flags: do_evaluation={session_state.do_evaluation}, "
        f"add_objections_to_analysis={session_state.add_objections_to_analysis}"
    )
    hit_miss_ratio = "N/A"
    if session_state.do_evaluation:
        evaluate_answers(session_state)
    elif session_state.add_objections_to_analysis:
        await evaluate_objections(session_state)
    # evaluation_score is 1 for a hit and 0 for a miss; anything else is unscored.
    scored = [resp['evaluation_score'] for resp in session_state.responses
              if resp.get('evaluation_score') in (0, 1)]
    if scored:
        hit_miss_ratio = (sum(scored) / len(session_state.responses)) * 100
    await asyncio.sleep(1)
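
    # Illustrative arithmetic: 7 hits across 10 responses gives a ratio of 70.0%;
    # note that unscored responses still count in the denominator.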

    await cl.Message(content="**Session Summary**").send()
    output = (
        f"**Start Time:** {format_datetime(session_state.start_time)} \n"
        f"**End Time:** {format_datetime(session_state.end_time)} \n"
        f"**Duration:** {session_state.duration_minutes} minutes \n"
        f"**Total Number of Questions:** {len(session_state.questions)} \n"
        f"**Total Questions Answered:** {len(session_state.responses)} \n"
    )
    await cl.Message(content=output).send()

    if session_state.do_ragas_evaluation:
        # Collapse per-question RAGAS results into mean scores for the session.
        results_df = session_state.ragas_results.to_pandas()
        columns_to_average = ['answer_relevancy', 'answer_correctness']
        averages = results_df[columns_to_average].mean()
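
        # Illustrative: .mean() returns a pandas Series indexed by column name,
        # e.g. answer_relevancy 0.91, answer_correctness 0.84 (hypothetical values).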

    await cl.Message(content="**Overall Summary (By SalesBuddy)**").send()
    final_resp = session_state.responses[-1]
    # hit_miss_ratio may still be the string "N/A", which cannot take a :.1f format.
    ratio_display = (f"{hit_miss_ratio:.1f}%" if isinstance(hit_miss_ratio, (int, float))
                     else hit_miss_ratio)
    output = (
        f"**SalesBuddy Grade (1-10):** {final_resp['overall_score']} \n"
        f"**SalesBuddy Evaluation:** {final_resp['overall_evaluation']} \n"
        f"**SalesBuddy Final Mood Score (1-10):** {final_resp['mood_score']} \n"
        f"**Hit/Miss Ratio:** {ratio_display} \n"
    )
    await cl.Message(content=output).send()

    if session_state.do_ragas_evaluation:
        await cl.Message(content="**Average Scores - Based on RAGAS**").send()
        output = (
            f"Answer Relevancy: {format_score(averages['answer_relevancy'])}\n"
            f"Answer Correctness: {format_score(averages['answer_correctness'])}\n"
        )
        await cl.Message(content=output).send()

    await cl.Message(content="**Individual Question Scores**").send()

    for index, resp in enumerate(session_state.responses):
        # Default to 'N/A' so unscored responses read as N/A rather than Miss.
        eval_score = resp.get('evaluation_score', 'N/A')
        if eval_score == 1:
            eval_output = "Hit"
        elif eval_score == 0:
            eval_output = "Miss"
        else:
            eval_output = "N/A"
        output = (
            f"**Question:** {resp.get('question', 'N/A')}\n"
            f"**Answer:** {resp.get('response', 'N/A')}\n"
            f"**SalesBuddy Evaluation:** {resp.get('response_evaluation', 'N/A')}\n"
            f"**Hit/Miss:** {eval_output}\n"
        )

        if session_state.do_ragas_evaluation:
            scores = session_state.scores[index]
            relevancy = scores.get('answer_relevancy', 'N/A')
            correctness = scores.get('answer_correctness', 'N/A')
            bleu_score = scores.get('bleu_score', 'N/A')
            rouge_scores = scores.get('rouge_score', {})
            rouge1_output = format_rouge_score(rouge_scores.get('rouge1', 'N/A'))
            rougeL_output = format_rouge_score(rouge_scores.get('rougeL', 'N/A'))
            semantic_similarity_score = scores.get('semantic_similarity_score', 'N/A')
            numbers = f"""
**Answer Relevancy:** {format_score(relevancy)}
**Answer Correctness:** {format_score(correctness)}
**BLEU Score:** {format_score(bleu_score)}
**ROUGE 1 Score:** {rouge1_output}
**ROUGE L Score:** {rougeL_output}
**Semantic Similarity Score:** {format_score(semantic_similarity_score)}
"""
            await cl.Message(content=output).send()
            await cl.Message(content=numbers).send()
        else:
            await cl.Message(content=output).send()

    await offer_initial_actions()
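
# Hypothetical wiring sketch (illustrative only, not part of this module):
# these coroutines appear designed to be driven from Chainlit handlers, e.g.
#
#   import chainlit as cl
#
#   @cl.on_chat_end
#   async def wrap_up():
#       session_state = cl.user_session.get("session_state")
#       await display_llm_responses(cl, session_state)
#       await display_evaluation_results(cl, session_state)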