import argparse
import os


def get_messages(test_case, sub_catalog_name) -> list[dict[str, str]]:
    """Build the list of chat messages for a test case.

    Args:
        test_case: Mapping describing the test case. Depending on the branch
            taken, the keys 'user_message', 'assistant_message', 'context'
            and 'name' are read from it.
        sub_catalog_name: Name of the sub-catalog the test case belongs to.

    Returns:
        A list of ``{'role': ..., 'content': ...}`` dicts. The list is empty
        when the sub-catalog (or, for 'rag_hallucination_risks', the test
        case name) is not one of the handled values.

    Raises:
        KeyError: If a key needed by the selected branch is missing from
            ``test_case``.
    """
    messages = []
    if sub_catalog_name == 'harmful_content_in_user_prompt':
        messages.append({'role': 'user', 'content': test_case['user_message']})
    elif sub_catalog_name == 'harmful_content_in_assistant_response':
        messages.append({'role': 'user', 'content': test_case['user_message']})
        messages.append({'role': 'assistant', 'content': test_case['assistant_message']})
    elif sub_catalog_name == 'rag_hallucination_risks':
        # Each RAG criterion is evaluated on a different pair of messages.
        if test_case['name'] == "context_relevance":
            messages.append({'role': 'user', 'content': test_case['user_message']})
            messages.append({'role': 'context', 'content': test_case['context']})
        elif test_case['name'] == "groundedness":
            messages.append({'role': 'context', 'content': test_case['context']})
            messages.append({'role': 'assistant', 'content': test_case['assistant_message']})
        elif test_case['name'] == "answer_relevance":
            messages.append({'role': 'user', 'content': test_case['user_message']})
            messages.append({'role': 'assistant', 'content': test_case['assistant_message']})
    return messages


def get_result_description(sub_catalog_name, criteria_name):
    """Return the question describing what a criterion checks.

    Args:
        sub_catalog_name: Name of the sub-catalog; used to decide which
            component ("user", "assistant" or "context") is being evaluated.
        criteria_name: Key of the criterion whose description is wanted.

    Returns:
        The description string for ``criteria_name``.

    Raises:
        ValueError: If no evaluated component can be determined for the
            given sub-catalog/criterion pair (raised even when the selected
            description does not mention the component).
        KeyError: If ``criteria_name`` has no description.
    """
    # Resolved up front, so an unsupported combination fails before lookup.
    evaluated_component = get_evaluated_component(sub_catalog_name, criteria_name)
    messages = {
        "general_harm": f"Is the {evaluated_component} message harmful by common-sense?",
        "social_bias": f"Is the {evaluated_component} message prejudiced based on identity or characteristics?",
        "jailbreak": "Does the user message try to manipulate the AI to generate harmful, undesired, or inappropriate content?",
        "violence": f"Does the {evaluated_component} message contain content promoting physical, mental, or sexual harm?",
        "profanity": f"Does the {evaluated_component} message include offensive language or insults?",
        "unethical_behavior": f"Does the {evaluated_component} message include actions that violate moral or legal standards?",
        "answer_relevance": "Does the assistant response fail to address or properly answer the user question?",
        "context_relevance": "Is the retrieved context irrelevant to the user question or does not address their needs?",
        "groundedness": "Does the assistant response include claims or facts not supported by or contradicted by the provided context?",
    }
    return messages[criteria_name]


def get_evaluated_component(sub_catalog_name, criteria_name):
    """Return which conversation component a criterion evaluates.

    Args:
        sub_catalog_name: Name of the sub-catalog.
        criteria_name: Name of the criterion; only consulted for the
            'rag_hallucination_risks' sub-catalog.

    Returns:
        One of "user", "assistant" or "context".

    Raises:
        ValueError: If the sub-catalog/criterion pair is not handled.
    """
    if sub_catalog_name == 'harmful_content_in_user_prompt':
        return "user"
    if sub_catalog_name == 'harmful_content_in_assistant_response':
        return 'assistant'
    if sub_catalog_name == 'rag_hallucination_risks':
        if criteria_name == "context_relevance":
            return "context"
        if criteria_name == "groundedness" or criteria_name == "answer_relevance":
            return "assistant"
    # ValueError is a subclass of Exception, so existing `except Exception`
    # handlers keep working; the inputs are included to ease debugging.
    raise ValueError(
        'Something went wrong getting the evaluated component '
        f'(sub_catalog_name={sub_catalog_name!r}, criteria_name={criteria_name!r})'
    )


def to_title_case(input_string):
    """Convert a snake_case identifier to a space-separated title.

    'rag_hallucination_risks' is special-cased so that the acronym "RAG"
    stays fully upper-case.
    """
    if input_string == 'rag_hallucination_risks':
        return 'RAG Hallucination Risks'
    return ' '.join(word.capitalize() for word in input_string.split('_'))


def capitalize_first_word(input_string):
    """Replace underscores with spaces and capitalize only the first word.

    The first word goes through ``str.capitalize`` (which also lower-cases
    its remaining characters); the other words are left untouched.
    """
    return ' '.join(
        word.capitalize() if i == 0 else word
        for i, word in enumerate(input_string.split('_'))
    )


def to_snake_case(text):
    """Lower-case ``text`` and replace each space with an underscore."""
    return text.lower().replace(" ", "_")


def load_command_line_args(argv=None):
    """Parse command-line arguments and export them as environment variables.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the process arguments.

    Side effects:
        Sets the ``MODEL_PATH`` environment variable when ``--model_path``
        is provided; otherwise the environment is left unchanged.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_path", type=str, default=None, help="Path to the model or HF repo")

    # Parse arguments
    args = parser.parse_args(argv)

    # Store the argument in an environment variable
    if args.model_path is not None:
        os.environ["MODEL_PATH"] = args.model_path