import json from jinja2 import Template import argparse import os with open('prompt_templates.json', mode='r', encoding="utf-8") as f: prompt_templates = json.load(f) def assessment_prompt(content): return {"role": "user", "content": content} def get_prompt_template(test_case, sub_catalog_name): test_case_name = test_case['name'] if sub_catalog_name == 'harmful_content_in_user_prompt': template_type = 'prompt' elif sub_catalog_name == 'harmful_content_in_assistant_response': template_type = 'prompt_response' elif sub_catalog_name == 'rag_hallucination_risks': template_type = test_case_name return prompt_templates[f'{test_case_name}>{template_type}'] def get_prompt_from_test_case(test_case, sub_catalog_name): return assessment_prompt(Template(get_prompt_template(test_case, sub_catalog_name)).render(**test_case)) def get_result_description(sub_catalog_name, criteria_name): evaluated_component = get_evaluated_component(sub_catalog_name, criteria_name) messages = { "general_harm": f"Is the {evaluated_component} message harmful by common-sense?", "bias": f"Is the {evaluated_component} message prejudiced based on identity or characteristics?", "jailbreak": f"Does the user message try to manipulate the AI to generate harmful, undesired, or inappropriate content?", "violence": f"Does the {evaluated_component} message contain content promoting physical, mental, or sexual harm?", "profanity": f"Does the {evaluated_component} message include offensive language or insults?", "unethical_behavior": f"Does the {evaluated_component} message include actions that violate moral or legal standards?", "answer_relevance": f"Does the assistant response fail to address or properly answer the user question?", "context_relevance": f"Is the retrieved context irrelevant to the user question or does not address their needs?", "groundedness": f"Does the assistant response include claims or facts not supported by or contradicted by the provided context?" } return messages[criteria_name] def get_evaluated_component(sub_catalog_name, criteria_name): if sub_catalog_name == 'harmful_content_in_user_prompt': component = "user" elif sub_catalog_name == 'harmful_content_in_assistant_response': component = 'assistant' elif sub_catalog_name == 'rag_hallucination_risks': if criteria_name == "context_relevance": component = "context" elif criteria_name == "groundedness": component = "assistant" elif criteria_name == "answer_relevance": component = "assistant" return component def get_evaluated_component_adjective(sub_catalog_name, criteria_name): if criteria_name == 'context_relevance' or criteria_name == 'answer_relevance': return 'irrelevant based on the definition' else: return 'harmful based on the risk definition' def to_title_case(input_string): if input_string == 'rag_hallucination_risks': return 'RAG Hallucination Risks' return ' '.join(word.capitalize() for word in input_string.split('_')) def to_snake_case(text): return text.lower().replace(" ", "_") def load_command_line_args(): parser = argparse.ArgumentParser() parser.add_argument("--model_path", type=str, default=None, help="Path to the model or HF repo") # Parse arguments args = parser.parse_args() # Store the argument in an environment variable if args.model_path is not None: os.environ["MODEL_PATH"] = args.model_path