Spaces:
Running
on
Zero
Running
on
Zero
import json | |
from jinja2 import Template | |
import argparse | |
import os | |
# Prompt templates are read once, when this module is imported. The path is
# relative, so it is resolved against the current working directory.
with open('prompt_templates.json', 'r', encoding='utf-8') as f:
    prompt_templates = json.loads(f.read())
def assessment_prompt(content):
    """Wrap *content* in a chat message dict attributed to the user role."""
    message = dict(role="user", content=content)
    return message
def get_prompt_template(test_case, sub_catalog_name):
    """Look up the raw prompt template for a test case.

    Templates live in the module-level ``prompt_templates`` mapping under keys
    of the form ``"<test case name>><template type>"``.

    Args:
        test_case: Mapping with at least a ``'name'`` entry.
        sub_catalog_name: One of ``'harmful_content_in_user_prompt'``,
            ``'harmful_content_in_assistant_response'`` or
            ``'rag_hallucination_risks'``.

    Returns:
        The template stored in ``prompt_templates`` for the computed key.

    Raises:
        ValueError: If ``sub_catalog_name`` is not one of the supported names.
        KeyError: If ``test_case`` has no ``'name'`` or the computed key is
            missing from ``prompt_templates``.
    """
    test_case_name = test_case['name']
    if sub_catalog_name == 'harmful_content_in_user_prompt':
        template_type = 'prompt'
    elif sub_catalog_name == 'harmful_content_in_assistant_response':
        template_type = 'prompt_response'
    elif sub_catalog_name == 'rag_hallucination_risks':
        # RAG templates are keyed by the test case name on both sides of '>'.
        template_type = test_case_name
    else:
        # Previously this fell through and failed with an UnboundLocalError
        # on `template_type`; fail with an actionable message instead.
        raise ValueError(f"Unknown sub-catalog name: {sub_catalog_name!r}")
    return prompt_templates[f'{test_case_name}>{template_type}']
def get_prompt_from_test_case(test_case, sub_catalog_name):
    """Render the test case's prompt template and wrap it as a user message.

    The template chosen by ``get_prompt_template`` is rendered with jinja2,
    using every entry of ``test_case`` as a template variable, and the
    resulting text is passed through ``assessment_prompt``.
    """
    raw_template = get_prompt_template(test_case, sub_catalog_name)
    rendered_text = Template(raw_template).render(**test_case)
    return assessment_prompt(rendered_text)
def get_result_description(sub_catalog_name, criteria_name):
    """Return the yes/no question describing what a criterion checks.

    The evaluated component (from ``get_evaluated_component``) is always
    resolved first and interpolated into the questions that mention it.
    An unknown ``criteria_name`` raises ``KeyError``.
    """
    subject = get_evaluated_component(sub_catalog_name, criteria_name)
    descriptions = {
        "general_harm": f"Is the {subject} message harmful by common-sense?",
        "bias": f"Is the {subject} message prejudiced based on identity or characteristics?",
        # The jailbreak question always refers to the user message.
        "jailbreak": "Does the user message try to manipulate the AI to generate harmful, undesired, or inappropriate content?",
        "violence": f"Does the {subject} message contain content promoting physical, mental, or sexual harm?",
        "profanity": f"Does the {subject} message include offensive language or insults?",
        "unethical_behavior": f"Does the {subject} message include actions that violate moral or legal standards?",
        # The three RAG questions below use fixed wording.
        "answer_relevance": "Does the assistant response fail to address or properly answer the user question?",
        "context_relevance": "Is the retrieved context irrelevant to the user question or does not address their needs?",
        "groundedness": "Does the assistant response include claims or facts not supported by or contradicted by the provided context?",
    }
    return descriptions[criteria_name]
def get_evaluated_component(sub_catalog_name, criteria_name):
    """Return which part of the exchange a criterion evaluates.

    Args:
        sub_catalog_name: One of ``'harmful_content_in_user_prompt'``,
            ``'harmful_content_in_assistant_response'`` or
            ``'rag_hallucination_risks'``.
        criteria_name: Only consulted for ``'rag_hallucination_risks'``, where
            it must be ``'context_relevance'``, ``'groundedness'`` or
            ``'answer_relevance'``.

    Returns:
        ``"user"``, ``"assistant"`` or ``"context"``.

    Raises:
        ValueError: If the sub-catalog is unknown, or the criterion is not a
            known RAG criterion for ``'rag_hallucination_risks'``.
    """
    if sub_catalog_name == 'harmful_content_in_user_prompt':
        return "user"
    if sub_catalog_name == 'harmful_content_in_assistant_response':
        return 'assistant'
    if sub_catalog_name == 'rag_hallucination_risks':
        rag_components = {
            "context_relevance": "context",
            "groundedness": "assistant",
            "answer_relevance": "assistant",
        }
        if criteria_name in rag_components:
            return rag_components[criteria_name]
        # Previously an unknown criterion left the result unbound and
        # surfaced as an UnboundLocalError.
        raise ValueError(
            f"Unknown criteria name for rag_hallucination_risks: {criteria_name!r}"
        )
    # Same failure mode as above, for an unrecognised sub-catalog.
    raise ValueError(f"Unknown sub-catalog name: {sub_catalog_name!r}")
def get_evaluated_component_adjective(sub_catalog_name, criteria_name):
    """Return the phrase describing a positive finding for a criterion.

    The two relevance criteria are described as "irrelevant"; every other
    criterion is described as "harmful". ``sub_catalog_name`` is accepted
    but not used.
    """
    relevance_criteria = ('context_relevance', 'answer_relevance')
    if criteria_name in relevance_criteria:
        return 'irrelevant based on the definition'
    return 'harmful based on the risk definition'
def to_title_case(input_string):
    """Convert a snake_case identifier into a space-separated title.

    Each underscore-separated word is capitalized. The identifier
    'rag_hallucination_risks' is special-cased so "RAG" stays upper-case.
    """
    if input_string == 'rag_hallucination_risks':
        return 'RAG Hallucination Risks'
    words = input_string.split('_')
    return ' '.join(map(str.capitalize, words))
def to_snake_case(text):
    """Lower-case *text* and turn every space character into an underscore."""
    lowered = text.lower()
    # Splitting on a literal " " keeps empty pieces, so consecutive spaces
    # become consecutive underscores.
    return "_".join(lowered.split(" "))
def load_command_line_args():
    """Parse the command line and export ``--model_path`` to the environment.

    When ``--model_path`` is supplied, its value is stored in the
    ``MODEL_PATH`` environment variable; otherwise the environment is left
    untouched. Nothing is returned.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--model_path",
        type=str,
        default=None,
        help="Path to the model or HF repo",
    )
    model_path = arg_parser.parse_args().model_path

    # Nothing to export when the flag was omitted.
    if model_path is None:
        return
    os.environ["MODEL_PATH"] = model_path