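"""Helpers for building and describing risk-assessment prompts.

Jinja2 prompt templates are loaded from prompt_templates.json, keyed as
"<test_case_name>><template_type>". The remaining utilities report which
message component (user, assistant, or retrieved context) each criteria
evaluates and convert between display and identifier casing.
"""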
import json

from jinja2 import Template

with open('prompt_templates.json', mode='r', encoding="utf-8") as f:
    prompt_templates = json.load(f)


def assessment_prompt(content):
    """Wrap rendered prompt text as a single user chat message."""
    return {"role": "user", "content": content}


def get_prompt_template(test_case, sub_catalog_name):
    """Look up the Jinja2 template for a test case within a sub-catalog.

    Templates are keyed as "<test_case_name>><template_type>" in prompt_templates.json.
    """
    test_case_name = test_case['name']
    if sub_catalog_name == 'harmful_content_in_user_prompt':
        template_type = 'prompt'
    elif sub_catalog_name == 'harmful_content_in_assistant_response':
        template_type = 'prompt_response'
    elif sub_catalog_name == 'rag_hallucination_risks':
        template_type = test_case_name
    return prompt_templates[f'{test_case_name}>{template_type}']


def get_prompt_from_test_case(test_case, sub_catalog_name):
    """Render the test case's template with its own fields and wrap it as a user message."""
    template = Template(get_prompt_template(test_case, sub_catalog_name))
    return assessment_prompt(template.render(**test_case))


def get_result_description(sub_catalog_name, criteria_name):
    """Return the question describing what a given criteria checks."""
    evaluated_component = get_evaluated_component(sub_catalog_name, criteria_name)
    messages = {
        "general_harm": f"Is the {evaluated_component} message harmful by common-sense standards?",
        "bias": f"Is the {evaluated_component} message prejudiced based on identity or characteristics?",
        "jailbreak": "Does the user message try to manipulate the AI to generate harmful, undesired, or inappropriate content?",
        "violence": f"Does the {evaluated_component} message contain content promoting physical, mental, or sexual harm?",
        "profanity": f"Does the {evaluated_component} message include offensive language or insults?",
        "unethical_behavior": f"Does the {evaluated_component} message include actions that violate moral or legal standards?",
        "answer_relevance": "Does the assistant response fail to address or properly answer the user question?",
        "context_relevance": "Is the retrieved context irrelevant to the user question, or does it fail to address their needs?",
        "groundedness": "Does the assistant response include claims or facts not supported by or contradicted by the provided context?",
    }
    return messages[criteria_name]


def get_evaluated_component(sub_catalog_name, criteria_name):
    """Return which part of the exchange (user, assistant, or context) the criteria evaluates."""
    if sub_catalog_name == 'harmful_content_in_user_prompt':
        component = "user"
    elif sub_catalog_name == 'harmful_content_in_assistant_response':
        component = 'assistant'
    elif sub_catalog_name == 'rag_hallucination_risks':
        if criteria_name == "context_relevance":
            component = "context"
        elif criteria_name == "groundedness":
            component = "assistant"
        elif criteria_name == "answer_relevance":
            component = "assistant"
    return component


def get_evaluated_component_adjective(sub_catalog_name, criteria_name):
    """Return the adjective phrase used to describe a flagged component for this criteria."""
    if criteria_name == 'context_relevance' or criteria_name == 'answer_relevance':
        return 'irrelevant based on the definition'
    return 'harmful based on the risk definition'


def to_title_case(input_string):
    """Convert a snake_case identifier to Title Case, special-casing the RAG sub-catalog."""
    if input_string == 'rag_hallucination_risks':
        return 'RAG Hallucination Risks'
    return ' '.join(word.capitalize() for word in input_string.split('_'))


def to_snake_case(text):
    """Convert display text back to a snake_case identifier."""
    return text.lower().replace(" ", "_")
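

# Illustrative usage sketch: exercises the description helpers with the
# sub-catalog and criteria names defined above. Running this file still
# requires prompt_templates.json alongside it, since the templates are
# loaded at import time.
if __name__ == "__main__":
    print(to_title_case("rag_hallucination_risks"))          # RAG Hallucination Risks
    print(to_snake_case("Harmful Content In User Prompt"))   # harmful_content_in_user_prompt
    print(get_evaluated_component("rag_hallucination_risks", "groundedness"))  # assistant
    print(get_result_description("harmful_content_in_user_prompt", "bias"))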