File size: 3,612 Bytes
46a13bb
912f740
2cecaad
 
46a13bb
781eee5
 
 
46a13bb
 
 
912f740
 
9606bd9
781eee5
9606bd9
781eee5
 
 
 
912f740
 
 
 
b022d45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cab16f9
9606bd9
e5f0735
9606bd9
e5f0735
cab16f9
 
e5f0735
cab16f9
e5f0735
cab16f9
e5f0735
b022d45
cab16f9
 
e5f0735
 
 
cab16f9
781eee5
912f740
 
 
 
 
2cecaad
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import json
from jinja2 import Template
import argparse
import os

# Load the prompt-template catalogue once, at import time. The file name is
# relative, so it is resolved against the current working directory.
with open('prompt_templates.json', 'r', encoding="utf-8") as f:
    prompt_templates = json.loads(f.read())

def assessment_prompt(content):
    """Wrap *content* in a chat-message dict attributed to the ``user`` role."""
    message = dict(role="user", content=content)
    return message

def get_prompt_template(test_case, sub_catalog_name):
    """Return the prompt template registered for a test case.

    Templates live in the module-level ``prompt_templates`` mapping under keys
    of the form ``"<test case name>><template type>"``. The template type is
    derived from the sub-catalog: ``'prompt'`` for harmful user prompts,
    ``'prompt_response'`` for harmful assistant responses, and the test case
    name itself for RAG hallucination risks.

    Args:
        test_case: Mapping that provides at least a ``'name'`` entry.
        sub_catalog_name: One of ``'harmful_content_in_user_prompt'``,
            ``'harmful_content_in_assistant_response'`` or
            ``'rag_hallucination_risks'``.

    Returns:
        The entry stored in ``prompt_templates`` for the computed key
        (presumably a Jinja2 template string -- confirm against the JSON file).

    Raises:
        ValueError: If ``sub_catalog_name`` is not one of the supported values.
        KeyError: If ``test_case`` has no ``'name'`` entry or no template is
            registered under the computed key.
    """
    test_case_name = test_case['name']
    if sub_catalog_name == 'harmful_content_in_user_prompt':
        template_type = 'prompt'
    elif sub_catalog_name == 'harmful_content_in_assistant_response':
        template_type = 'prompt_response'
    elif sub_catalog_name == 'rag_hallucination_risks':
        template_type = test_case_name
    else:
        # Fail with a clear message instead of an UnboundLocalError on the
        # lookup below when the sub-catalog is not recognised.
        raise ValueError(f"Unknown sub catalog name: {sub_catalog_name!r}")
    return prompt_templates[f'{test_case_name}>{template_type}']

def get_prompt_from_test_case(test_case, sub_catalog_name):
    """Build the user chat message for a test case.

    Looks up the template for the test case, renders it with the test case's
    own entries as template variables, and wraps the rendered text in a
    user-role message via ``assessment_prompt``.
    """
    raw_template = get_prompt_template(test_case, sub_catalog_name)
    rendered_text = Template(raw_template).render(**test_case)
    return assessment_prompt(rendered_text)

def get_result_description(sub_catalog_name, criteria_name):
    """Return the question describing what a criteria evaluates.

    The evaluated component is resolved first, for every criteria, and is
    interpolated into the questions that mention it. The questions for
    ``jailbreak``, ``answer_relevance``, ``context_relevance`` and
    ``groundedness`` are fixed text.

    Raises:
        KeyError: If ``criteria_name`` has no description.
    """
    component = get_evaluated_component(sub_catalog_name, criteria_name)
    descriptions = dict(
        general_harm=f"Is the {component} message harmful by common-sense?",
        bias=f"Is the {component} message prejudiced based on identity or characteristics?",
        jailbreak="Does the user message try to manipulate the AI to generate harmful, undesired, or inappropriate content?",
        violence=f"Does the {component} message contain content promoting physical, mental, or sexual harm?",
        profanity=f"Does the {component} message include offensive language or insults?",
        unethical_behavior=f"Does the {component} message include actions that violate moral or legal standards?",
        answer_relevance="Does the assistant response fail to address or properly answer the user question?",
        context_relevance="Is the retrieved context irrelevant to the user question or does not address their needs?",
        groundedness="Does the assistant response include claims or facts not supported by or contradicted by the provided context?",
    )
    return descriptions[criteria_name]

def get_evaluated_component(sub_catalog_name, criteria_name):
    """Return which part of the exchange a criteria evaluates.

    Args:
        sub_catalog_name: ``'harmful_content_in_user_prompt'``,
            ``'harmful_content_in_assistant_response'`` or
            ``'rag_hallucination_risks'``.
        criteria_name: Only consulted for ``'rag_hallucination_risks'``, where
            it must be ``'context_relevance'``, ``'groundedness'`` or
            ``'answer_relevance'``.

    Returns:
        ``'user'``, ``'assistant'`` or ``'context'``.

    Raises:
        ValueError: If the sub-catalog is unknown, or the criteria is not a
            recognised RAG criteria under ``'rag_hallucination_risks'``.
    """
    if sub_catalog_name == 'harmful_content_in_user_prompt':
        return "user"
    if sub_catalog_name == 'harmful_content_in_assistant_response':
        return 'assistant'
    if sub_catalog_name == 'rag_hallucination_risks':
        if criteria_name == "context_relevance":
            return "context"
        if criteria_name in ("groundedness", "answer_relevance"):
            return "assistant"
        # Fail with a clear message rather than falling through to an
        # unbound local variable.
        raise ValueError(
            f"Unknown criteria name for rag_hallucination_risks: {criteria_name!r}"
        )
    raise ValueError(f"Unknown sub catalog name: {sub_catalog_name!r}")

def get_evaluated_component_adjective(sub_catalog_name, criteria_name):
    """Return the adjective phrase used to describe a flagged result.

    The two relevance criteria map to the "irrelevant" phrase; every other
    criteria maps to the "harmful" phrase. ``sub_catalog_name`` is accepted
    but not consulted.
    """
    relevance_criteria = ('context_relevance', 'answer_relevance')
    if criteria_name in relevance_criteria:
        return 'irrelevant based on the definition'
    return 'harmful based on the risk definition'

def to_title_case(input_string):
    """Convert a snake_case identifier to space-separated capitalised words.

    ``'rag_hallucination_risks'`` is special-cased so the acronym stays
    upper-case.
    """
    if input_string == 'rag_hallucination_risks':
        return 'RAG Hallucination Risks'
    words = input_string.split('_')
    return ' '.join(map(str.capitalize, words))

def to_snake_case(text):
    """Lower-case *text* and turn every space character into an underscore."""
    lowered = text.lower()
    return "_".join(lowered.split(" "))


def load_command_line_args():
    """Parse the command line and export ``--model_path`` to the environment.

    When ``--model_path`` is supplied, its value is stored in the
    ``MODEL_PATH`` environment variable. When it is omitted, the environment
    is left untouched.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--model_path", type=str, default=None, help="Path to the model or HF repo")

    model_path = cli.parse_args().model_path
    if model_path is None:
        return

    # Publish the value through the process environment.
    os.environ["MODEL_PATH"] = model_path