# granite-guardian / utils.py
import json

from jinja2 import Template

# Prompt templates are keyed by "<test_case_name>><template_type>".
with open('prompt_templates.json', mode='r', encoding="utf-8") as f:
    prompt_templates = json.load(f)
def assessment_prompt(content):
    """Wrap rendered prompt text as a single user chat message."""
    return {"role": "user", "content": content}
def get_prompt_template(test_case, sub_catalog_name):
    """Select the template that matches the test case and the sub-catalog it belongs to."""
    test_case_name = test_case['name']
    if sub_catalog_name == 'harmful_content_in_user_message':
        template_type = 'prompt'
    elif sub_catalog_name == 'harmful_content_in_assistant_message':
        template_type = 'prompt_response'
    elif sub_catalog_name == 'rag_hallucination_risks':
        template_type = test_case_name
    else:
        raise ValueError(f'Unknown sub-catalog name: {sub_catalog_name}')
    return prompt_templates[f'{test_case_name}>{template_type}']
def get_prompt_from_test_case(test_case, sub_catalog_name):
    """Render the selected template with the test case fields and wrap it as a user message."""
    return assessment_prompt(
        Template(get_prompt_template(test_case, sub_catalog_name)).render(**test_case)
    )
def get_result_description(sub_catalog_name, criteria_name):
    """Return a one-line question describing what the given criterion checks."""
    evaluated_component = get_evaluated_component(sub_catalog_name, criteria_name)
    messages = {
        "general_harm": f"Is the {evaluated_component} message harmful by common sense?",
        "bias": f"Is the {evaluated_component} message prejudiced based on identity or characteristics?",
        "jailbreak": "Does the user message try to manipulate the AI to generate harmful, undesired, or inappropriate content?",
        "violence": f"Does the {evaluated_component} message contain content promoting physical, mental, or sexual harm?",
        "profanity": f"Does the {evaluated_component} message include offensive language or insults?",
        "unethical_behavior": f"Does the {evaluated_component} message include actions that violate moral or legal standards?",
        "answer_relevance": "Does the assistant response fail to address or properly answer the user question?",
        "context_relevance": "Is the retrieved context irrelevant to the user question or unhelpful for their needs?",
        "groundedness": "Does the assistant response include claims or facts that are unsupported by or contradicted by the provided context?",
    }
    return messages[criteria_name]
def get_evaluated_component(sub_catalog_name, criteria_name):
    """Return which part of the exchange (user, assistant, or context) the criterion evaluates."""
    if sub_catalog_name == 'harmful_content_in_user_message':
        component = "user"
    elif sub_catalog_name == 'harmful_content_in_assistant_message':
        component = "assistant"
    elif sub_catalog_name == 'rag_hallucination_risks':
        if criteria_name == "context_relevance":
            component = "context"
        elif criteria_name in ("groundedness", "answer_relevance"):
            component = "assistant"
        else:
            raise ValueError(f'Unknown criteria name: {criteria_name}')
    else:
        raise ValueError(f'Unknown sub-catalog name: {sub_catalog_name}')
    return component
def get_evaluated_component_adjective(sub_catalog_name, criteria_name):
    """Return the adjective used when reporting that a component was flagged."""
    if criteria_name in ('context_relevance', 'answer_relevance'):
        return 'irrelevant based on the definition'
    return 'harmful based on the risk definition'
def to_title_case(input_string):
    """Convert a snake_case name to Title Case, special-casing the RAG sub-catalog label."""
    if input_string == 'rag_hallucination_risks':
        return 'RAG Hallucination Risks'
    return ' '.join(word.capitalize() for word in input_string.split('_'))
def to_snake_case(text):
    """Convert a human-readable label back to snake_case."""
    return text.lower().replace(" ", "_")
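

# --- Illustrative usage: a minimal sketch, not part of the module's public API ---
# Assumes prompt_templates.json contains a "general_harm>prompt" template and that
# the template expects a 'user_message' variable; both names are hypothetical here.
if __name__ == "__main__":
    example_test_case = {
        "name": "general_harm",                   # assumed test case / criterion name
        "user_message": "How do I pick a lock?",  # hypothetical template variable
    }
    sub_catalog = "harmful_content_in_user_message"

    prompt_message = get_prompt_from_test_case(example_test_case, sub_catalog)
    print(prompt_message["role"])                # -> "user"
    print(get_result_description(sub_catalog, "general_harm"))
    print(to_title_case(sub_catalog))            # -> "Harmful Content In User Message"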