Commit 9606bd9 by Martín Santillán Cooper: Fix bugs
1 Parent(s): 37449d4
app.py
CHANGED
@@ -31,7 +31,7 @@ def on_test_case_click(state: gr.State):
         test_case_name: f'<h2>{to_title_case(selected_test_case["name"])}</h2>',
         criteria: selected_test_case['criteria'],
         context: gr.update(value=selected_test_case['context'], visible=True) if selected_test_case['context'] is not None else gr.update(visible=False, value=''),
-        user_message: gr.update(value=selected_test_case['user_message'], interactive=True, elem_classes=['input-box']) if selected_sub_catalog != '…
+        user_message: gr.update(value=selected_test_case['user_message'], interactive=True, elem_classes=['input-box']) if selected_sub_catalog != 'harmful_content_in_assistant_response' else gr.update(value=selected_test_case['user_message'], interactive=False, elem_classes=['read-only', 'input-box']),
         assistant_message: gr.update(value=selected_test_case['assistant_message'], visible=True) if selected_test_case['assistant_message'] is not None else gr.update(visible=False, value=''),
         result_text: gr.update(value='', visible=False),
         result_container: gr.update(visible=False),
@@ -102,7 +102,7 @@ with gr.Blocks(
     with gr.Row(elem_classes='title-row'):
         with gr.Column(scale=4):
             gr.HTML('<h2>IBM Granite Guardian 3.0</h2>', elem_classes='title')
-            gr.HTML(elem_classes='system-description', value='<p>Granite Guardian models are specialized language models in the Granite family that allow you to detect harms and risks in generative AI systems. The Granite Guardian models can be used with any other large language models to make interactions with generative AI systems safe. Select an example in the left panel to see how the model evaluates harms and risks in user prompts, assistant…
+            gr.HTML(elem_classes='system-description', value='<p>Granite Guardian models are specialized language models in the Granite family that allow you to detect harms and risks in generative AI systems. The Granite Guardian models can be used with any other large language models to make interactions with generative AI systems safe. Select an example in the left panel to see how the model evaluates harms and risks in user prompts, assistant responses, and for hallucinations in retrieval-augmented generation. In this demo, we use granite-guardian-3.0-8B.</p>')
     with gr.Row(elem_classes='column-gap'):
         with gr.Column(scale=0, elem_classes='no-gap'):
             title_display_left = gr.HTML("<h2>Harms & Risks</h2>", elem_classes=['subtitle', 'subtitle-harms'])
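The app.py change hinges on gr.update(), which patches an existing Gradio component's properties instead of recreating it. Below is a minimal, self-contained sketch of the same pattern; the dropdown, handler name, and layout are illustrative, not taken from the Space, and only the second catalog string appears in the diff.

import gradio as gr

# Illustrative sub-catalog name; mirrors the key used in the fixed line 34.
READ_ONLY_CATALOG = 'harmful_content_in_assistant_response'

def on_select(sub_catalog, text):
    # gr.update() patches the existing Textbox in place, swapping its
    # interactivity and CSS classes without rebuilding the component.
    if sub_catalog != READ_ONLY_CATALOG:
        return gr.update(value=text, interactive=True, elem_classes=['input-box'])
    return gr.update(value=text, interactive=False, elem_classes=['read-only', 'input-box'])

with gr.Blocks() as demo:
    catalog = gr.Dropdown(
        choices=['harmful_content_in_user_prompt', READ_ONLY_CATALOG],
        label='Sub-catalog',
    )
    user_message = gr.Textbox(label='User message')
    catalog.change(on_select, inputs=[catalog, user_message], outputs=user_message)

demo.launch()

Returning two different gr.update(...) payloads from one handler is what lets the fixed line switch the user-message box between an editable input and a read-only display.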
utils.py
CHANGED
@@ -9,9 +9,9 @@ def assessment_prompt(content):
 
 def get_prompt_template(test_case, sub_catalog_name):
     test_case_name = test_case['name']
-    if sub_catalog_name == '…
+    if sub_catalog_name == 'harmful_content_in_user_prompt':
         template_type = 'prompt'
-    elif sub_catalog_name == '…
+    elif sub_catalog_name == 'harmful_content_in_assistant_response':
         template_type = 'prompt_response'
     elif sub_catalog_name == 'rag_hallucination_risks':
         template_type = test_case_name
@@ -36,9 +36,9 @@ def get_result_description(sub_catalog_name, criteria_name):
     return messages[criteria_name]
 
 def get_evaluated_component(sub_catalog_name, criteria_name):
-    if sub_catalog_name == '…
+    if sub_catalog_name == 'harmful_content_in_user_prompt':
         component = "user"
-    elif sub_catalog_name == '…
+    elif sub_catalog_name == 'harmful_content_in_assistant_response':
         component = 'assistant'
     elif sub_catalog_name == 'rag_hallucination_risks':
         if criteria_name == "context_relevance":
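Both utils.py fixes restore the string keys these helpers dispatch on. A rough standalone sketch of that dispatch follows; the function name get_template_type, the ValueError fallbacks, the 'bias' test-case name, and the 'context' result for context_relevance are assumptions, since the diff truncates before showing how the rag_hallucination_risks branch resolves.

# Standalone sketch of the dispatch fixed in utils.py. Names are simplified
# variants of the ones in the file; see the assumptions noted above.

def get_template_type(sub_catalog_name, test_case_name):
    if sub_catalog_name == 'harmful_content_in_user_prompt':
        return 'prompt'            # evaluate the user prompt alone
    elif sub_catalog_name == 'harmful_content_in_assistant_response':
        return 'prompt_response'   # evaluate the prompt/response pair
    elif sub_catalog_name == 'rag_hallucination_risks':
        return test_case_name      # RAG risks use per-test-case templates
    raise ValueError(f'unknown sub-catalog: {sub_catalog_name}')

def get_evaluated_component(sub_catalog_name, criteria_name):
    if sub_catalog_name == 'harmful_content_in_user_prompt':
        return 'user'
    elif sub_catalog_name == 'harmful_content_in_assistant_response':
        return 'assistant'
    elif sub_catalog_name == 'rag_hallucination_risks':
        # assumed: context_relevance scores the retrieved context itself
        return 'context' if criteria_name == 'context_relevance' else 'assistant'
    raise ValueError(f'unknown sub-catalog: {sub_catalog_name}')

assert get_template_type('harmful_content_in_user_prompt', 'bias') == 'prompt'
assert get_evaluated_component('rag_hallucination_risks', 'context_relevance') == 'context'

Because every branch matches an exact string, a mistyped sub-catalog key would fall through all the cases silently; raising on the fallthrough, as sketched here, is one common defensive choice.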