Spaces:
Running
on
Zero
Running
on
Zero
Martín Santillán Cooper
committed on
Commit
•
2f382a0
1
Parent(s):
aeabe15
More updates
Browse files
- app.py +61 -56
- send.svg +0 -1
- styles.css +12 -2
app.py
CHANGED
@@ -31,9 +31,11 @@ def on_test_case_click(state: gr.State):
|
|
31 |
test_case_name: f'<h2>{to_title_case(selected_test_case["name"])}</h2>',
|
32 |
criteria: selected_test_case['criteria'],
|
33 |
context: gr.update(value=selected_test_case['context'], visible=True) if selected_test_case['context'] is not None else gr.update(visible=False, value=''),
|
34 |
-
user_message: gr.update(value=selected_test_case['user_message'], interactive=True)
|
35 |
assistant_message: gr.update(value=selected_test_case['assistant_message'], visible=True) if selected_test_case['assistant_message'] is not None else gr.update(visible=False, value=''),
|
36 |
result_text: gr.update(value='', visible=False),
|
|
|
|
|
37 |
}
|
38 |
|
39 |
def change_button_color(event: gr.EventData):
|
@@ -54,7 +56,7 @@ def on_submit(criteria, context, user_message, assistant_message, state):
|
|
54 |
# result_str = f"Is the {evaluated_component} message harmful based on the risk definition: {result_label}"
|
55 |
html_str = f"<p>Is the {evaluated_component} {evaluated_component_adjective}: <strong>{result_label}</strong></p>"
|
56 |
# html_str = f"Is the {evaluated_component} {evaluated_component_adjective}: {result_label}"
|
57 |
-
return gr.update(value=html_str)
|
58 |
|
59 |
def on_show_prompt_click(criteria, context, user_message, assistant_message, state):
|
60 |
prompt = get_prompt_from_test_case({
|
@@ -90,56 +92,59 @@ with gr.Blocks(
|
|
90 |
font=[gr.themes.GoogleFont("IBM Plex Sans"), gr.themes.GoogleFont('Source Sans 3')]),
|
91 |
css='styles.css') as demo:
|
92 |
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
gr.
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
|
|
|
|
|
|
143 |
|
144 |
show_propt_button.click(
|
145 |
on_show_prompt_click,
|
@@ -149,17 +154,17 @@ with gr.Blocks(
|
|
149 |
|
150 |
submit_button \
|
151 |
.click(
|
152 |
-
lambda: gr.update(visible=True),
|
153 |
inputs=None,
|
154 |
-
outputs=result_text) \
|
155 |
.then(
|
156 |
on_submit,
|
157 |
inputs=[criteria, context, user_message, assistant_message, state],
|
158 |
-
outputs=result_text)
|
159 |
|
160 |
for button in [t for sub_catalog_name, sub_catalog_buttons in catalog_buttons.items() for t in sub_catalog_buttons.values()]:
|
161 |
button.click(update_selected_test_case, inputs=[button, state], outputs=[state])\
|
162 |
-
.then(on_test_case_click, inputs=state, outputs={test_case_name, criteria, context, user_message, assistant_message, result_text}) \
|
163 |
.then(change_button_color, None, [v for c in catalog_buttons.values() for v in c.values()])
|
164 |
|
165 |
demo.launch(server_name='0.0.0.0')
|
|
|
31 |
test_case_name: f'<h2>{to_title_case(selected_test_case["name"])}</h2>',
|
32 |
criteria: selected_test_case['criteria'],
|
33 |
context: gr.update(value=selected_test_case['context'], visible=True) if selected_test_case['context'] is not None else gr.update(visible=False, value=''),
|
34 |
+
user_message: gr.update(value=selected_test_case['user_message'], interactive=True, elem_classes=['input-box']) if selected_sub_catalog != 'harmful_content_in_assistant_message' else gr.update(value=selected_test_case['user_message'], interactive=False, elem_classes=['read-only', 'input-box']),
|
35 |
assistant_message: gr.update(value=selected_test_case['assistant_message'], visible=True) if selected_test_case['assistant_message'] is not None else gr.update(visible=False, value=''),
|
36 |
result_text: gr.update(value='', visible=False),
|
37 |
+
result_container: gr.update(visible=False),
|
38 |
+
evaluation_results_label: gr.update(visible=False)
|
39 |
}
|
40 |
|
41 |
def change_button_color(event: gr.EventData):
|
|
|
56 |
# result_str = f"Is the {evaluated_component} message harmful based on the risk definition: {result_label}"
|
57 |
html_str = f"<p>Is the {evaluated_component} {evaluated_component_adjective}: <strong>{result_label}</strong></p>"
|
58 |
# html_str = f"Is the {evaluated_component} {evaluated_component_adjective}: {result_label}"
|
59 |
+
return [gr.update(value=html_str), gr.update(visible=True), gr.update(visible=True)]
|
60 |
|
61 |
def on_show_prompt_click(criteria, context, user_message, assistant_message, state):
|
62 |
prompt = get_prompt_from_test_case({
|
|
|
92 |
font=[gr.themes.GoogleFont("IBM Plex Sans"), gr.themes.GoogleFont('Source Sans 3')]),
|
93 |
css='styles.css') as demo:
|
94 |
|
95 |
+
with gr.Blocks(elem_classes="roott"):
|
96 |
+
|
97 |
+
state = gr.State(value={
|
98 |
+
'selected_sub_catalog': 'harmful_content_in_user_message',
|
99 |
+
'selected_criteria_name': 'general_harm'
|
100 |
+
})
|
101 |
+
|
102 |
+
starting_test_case = [t for sub_catalog_name, sub_catalog in catalog.items() for t in sub_catalog if t['name'] == state.value['selected_criteria_name'] and sub_catalog_name == state.value['selected_sub_catalog']][0]
|
103 |
+
|
104 |
+
with gr.Row(elem_classes='title-row'):
|
105 |
+
with gr.Column(scale=4):
|
106 |
+
gr.HTML('<h2>IBM Granite Guardian 3.0</h2>', elem_classes='title')
|
107 |
+
gr.HTML(elem_classes='system-description', value='<p>Granite Guardian models are specialized language models in the Granite family that allow you to detect harms and risks in generative AI systems. The Granite Guardian models can be used with any other large language models to make interactions with generative AI systems safe. Select an example in the left panel to see how the model evaluates harms and risks in user messages, assistant messages, and for hallucinations in retrieval-augmented generation. In this demo, we use granite-guardian-3.0-8B.</p>')
|
108 |
+
with gr.Row(elem_classes='column-gap'):
|
109 |
+
with gr.Column(scale=0, elem_classes='no-gap'):
|
110 |
+
title_display_left = gr.HTML("<h2>Harms & Risks</h2>", elem_classes=['subtitle', 'subtitle-harms'])
|
111 |
+
accordions = []
|
112 |
+
catalog_buttons: dict[str,dict[str,gr.Button]] = {}
|
113 |
+
for i, (sub_catalog_name, sub_catalog) in enumerate(catalog.items()):
|
114 |
+
with gr.Accordion(to_title_case(sub_catalog_name), open=(i==0), elem_classes='accordion') as accordion:
|
115 |
+
for test_case in sub_catalog:
|
116 |
+
elem_classes=['catalog-button']
|
117 |
+
elem_id=f"{sub_catalog_name}---{test_case['name']}"
|
118 |
+
if starting_test_case == test_case:
|
119 |
+
elem_classes.append('selected')
|
120 |
+
|
121 |
+
if not sub_catalog_name in catalog_buttons:
|
122 |
+
catalog_buttons[sub_catalog_name] = {}
|
123 |
+
|
124 |
+
catalog_buttons[sub_catalog_name][test_case['name']] = \
|
125 |
+
gr.Button(to_title_case(test_case['name']), elem_classes=elem_classes, variant='secondary', size='sm', elem_id=elem_id)
|
126 |
+
|
127 |
+
accordions.append(accordion)
|
128 |
+
|
129 |
+
with gr.Column(visible=True, scale=1) as test_case_content:
|
130 |
+
with gr.Row():
|
131 |
+
test_case_name = gr.HTML(f'<h2>{to_title_case(starting_test_case["name"])}</h2>', elem_classes='subtitle')
|
132 |
+
show_propt_button = gr.Button('Show prompt', size='sm', scale=0, min_width=110)
|
133 |
+
|
134 |
+
criteria = gr.Textbox(label="Evaluation Criteria", lines=3, interactive=False, value=starting_test_case['criteria'], elem_classes=['read-only', 'input-box', 'margin-bottom'])
|
135 |
+
gr.HTML(elem_classes=['block', 'content-gap'])
|
136 |
+
context = gr.Textbox(label="Context", lines=3, interactive=True, value=starting_test_case['context'], visible=False, elem_classes=['input-box'])
|
137 |
+
user_message = gr.Textbox(label="User Message", lines=3, interactive=True, value=starting_test_case['user_message'], elem_classes=['input-box'])
|
138 |
+
assistant_message = gr.Textbox(label="Assistant Message", lines=3, interactive=True, visible=False, value=starting_test_case['assistant_message'], elem_classes=['input-box'])
|
139 |
+
|
140 |
+
submit_button = gr.Button("Evaluate", variant='primary') #,icon=os.path.join(os.path.dirname(os.path.abspath(__file__)), 'send.svg'))
|
141 |
+
|
142 |
+
with gr.Column(elem_classes="result-container", visible=False) as result_container:
|
143 |
+
evaluation_results_label = gr.HTML("<span>Results</span>", elem_classes='result-title', visible=False)
|
144 |
+
result_text = gr.HTML(label="Result", elem_classes=['result-text', 'input-box', 'read-only', 'block'], visible=False, value='')
|
145 |
+
|
146 |
+
with Modal(visible=False, elem_classes='modal') as modal:
|
147 |
+
prompt = gr.Markdown('')
|
148 |
|
149 |
show_propt_button.click(
|
150 |
on_show_prompt_click,
|
|
|
154 |
|
155 |
submit_button \
|
156 |
.click(
|
157 |
+
lambda: [gr.update(visible=True), gr.update(visible=True)],
|
158 |
inputs=None,
|
159 |
+
outputs=[result_container, result_text]) \
|
160 |
.then(
|
161 |
on_submit,
|
162 |
inputs=[criteria, context, user_message, assistant_message, state],
|
163 |
+
outputs=[result_text, evaluation_results_label, result_container])
|
164 |
|
165 |
for button in [t for sub_catalog_name, sub_catalog_buttons in catalog_buttons.items() for t in sub_catalog_buttons.values()]:
|
166 |
button.click(update_selected_test_case, inputs=[button, state], outputs=[state])\
|
167 |
+
.then(on_test_case_click, inputs=state, outputs={test_case_name, criteria, context, user_message, assistant_message, result_text, result_container, evaluation_results_label}) \
|
168 |
.then(change_button_color, None, [v for c in catalog_buttons.values() for v in c.values()])
|
169 |
|
170 |
demo.launch(server_name='0.0.0.0')
|
send.svg
DELETED
styles.css
CHANGED
@@ -1,3 +1,7 @@
|
|
|
|
|
|
|
|
|
|
1 |
.title-row {
|
2 |
margin-bottom: 0.75rem;
|
3 |
}
|
@@ -16,8 +20,9 @@
|
|
16 |
text-align: justify;
|
17 |
text-justify: inter-word;
|
18 |
font-weight: 400;
|
19 |
-
font-size:
|
20 |
line-height: 20px;
|
|
|
21 |
}
|
22 |
|
23 |
.subtitle h2 {
|
@@ -65,6 +70,7 @@
|
|
65 |
font-weight: 400;
|
66 |
font-size: 14px;
|
67 |
line-height: 18px;
|
|
|
68 |
}
|
69 |
|
70 |
.read-only label textarea,input {
|
@@ -85,11 +91,11 @@
|
|
85 |
|
86 |
.result-text p {
|
87 |
box-shadow: none;
|
88 |
-
padding: var(--input-padding);
|
89 |
}
|
90 |
|
91 |
.result-container {
|
92 |
background-color: var(--block-background-fill);
|
|
|
93 |
padding: var(--block-padding);
|
94 |
}
|
95 |
|
@@ -121,4 +127,8 @@
|
|
121 |
|
122 |
.submit-button::after {
|
123 |
content: url('./send--alt.svg');
|
|
|
|
|
|
|
|
|
124 |
}
|
|
|
1 |
+
.roott {
|
2 |
+
width: 500px !important;
|
3 |
+
}
|
4 |
+
|
5 |
.title-row {
|
6 |
margin-bottom: 0.75rem;
|
7 |
}
|
|
|
20 |
text-align: justify;
|
21 |
text-justify: inter-word;
|
22 |
font-weight: 400;
|
23 |
+
font-size: 14px;
|
24 |
line-height: 20px;
|
25 |
+
font-style: normal;
|
26 |
}
|
27 |
|
28 |
.subtitle h2 {
|
|
|
70 |
font-weight: 400;
|
71 |
font-size: 14px;
|
72 |
line-height: 18px;
|
73 |
+
/* height: 30px !important; */
|
74 |
}
|
75 |
|
76 |
.read-only label textarea,input {
|
|
|
91 |
|
92 |
.result-text p {
|
93 |
box-shadow: none;
|
|
|
94 |
}
|
95 |
|
96 |
.result-container {
|
97 |
background-color: var(--block-background-fill);
|
98 |
+
gap: var(--block-padding);
|
99 |
padding: var(--block-padding);
|
100 |
}
|
101 |
|
|
|
127 |
|
128 |
.submit-button::after {
|
129 |
content: url('./send--alt.svg');
|
130 |
+
}
|
131 |
+
|
132 |
+
.subtitle-harms {
|
133 |
+
padding-bottom: 8px;
|
134 |
}
|