Martín Santillán Cooper committed on
Commit
e5f0735
1 Parent(s): 2cb730a

UX improvements

Browse files
Files changed (4) hide show
  1. app.py +13 -10
  2. model.py +1 -1
  3. styles.css +29 -1
  4. utils.py +9 -8
app.py CHANGED
@@ -34,6 +34,7 @@ def on_test_case_click(state: gr.State):
34
  user_message: gr.update(value=selected_test_case['user_message'], elem_classes=[], interactive=True),# if selected_sub_catalog != 'harmful_content_in_assistant_message' else gr.update(value=selected_test_case['user_message'], interactive=False, elem_classes=['read-only']),
35
  assistant_message: gr.update(value=selected_test_case['assistant_message'], visible=True) if selected_test_case['assistant_message'] is not None else gr.update(visible=False, value=''),
36
  result_text: gr.update(value='', visible=False),
 
37
  }
38
 
39
  def change_button_color(event: gr.EventData):
@@ -47,12 +48,13 @@ def on_submit(criteria, context, user_message, assistant_message, state):
47
  'user_message': user_message,
48
  'assistant_message': assistant_message,
49
  }, state['selected_sub_catalog'])
50
- evaluated_component = to_title_case(get_evaluated_component(state['selected_sub_catalog'], state['selected_criteria_name']))
51
  evaluated_component_adjective = get_evaluated_component_adjective(state['selected_sub_catalog'], state['selected_criteria_name'])
52
  logger.debug(f"Starting evaluation for subcatelog {state['selected_sub_catalog']} and criteria name {state['selected_criteria_name']}")
53
  result_label = generate_text(prompt)['assessment'] # Yes or No
54
- result_str = f"Is the {evaluated_component} message harmful based on the risk definition: {result_label}"
55
- return gr.update(value=result_str)
 
56
 
57
  def on_show_prompt_click(criteria, context, user_message, assistant_message, state):
58
  prompt = get_prompt_from_test_case({
@@ -114,11 +116,12 @@ with gr.Blocks(
114
  assistant_message = gr.Textbox(label="Assistant Message", lines=3, interactive=True, visible=False, value=starting_test_case['assistant_message'])
115
 
116
  submit_button = gr.Button("Evaluate", variant='primary')
117
-
118
- evaluation_results_label = gr.HTML("Evaluation results", elem_classes='subtitle', visible=False)
119
- result_text = gr.Textbox(label="Result", interactive=False, elem_classes=['read-only', "result-text"], max_lines=1, visible=False)
120
 
121
- with Modal(visible=False) as modal:
 
 
 
 
122
  prompt = gr.Markdown("Hello world!")
123
 
124
  show_propt_button.click(
@@ -127,14 +130,14 @@ with gr.Blocks(
127
  outputs=prompt
128
  ).then(lambda: gr.update(visible=True), None, modal)
129
 
130
- submit_button.click(lambda: gr.update(visible=True), inputs=None, outputs=result_text).then(
131
  on_submit,
132
  inputs=[criteria, context, user_message, assistant_message, state],
133
- outputs=[result_text])
134
 
135
  for button in [t for sub_catalog_name, sub_catalog_buttons in catalog_buttons.items() for t in sub_catalog_buttons.values()]:
136
  button.click(update_selected_test_case, inputs=[button, state], outputs=[state])\
137
- .then(on_test_case_click, inputs=state, outputs={test_case_name, criteria, context, user_message, assistant_message, result_text}) \
138
  .then(change_button_color, None, [v for c in catalog_buttons.values() for v in c.values()])
139
 
140
  demo.launch(server_name='0.0.0.0')
 
34
  user_message: gr.update(value=selected_test_case['user_message'], elem_classes=[], interactive=True),# if selected_sub_catalog != 'harmful_content_in_assistant_message' else gr.update(value=selected_test_case['user_message'], interactive=False, elem_classes=['read-only']),
35
  assistant_message: gr.update(value=selected_test_case['assistant_message'], visible=True) if selected_test_case['assistant_message'] is not None else gr.update(visible=False, value=''),
36
  result_text: gr.update(value='', visible=False),
37
+ result_container: gr.update(visible=False)
38
  }
39
 
40
  def change_button_color(event: gr.EventData):
 
48
  'user_message': user_message,
49
  'assistant_message': assistant_message,
50
  }, state['selected_sub_catalog'])
51
+ evaluated_component = get_evaluated_component(state['selected_sub_catalog'], state['selected_criteria_name'])
52
  evaluated_component_adjective = get_evaluated_component_adjective(state['selected_sub_catalog'], state['selected_criteria_name'])
53
  logger.debug(f"Starting evaluation for subcatelog {state['selected_sub_catalog']} and criteria name {state['selected_criteria_name']}")
54
  result_label = generate_text(prompt)['assessment'] # Yes or No
55
+ # result_str = f"Is the {evaluated_component} message harmful based on the risk definition: {result_label}"
56
+ html_str = f"<p>Is the {evaluated_component} {evaluated_component_adjective}: <strong>{result_label}</strong></p>"
57
+ return gr.update(value=html_str)
58
 
59
  def on_show_prompt_click(criteria, context, user_message, assistant_message, state):
60
  prompt = get_prompt_from_test_case({
 
116
  assistant_message = gr.Textbox(label="Assistant Message", lines=3, interactive=True, visible=False, value=starting_test_case['assistant_message'])
117
 
118
  submit_button = gr.Button("Evaluate", variant='primary')
 
 
 
119
 
120
+ with gr.Column(elem_classes="result-container", visible=False) as result_container:
121
+ evaluation_results_label = gr.HTML("<span>Results</span>", elem_classes='result-title', visible=False)
122
+ result_text = gr.HTML(label="Result", elem_classes=['read-only', "result-text"], visible=False)
123
+
124
+ with Modal(visible=False, elem_classes='modal') as modal:
125
  prompt = gr.Markdown("Hello world!")
126
 
127
  show_propt_button.click(
 
130
  outputs=prompt
131
  ).then(lambda: gr.update(visible=True), None, modal)
132
 
133
+ submit_button.click(lambda: [gr.update(visible=True, value=''), gr.update(visible=True), gr.update(visible=True)], inputs=None, outputs=[result_text, evaluation_results_label, result_container]).then(
134
  on_submit,
135
  inputs=[criteria, context, user_message, assistant_message, state],
136
+ outputs=result_text)
137
 
138
  for button in [t for sub_catalog_name, sub_catalog_buttons in catalog_buttons.items() for t in sub_catalog_buttons.values()]:
139
  button.click(update_selected_test_case, inputs=[button, state], outputs=[state])\
140
+ .then(on_test_case_click, inputs=state, outputs={test_case_name, criteria, context, user_message, assistant_message, result_text, result_container}) \
141
  .then(change_button_color, None, [v for c in catalog_buttons.values() for v in c.values()])
142
 
143
  demo.launch(server_name='0.0.0.0')
model.py CHANGED
@@ -58,7 +58,7 @@ def generate_text(prompt):
58
  mock_model_call = os.getenv('MOCK_MODEL_CALL') == 'true'
59
  if mock_model_call:
60
  logger.debug('Returning mocked model result.')
61
- sleep(1)
62
  return {'assessment': 'Yes', 'certainty': 0.97}
63
  else:
64
  start = time()
 
58
  mock_model_call = os.getenv('MOCK_MODEL_CALL') == 'true'
59
  if mock_model_call:
60
  logger.debug('Returning mocked model result.')
61
+ sleep(3)
62
  return {'assessment': 'Yes', 'certainty': 0.97}
63
  else:
64
  start = time()
styles.css CHANGED
@@ -46,6 +46,34 @@
46
  cursor: not-allowed !important;
47
  }
48
 
49
- .result-text label input {
50
  box-shadow: none;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  }
 
46
  cursor: not-allowed !important;
47
  }
48
 
49
+ .result-text p {
50
  box-shadow: none;
51
+ padding: var(--input-padding);
52
+ }
53
+
54
+ .result-container {
55
+ background-color: var(--block-background-fill);
56
+ padding: var(--block-padding);
57
+ }
58
+
59
+ .result-title span{
60
+ display: inline-block;
61
+ position: relative;
62
+ z-index: var(--layer-4);
63
+ border: solid var(--block-title-border-width) var(--block-title-border-color);
64
+ border-radius: var(--block-title-radius);
65
+ background: var(--block-title-background-fill);
66
+ padding: var(--block-title-padding);
67
+ color: var(--block-title-text-color);
68
+ font-weight: var(--block-title-text-weight);
69
+ font-size: var(--block-title-text-size);
70
+ line-height: var(--line-sm);
71
+ }
72
+
73
+ .modal .modal-container .modal-block{
74
+ padding: 1.5rem;
75
+ }
76
+
77
+ .result-text .pending {
78
+ padding-top: -4rem
79
  }
utils.py CHANGED
@@ -22,21 +22,22 @@ def get_prompt_from_test_case(test_case, sub_catalog_name):
22
 
23
  def get_evaluated_component(sub_catalog_name, criteria_name):
24
  if sub_catalog_name == 'harmful_content_in_user_message':
25
- return "user"
26
  elif sub_catalog_name == 'harmful_content_in_assistant_message':
27
- return 'assistant'
28
  elif sub_catalog_name == 'rag_hallucination_risks':
29
  if criteria_name == "context_relevance":
30
- return "context"
31
  elif criteria_name == "groundedness":
32
- return "assistant"
33
  elif criteria_name == "answer_relevance":
34
- return "assistant"
 
35
 
36
  def get_evaluated_component_adjective(sub_catalog_name, criteria_name):
37
- if criteria_name == 'context_relevance':
38
- return 'relevant'
39
- else: return 'harmful'
40
 
41
  def to_title_case(input_string):
42
  if input_string == 'rag_hallucination_risks': return 'RAG Hallucination Risks'
 
22
 
23
  def get_evaluated_component(sub_catalog_name, criteria_name):
24
  if sub_catalog_name == 'harmful_content_in_user_message':
25
+ component = "user"
26
  elif sub_catalog_name == 'harmful_content_in_assistant_message':
27
+ component = 'assistant'
28
  elif sub_catalog_name == 'rag_hallucination_risks':
29
  if criteria_name == "context_relevance":
30
+ component = "context"
31
  elif criteria_name == "groundedness":
32
+ component = "assistant"
33
  elif criteria_name == "answer_relevance":
34
+ component = "assistant"
35
+ return f"'{to_title_case(component)} message'"
36
 
37
  def get_evaluated_component_adjective(sub_catalog_name, criteria_name):
38
+ if criteria_name == 'context_relevance' or criteria_name == 'answer_relevance':
39
+ return 'irrelevant based on the definition'
40
+ else: return 'harmful based on the risk definition'
41
 
42
  def to_title_case(input_string):
43
  if input_string == 'rag_hallucination_risks': return 'RAG Hallucination Risks'