Martín Santillán Cooper committed on
Commit
cab16f9
1 Parent(s): ee37b9e

Convert the results into a string

Browse files
Files changed (4) hide show
  1. app.py +9 -9
  2. model.py +1 -1
  3. styles.css +4 -0
  4. utils.py +18 -0
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import gradio as gr
2
  from dotenv import load_dotenv
3
 
4
- from utils import to_title_case, get_prompt_from_test_case, to_snake_case
5
  load_dotenv()
6
  import json
7
  from model import generate_text
@@ -34,7 +34,6 @@ def on_test_case_click(state: gr.State):
34
  user_message: gr.update(value=selected_test_case['user_message'], elem_classes=[], interactive=True),# if selected_sub_catalog != 'harmful_content_in_assistant_message' else gr.update(value=selected_test_case['user_message'], interactive=False, elem_classes=['read-only']),
35
  assistant_message: gr.update(value=selected_test_case['assistant_message'], visible=True) if selected_test_case['assistant_message'] is not None else gr.update(visible=False, value=''),
36
  result_text: gr.update(value=''),
37
- result_certainty: gr.update(value='')
38
  }
39
 
40
  def change_button_color(event: gr.EventData):
@@ -48,8 +47,11 @@ def on_submit(criteria, context, user_message, assistant_message, state):
48
  'user_message': user_message,
49
  'assistant_message': assistant_message,
50
  }, state['selected_sub_catalog'])
51
- result = generate_text(prompt)
52
- return result['assessment'], result['certainty']
 
 
 
53
 
54
  def on_show_prompt_click(criteria, context, user_message, assistant_message, state):
55
  prompt = get_prompt_from_test_case({
@@ -111,9 +113,7 @@ with gr.Blocks(
111
  submit_button = gr.Button("Evaluate", variant='primary')
112
  gr.HTML("Evaluation results", elem_classes='subtitle')
113
 
114
- with gr.Row():
115
- result_text = gr.Textbox(label="Result", interactive=False, elem_classes=['read-only'])
116
- result_certainty = gr.Number(label="Certainty", interactive=False, value='', elem_classes=['read-only'])
117
 
118
  show_propt_button = gr.Button('Show prompt', size='sm', scale=0)
119
 
@@ -129,11 +129,11 @@ with gr.Blocks(
129
  submit_button.click(
130
  on_submit,
131
  inputs=[criteria, context, user_message, assistant_message, state],
132
- outputs=[result_text, result_certainty])
133
 
134
  for button in [t for sub_catalog_name, sub_catalog_buttons in catalog_buttons.items() for t in sub_catalog_buttons.values()]:
135
  button.click(update_selected_test_case, inputs=[button, state], outputs=[state])\
136
- .then(on_test_case_click, inputs=state, outputs={test_case_name, criteria, context, user_message, assistant_message, result_text, result_certainty}) \
137
  .then(change_button_color, None, [v for c in catalog_buttons.values() for v in c.values()])
138
 
139
  demo.launch(server_name='0.0.0.0')
 
1
  import gradio as gr
2
  from dotenv import load_dotenv
3
 
4
+ from utils import get_evaluated_component, get_evaluated_component_adjective, to_title_case, get_prompt_from_test_case, to_snake_case
5
  load_dotenv()
6
  import json
7
  from model import generate_text
 
34
  user_message: gr.update(value=selected_test_case['user_message'], elem_classes=[], interactive=True),# if selected_sub_catalog != 'harmful_content_in_assistant_message' else gr.update(value=selected_test_case['user_message'], interactive=False, elem_classes=['read-only']),
35
  assistant_message: gr.update(value=selected_test_case['assistant_message'], visible=True) if selected_test_case['assistant_message'] is not None else gr.update(visible=False, value=''),
36
  result_text: gr.update(value=''),
 
37
  }
38
 
39
  def change_button_color(event: gr.EventData):
 
47
  'user_message': user_message,
48
  'assistant_message': assistant_message,
49
  }, state['selected_sub_catalog'])
50
+ evaluated_component = to_title_case(get_evaluated_component(state['selected_sub_catalog'], state['selected_criteria_name']))
51
+ evaluated_component_adjective = get_evaluated_component_adjective(state['selected_sub_catalog'], state['selected_criteria_name'])
52
+ result_label = generate_text(prompt)['assessment'] # Yes or No
53
+ result_str = f"Is the {evaluated_component} message harmful based on the risk definition: {result_label}"
54
+ return result_str
55
 
56
  def on_show_prompt_click(criteria, context, user_message, assistant_message, state):
57
  prompt = get_prompt_from_test_case({
 
113
  submit_button = gr.Button("Evaluate", variant='primary')
114
  gr.HTML("Evaluation results", elem_classes='subtitle')
115
 
116
+ result_text = gr.Textbox(label="Result", interactive=False, elem_classes=['read-only', "result-text"], max_lines=1)
 
 
117
 
118
  show_propt_button = gr.Button('Show prompt', size='sm', scale=0)
119
 
 
129
  submit_button.click(
130
  on_submit,
131
  inputs=[criteria, context, user_message, assistant_message, state],
132
+ outputs=[result_text])
133
 
134
  for button in [t for sub_catalog_name, sub_catalog_buttons in catalog_buttons.items() for t in sub_catalog_buttons.values()]:
135
  button.click(update_selected_test_case, inputs=[button, state], outputs=[state])\
136
+ .then(on_test_case_click, inputs=state, outputs={test_case_name, criteria, context, user_message, assistant_message, result_text}) \
137
  .then(change_button_color, None, [v for c in catalog_buttons.values() for v in c.values()])
138
 
139
  demo.launch(server_name='0.0.0.0')
model.py CHANGED
@@ -63,7 +63,7 @@ def generate_text(prompt):
63
  mock_model_call = os.getenv('MOCK_MODEL_CALL') == 'true'
64
  if mock_model_call:
65
  logger.debug('Returning mocked model result.')
66
- sleep(3)
67
  return {'assessment': 'Yes', 'certainty': 0.97}
68
  else:
69
  start = time()
 
63
  mock_model_call = os.getenv('MOCK_MODEL_CALL') == 'true'
64
  if mock_model_call:
65
  logger.debug('Returning mocked model result.')
66
+ sleep(1)
67
  return {'assessment': 'Yes', 'certainty': 0.97}
68
  else:
69
  start = time()
styles.css CHANGED
@@ -44,4 +44,8 @@
44
 
45
  .read-only label textarea,input {
46
  cursor: not-allowed !important;
 
 
 
 
47
  }
 
44
 
45
  .read-only label textarea,input {
46
  cursor: not-allowed !important;
47
+ }
48
+
49
+ .result-text label input {
50
+ box-shadow: none;
51
  }
utils.py CHANGED
@@ -20,6 +20,24 @@ def get_prompt_template(test_case, sub_catalog_name):
20
  def get_prompt_from_test_case(test_case, sub_catalog_name):
21
  return assessment_prompt(Template(get_prompt_template(test_case, sub_catalog_name)).render(**test_case))
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  def to_title_case(input_string):
24
  if input_string == 'rag_hallucination_risks': return 'RAG Hallucination Risks'
25
  return ' '.join(word.capitalize() for word in input_string.split('_'))
 
20
  def get_prompt_from_test_case(test_case, sub_catalog_name):
21
  return assessment_prompt(Template(get_prompt_template(test_case, sub_catalog_name)).render(**test_case))
22
 
23
+ def get_evaluated_component(sub_catalog_name, criteria_name):
24
+ if sub_catalog_name == 'harmful_content_in_user_message':
25
+ return "user"
26
+ elif sub_catalog_name == 'harmful_content_in_assistant_message':
27
+ return 'assistant'
28
+ elif sub_catalog_name == 'rag_hallucination_risks':
29
+ if criteria_name == "context_relevance":
30
+ return "context"
31
+ elif criteria_name == "groundedness":
32
+ return "assistant"
33
+ elif criteria_name == "answer_relevance":
34
+ return "assistant"
35
+
36
+ def get_evaluated_component_adjective(sub_catalog_name, criteria_name):
37
+ if criteria_name == 'context_relevance':
38
+ return 'relevant'
39
+ else: return 'harmful'
40
+
41
  def to_title_case(input_string):
42
  if input_string == 'rag_hallucination_risks': return 'RAG Hallucination Risks'
43
  return ' '.join(word.capitalize() for word in input_string.split('_'))