Spaces:

ibm-granite
/

granite-guardian-3.1-8b

Running on Zero

App Files Files Community

Martín Santillán Cooper committed 5 days ago

Commit

6047b61

•

1 Parent(s): a89905d

Continue adaptation

Browse files

Files changed (9) hide show

.dockerignore +1 -1
.env.example +1 -1
.gitignore +1 -1
catalog.json +1 -1
convert_to_string.py +7 -13
src/app.py +38 -30
src/model.py +85 -84
src/styles.css +1 -1
src/utils.py +4 -3

.dockerignore CHANGED Viewed

@@ -5,4 +5,4 @@
 *.sh
 *.md
 __pycache__/
-flagged/

 *.sh
 *.md
 __pycache__/
+flagged/

.env.example CHANGED Viewed

@@ -1,4 +1,4 @@
 MODEL_PATH='ibm-granite/granite-guardian-3.1-8b'
 INFERENCE_ENGINE='VLLM' # one of [WATSONX, MOCK, VLLM]
 WATSONX_API_KEY=''
-WATSONX_PROJECT_ID=''

 MODEL_PATH='ibm-granite/granite-guardian-3.1-8b'
 INFERENCE_ENGINE='VLLM' # one of [WATSONX, MOCK, VLLM]
 WATSONX_API_KEY=''
+WATSONX_PROJECT_ID=''

.gitignore CHANGED Viewed

@@ -4,4 +4,4 @@ parse.py
 unparsed_catalog.json
 __pycache__/
 logs.txt
-secrets.yaml

 unparsed_catalog.json
 __pycache__/
 logs.txt
+secrets.yaml

catalog.json CHANGED Viewed

@@ -112,4 +112,4 @@
         "context": null
     }
 ]
-}

         "context": null
     }
 ]
+}

convert_to_string.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import json
 def dict_to_json_with_newlines(data):
     """
     Converts a dictionary into a JSON string with explicit newlines (\n) added.
@@ -12,28 +13,21 @@ def dict_to_json_with_newlines(data):
     """
     # Convert the dictionary to a pretty-printed JSON string
     pretty_json = json.dumps(data, indent=2)
     # Replace actual newlines with escaped newlines (\n)
     json_with_newlines = pretty_json.replace("\n", "\\n")
     # Escape double quotes for embedding inside other JSON
     json_with_newlines = json_with_newlines.replace('"', '\\"')
     return json_with_newlines
 # Example dictionary
-example_dict =[
-  {
-    "name": "comment_list",
-    "arguments": {
-      "video_id": 456789123,
-      "count": 15
-    }
-  }
-]
 # Convert the dictionary
 result = dict_to_json_with_newlines(example_dict)
 print("Resulting JSON string:")
-print(result)

 import json
 def dict_to_json_with_newlines(data):
     """
     Converts a dictionary into a JSON string with explicit newlines (\n) added.
     """
     # Convert the dictionary to a pretty-printed JSON string
     pretty_json = json.dumps(data, indent=2)
     # Replace actual newlines with escaped newlines (\n)
     json_with_newlines = pretty_json.replace("\n", "\\n")
     # Escape double quotes for embedding inside other JSON
     json_with_newlines = json_with_newlines.replace('"', '\\"')
     return json_with_newlines
 # Example dictionary
+example_dict = [{"name": "comment_list", "arguments": {"video_id": 456789123, "count": 15}}]
 # Convert the dictionary
 result = dict_to_json_with_newlines(example_dict)
 print("Resulting JSON string:")
+print(result)

src/app.py CHANGED Viewed

@@ -6,7 +6,7 @@ from dotenv import load_dotenv
 from gradio_modal import Modal
 from logger import logger
-from model import generate_text, get_prompt
 from utils import (
     get_messages,
     get_result_description,
@@ -20,7 +20,7 @@ load_dotenv()
 catalog = {}
-toy_json =  '{"name": "John"}'
 with open("catalog.json") as f:
     logger.debug("Loading catalog from json.")
@@ -63,12 +63,9 @@ def on_test_case_click(state: gr.State):
     # update context field:
     if is_context_editable:
         context = gr.update(
-            value=selected_test_case["context"],
-            interactive=True,
-            visible=True,
-            elem_classes=["input-box"]
         )
-    else:
         context = gr.update(
             visible=selected_test_case["context"] is not None,
             value=selected_test_case["context"],
@@ -85,19 +82,13 @@ def on_test_case_click(state: gr.State):
     # update user message field
     if is_user_message_editable:
         user_message = gr.update(
-            value=selected_test_case["user_message"],
-            visible=True,
-            interactive=True,
-            elem_classes=["input-box"]
         )
     else:
         user_message = gr.update(
-            value=selected_test_case["user_message"],
-            interactive=False,
-            elem_classes=["read-only", "input-box"]
         )
     # update assistant message field
     if is_tools_present:
         assistant_message_json = gr.update(
@@ -124,8 +115,16 @@ def on_test_case_click(state: gr.State):
         assistant_message_json = gr.update(visible=False)
     result_text = gr.update(visible=False, value="")
-    return test_case_name,criteria,context,user_message,assistant_message_text,assistant_message_json,tools,result_text
 def change_button_color(event: gr.EventData):
@@ -144,14 +143,15 @@ def on_submit(criteria, context, user_message, assistant_message_text, assistant
     criteria_name = state["selected_criteria_name"]
     if criteria_name == "function_calling_hallucination":
         assistant_message = assistant_message_json
-    else: assistant_message = assistant_message_text
     test_case = {
         "name": criteria_name,
         "criteria": criteria,
         "context": context,
         "user_message": user_message,
         "assistant_message": assistant_message,
-        "tools": tools
     }
     messages = get_messages(test_case=test_case, sub_catalog_name=state["selected_sub_catalog"])
@@ -160,7 +160,7 @@ def on_submit(criteria, context, user_message, assistant_message_text, assistant
         f"Starting evaluation for subcatelog {state['selected_sub_catalog']} and criteria name {state['selected_criteria_name']}"
     )
-    result_label = generate_text(messages=messages, criteria_name=criteria_name)["assessment"]  # Yes or No
     html_str = f"<p>{get_result_description(state['selected_sub_catalog'], state['selected_criteria_name'])} <strong>{result_label}</strong></p>"
     # html_str = f"{get_result_description(state['selected_sub_catalog'], state['selected_criteria_name'])} {result_label}"
@@ -171,7 +171,8 @@ def on_show_prompt_click(criteria, context, user_message, assistant_message_text
     criteria_name = state["selected_criteria_name"]
     if criteria_name == "function_calling_hallucination":
         assistant_message = assistant_message_json
-    else: assistant_message = assistant_message_text
     test_case = {
         "name": criteria_name,
         "criteria": criteria,
@@ -247,7 +248,7 @@ with gr.Blocks(
             gr.HTML("<h2>IBM Granite Guardian 3.1</h2>", elem_classes="title")
             gr.HTML(
                 elem_classes="system-description",
-                value="<p>Granite Guardian models are specialized language models in the Granite family that can detect harms and risks in generative AI systems. They can be used with any large language model to make interactions with generative AI systems safe. Select an example in the left panel to see how the Granite Guardian model evaluates harms and risks in user prompts, assistant responses, and for hallucinations in retrieval-augmented generation. In this demo, we use granite-guardian-3.1-8b.</p>",
             )
     with gr.Row(elem_classes="column-gap"):
         with gr.Column(scale=0, elem_classes="no-gap"):
@@ -301,11 +302,7 @@ with gr.Blocks(
                 elem_classes=["input-box"],
             )
-            tools = gr.Code(
-                label="API Definition (Tools)",
-                visible=False,
-                language='json'
-            )
             user_message = gr.Textbox(
                 label="User Prompt",
@@ -327,7 +324,7 @@ with gr.Blocks(
             assistant_message_json = gr.Code(
                 label="Assistant Response",
                 visible=False,
-                language='json',
                 value=None,
                 elem_classes=["input-box"],
             )
@@ -350,7 +347,9 @@ with gr.Blocks(
     # events
     show_propt_button.click(
-        on_show_prompt_click, inputs=[criteria, context, user_message, assistant_message_text, assistant_message_json, tools, state], outputs=prompt
     ).then(lambda: gr.update(visible=True), None, modal)
     submit_button.click(lambda: gr.update(visible=True, value=""), None, result_text).then(
@@ -368,7 +367,16 @@ with gr.Blocks(
         ).then(update_selected_test_case, inputs=[button, state], outputs=[state]).then(
             on_test_case_click,
             inputs=state,
-            outputs=[test_case_name, criteria, context, user_message, assistant_message_text, assistant_message_json, tools, result_text],
         )
 demo.launch(server_name="0.0.0.0")

 from gradio_modal import Modal
 from logger import logger
+from model import get_guardian_response, get_prompt
 from utils import (
     get_messages,
     get_result_description,
 catalog = {}
+toy_json = '{"name": "John"}'
 with open("catalog.json") as f:
     logger.debug("Loading catalog from json.")
     # update context field:
     if is_context_editable:
         context = gr.update(
+            value=selected_test_case["context"], interactive=True, visible=True, elem_classes=["input-box"]
         )
+    else:
         context = gr.update(
             visible=selected_test_case["context"] is not None,
             value=selected_test_case["context"],
     # update user message field
     if is_user_message_editable:
         user_message = gr.update(
+            value=selected_test_case["user_message"], visible=True, interactive=True, elem_classes=["input-box"]
         )
     else:
         user_message = gr.update(
+            value=selected_test_case["user_message"], interactive=False, elem_classes=["read-only", "input-box"]
         )
     # update assistant message field
     if is_tools_present:
         assistant_message_json = gr.update(
         assistant_message_json = gr.update(visible=False)
     result_text = gr.update(visible=False, value="")
+    return (
+        test_case_name,
+        criteria,
+        context,
+        user_message,
+        assistant_message_text,
+        assistant_message_json,
+        tools,
+        result_text,
+    )
 def change_button_color(event: gr.EventData):
     criteria_name = state["selected_criteria_name"]
     if criteria_name == "function_calling_hallucination":
         assistant_message = assistant_message_json
+    else:
+        assistant_message = assistant_message_text
     test_case = {
         "name": criteria_name,
         "criteria": criteria,
         "context": context,
         "user_message": user_message,
         "assistant_message": assistant_message,
+        "tools": tools,
     }
     messages = get_messages(test_case=test_case, sub_catalog_name=state["selected_sub_catalog"])
         f"Starting evaluation for subcatelog {state['selected_sub_catalog']} and criteria name {state['selected_criteria_name']}"
     )
+    result_label = get_guardian_response(messages=messages, criteria_name=criteria_name)["assessment"]  # Yes or No
     html_str = f"<p>{get_result_description(state['selected_sub_catalog'], state['selected_criteria_name'])} <strong>{result_label}</strong></p>"
     # html_str = f"{get_result_description(state['selected_sub_catalog'], state['selected_criteria_name'])} {result_label}"
     criteria_name = state["selected_criteria_name"]
     if criteria_name == "function_calling_hallucination":
         assistant_message = assistant_message_json
+    else:
+        assistant_message = assistant_message_text
     test_case = {
         "name": criteria_name,
         "criteria": criteria,
             gr.HTML("<h2>IBM Granite Guardian 3.1</h2>", elem_classes="title")
             gr.HTML(
                 elem_classes="system-description",
+                value="<p>Granite Guardian models are specialized language models in the Granite family that can detect harms and risks in generative AI systems. They can be used with any large language model to make interactions with generative AI systems safe. Select an example in the left panel to see how the Granite Guardian model evaluates harms and risks in user prompts, assistant responses, and for hallucinations in retrival-augmented generation and function calling. In this demo, we use granite-guardian-3.1-8b.</p>",
             )
     with gr.Row(elem_classes="column-gap"):
         with gr.Column(scale=0, elem_classes="no-gap"):
                 elem_classes=["input-box"],
             )
+            tools = gr.Code(label="API Definition (Tools)", visible=False, language="json")
             user_message = gr.Textbox(
                 label="User Prompt",
             assistant_message_json = gr.Code(
                 label="Assistant Response",
                 visible=False,
+                language="json",
                 value=None,
                 elem_classes=["input-box"],
             )
     # events
     show_propt_button.click(
+        on_show_prompt_click,
+        inputs=[criteria, context, user_message, assistant_message_text, assistant_message_json, tools, state],
+        outputs=prompt,
     ).then(lambda: gr.update(visible=True), None, modal)
     submit_button.click(lambda: gr.update(visible=True, value=""), None, result_text).then(
         ).then(update_selected_test_case, inputs=[button, state], outputs=[state]).then(
             on_test_case_click,
             inputs=state,
+            outputs=[
+                test_case_name,
+                criteria,
+                context,
+                user_message,
+                assistant_message_text,
+                assistant_message_json,
+                tools,
+                result_text,
+            ],
         )
 demo.launch(server_name="0.0.0.0")

src/model.py CHANGED Viewed

@@ -9,9 +9,6 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 from logger import logger
-# from vllm import LLM, SamplingParams
 safe_token = "No"
 risky_token = "Yes"
 nlogprobs = 20
@@ -21,9 +18,10 @@ logger.debug(f"Inference engine is: '{inference_engine}'")
 if inference_engine == "VLLM":
     import torch
-    device = torch.device("cuda")
-    model_path = os.getenv("MODEL_PATH", "ibm-granite/granite-guardian-3.1-8b")
     logger.debug(f"model_path is {model_path}")
     tokenizer = AutoTokenizer.from_pretrained(model_path)
     model = AutoModelForCausalLM.from_pretrained(model_path)
@@ -41,17 +39,32 @@ elif inference_engine == "WATSONX":
     model_id = "ibm/granite-guardian-3-8b"  # 8B Model: "ibm/granite-guardian-3-8b"
     model = ModelInference(model_id=model_id, api_client=client)
-def parse_output(output, input_len):
     label, prob_of_risk = None, None
     if nlogprobs > 0:
-        list_index_logprobs_i = [torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
-                                 for token_i in list(output.scores)[:-1]]
-        if list_index_logprobs_i is not None:
-            prob = get_probablities(list_index_logprobs_i)
-            prob_of_risk = prob[1]
-    res = tokenizer.decode(output.sequences[:,input_len:][0],skip_special_tokens=True).strip()
     if risky_token.lower() == res.lower():
         label = risky_token
     elif safe_token.lower() == res.lower():
@@ -59,7 +72,27 @@ def parse_output(output, input_len):
     else:
         label = "Failed"
-    return label, prob_of_risk.item()
 def get_probablities(logprobs):
     safe_token_prob = 1e-50
@@ -72,87 +105,61 @@ def get_probablities(logprobs):
             if decoded_token.strip().lower() == risky_token.lower():
                 unsafe_token_prob += math.exp(logprob)
-    probabilities = torch.softmax(
-        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]), dim=0
-    )
     return probabilities
-def softmax(values):
-    exp_values = [math.exp(v) for v in values]
-    total = sum(exp_values)
-    return [v / total for v in exp_values]
-def get_probablities_watsonx(top_tokens_list):
-    safe_token_prob = 1e-50
-    risky_token_prob = 1e-50
-    for top_tokens in top_tokens_list:
-        for token in top_tokens:
-            if token["text"].strip().lower() == safe_token.lower():
-                safe_token_prob += math.exp(token["logprob"])
-            if token["text"].strip().lower() == risky_token.lower():
-                risky_token_prob += math.exp(token["logprob"])
-    probabilities = softmax([math.log(safe_token_prob), math.log(risky_token_prob)])
-    return probabilities
 def get_prompt(messages, criteria_name, tokenize=False, add_generation_prompt=False, return_tensors=None):
     if criteria_name == "general_harm":
         criteria_name = "harm"
     elif criteria_name == "function_calling_hallucination":
         criteria_name = "function_call"
     guardian_config = {"risk_name": criteria_name if criteria_name != "general_harm" else "harm"}
     prompt = tokenizer.apply_chat_template(
         messages,
         guardian_config=guardian_config,
         tokenize=tokenize,
         add_generation_prompt=add_generation_prompt,
-        return_tensors=return_tensors
     )
-    logger.debug(f'prompt is\n{prompt}')
     return prompt
 @spaces.GPU
-def generate_tokens(prompt):
-    result = model.generate(
-        prompt=[prompt],
-        params={
-            "decoding_method": "greedy",
-            "max_new_tokens": 20,
-            "temperature": 0,
-            "return_options": {"token_logprobs": True, "generated_tokens": True, "input_text": True, "top_n_tokens": 5},
-        },
-    )
-    return result[0]["results"][0]["generated_tokens"]
-def parse_output_watsonx(generated_tokens_list):
-    label, prob_of_risk = None, None
-    if nlogprobs > 0:
-        top_tokens_list = [generated_tokens["top_tokens"] for generated_tokens in generated_tokens_list]
-        prob = get_probablities_watsonx(top_tokens_list)
-        prob_of_risk = prob[1]
-    res = next(iter(generated_tokens_list))["text"].strip()
-    if risky_token.lower() == res.lower():
-        label = risky_token
-    elif safe_token.lower() == res.lower():
-        label = safe_token
-    else:
-        label = "Failed"
-    return label, prob_of_risk
-@spaces.GPU
-def generate_text(messages, criteria_name):
     logger.debug(f"Messages used to create the prompt are: \n{messages}")
     start = time()
     if inference_engine == "MOCK":
@@ -163,27 +170,20 @@ def generate_text(messages, criteria_name):
     elif inference_engine == "WATSONX":
         chat = get_prompt(messages, criteria_name)
         logger.debug(f"Prompt is \n{chat}")
-        generated_tokens = generate_tokens(chat)
         label, prob_of_risk = parse_output_watsonx(generated_tokens)
     elif inference_engine == "VLLM":
-        # input_ids = get_prompt(
-        #     messages=messages,
-        #     criteria_name=criteria_name,
-        #     tokenize=True,
-        #     add_generation_prompt=True,
-        #     return_tensors="pt").to(model.device)
-        guardian_config = {"risk_name": criteria_name if criteria_name != "general_harm" else "harm"}
-        logger.debug(f'guardian_config is: {guardian_config}')
-        input_ids = tokenizer.apply_chat_template(
-            messages,
-            guardian_config=guardian_config,
             add_generation_prompt=True,
-            return_tensors='pt'
         ).to(model.device)
         logger.debug(f"input_ids are: {input_ids}")
         input_len = input_ids.shape[1]
-        logger.debug(f"input_len are: {input_len}")
         with torch.no_grad():
             # output = model.generate(chat, sampling_params, use_tqdm=False)
@@ -192,7 +192,8 @@ def generate_text(messages, criteria_name):
                 do_sample=False,
                 max_new_tokens=nlogprobs,
                 return_dict_in_generate=True,
-                output_scores=True,)
             logger.debug(f"model output is:\n{output}")
             label, prob_of_risk = parse_output(output, input_len)

 from logger import logger
 safe_token = "No"
 risky_token = "Yes"
 nlogprobs = 20
 if inference_engine == "VLLM":
     import torch
+    device = torch.device("cpu")
+    model_path = os.getenv("MODEL_PATH", "ibm-granite/granite-guardian-3.1-2b")
     logger.debug(f"model_path is {model_path}")
     tokenizer = AutoTokenizer.from_pretrained(model_path)
     model = AutoModelForCausalLM.from_pretrained(model_path)
     model_id = "ibm/granite-guardian-3-8b"  # 8B Model: "ibm/granite-guardian-3-8b"
     model = ModelInference(model_id=model_id, api_client=client)
+def get_probablities_watsonx(top_tokens_list):
+    safe_token_prob = 1e-50
+    risky_token_prob = 1e-50
+    for top_tokens in top_tokens_list:
+        for token in top_tokens:
+            if token["text"].strip().lower() == safe_token.lower():
+                safe_token_prob += math.exp(token["logprob"])
+            if token["text"].strip().lower() == risky_token.lower():
+                risky_token_prob += math.exp(token["logprob"])
+    probabilities = softmax([math.log(safe_token_prob), math.log(risky_token_prob)])
+    return probabilities
+def parse_output_watsonx(generated_tokens_list):
     label, prob_of_risk = None, None
     if nlogprobs > 0:
+        top_tokens_list = [generated_tokens["top_tokens"] for generated_tokens in generated_tokens_list]
+        prob = get_probablities_watsonx(top_tokens_list)
+        prob_of_risk = prob[1]
+    res = next(iter(generated_tokens_list))["text"].strip()
     if risky_token.lower() == res.lower():
         label = risky_token
     elif safe_token.lower() == res.lower():
     else:
         label = "Failed"
+    return label, prob_of_risk
+def generate_tokens_watsonx(prompt):
+    result = model.generate(
+        prompt=[prompt],
+        params={
+            "decoding_method": "greedy",
+            "max_new_tokens": 20,
+            "temperature": 0,
+            "return_options": {"token_logprobs": True, "generated_tokens": True, "input_text": True, "top_n_tokens": 5},
+        },
+    )
+    return result[0]["results"][0]["generated_tokens"]
+def softmax(values):
+    exp_values = [math.exp(v) for v in values]
+    total = sum(exp_values)
+    return [v / total for v in exp_values]
 def get_probablities(logprobs):
     safe_token_prob = 1e-50
             if decoded_token.strip().lower() == risky_token.lower():
                 unsafe_token_prob += math.exp(logprob)
+    probabilities = torch.softmax(torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]), dim=0)
     return probabilities
+def parse_output(output, input_len):
+    label, prob_of_risk = None, None
+    if nlogprobs > 0:
+        list_index_logprobs_i = [
+            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True) for token_i in list(output.scores)[:-1]
+        ]
+        if list_index_logprobs_i is not None:
+            prob = get_probablities(list_index_logprobs_i)
+            prob_of_risk = prob[1]
+    res = tokenizer.decode(output.sequences[:, input_len:][0], skip_special_tokens=True).strip()
+    if risky_token.lower() == res.lower():
+        label = risky_token
+    elif safe_token.lower() == res.lower():
+        label = safe_token
+    else:
+        label = "Failed"
+    return label, prob_of_risk.item()
+@spaces.GPU
 def get_prompt(messages, criteria_name, tokenize=False, add_generation_prompt=False, return_tensors=None):
+    logger.debug("Creating prompt for the model.")
+    logger.debug(f"Messages used to create the prompt are: \n{messages}")
+    logger.debug("Criteria name is: " + criteria_name)
     if criteria_name == "general_harm":
         criteria_name = "harm"
     elif criteria_name == "function_calling_hallucination":
         criteria_name = "function_call"
+    logger.debug("Criteria name was changed too: " + criteria_name)
+    logger.debug(f"Tokenize: {tokenize}")
+    logger.debug(f"add_generation_prompt: {add_generation_prompt}")
+    logger.debug(f"return_tensors: {return_tensors}")
     guardian_config = {"risk_name": criteria_name if criteria_name != "general_harm" else "harm"}
+    logger.debug(f"guardian_config is: {guardian_config}")
     prompt = tokenizer.apply_chat_template(
         messages,
         guardian_config=guardian_config,
         tokenize=tokenize,
         add_generation_prompt=add_generation_prompt,
+        return_tensors=return_tensors,
     )
+    logger.debug(f"Prompt (type {type(prompt)}) is: {prompt}")
     return prompt
 @spaces.GPU
+def get_guardian_response(messages, criteria_name):
     logger.debug(f"Messages used to create the prompt are: \n{messages}")
     start = time()
     if inference_engine == "MOCK":
     elif inference_engine == "WATSONX":
         chat = get_prompt(messages, criteria_name)
         logger.debug(f"Prompt is \n{chat}")
+        generated_tokens = generate_tokens_watsonx(chat)
         label, prob_of_risk = parse_output_watsonx(generated_tokens)
     elif inference_engine == "VLLM":
+        input_ids = get_prompt(
+            messages=messages,
+            criteria_name=criteria_name,
+            tokenize=True,
             add_generation_prompt=True,
+            return_tensors="pt",
         ).to(model.device)
         logger.debug(f"input_ids are: {input_ids}")
         input_len = input_ids.shape[1]
+        logger.debug(f"input_len is: {input_len}")
         with torch.no_grad():
             # output = model.generate(chat, sampling_params, use_tqdm=False)
                 do_sample=False,
                 max_new_tokens=nlogprobs,
                 return_dict_in_generate=True,
+                output_scores=True,
+            )
             logger.debug(f"model output is:\n{output}")
             label, prob_of_risk = parse_output(output, input_len)

src/styles.css CHANGED Viewed

@@ -135,4 +135,4 @@
 .no-stretch {
     align-items: flex-start;
-}

 .no-stretch {
     align-items: flex-start;
+}

src/utils.py CHANGED Viewed

@@ -5,6 +5,7 @@ import os
 def create_message(role, content):
     return [{"role": role, "content": content}]
 def get_messages(test_case, sub_catalog_name) -> list[dict[str, str]]:
     messages = []
@@ -27,8 +28,6 @@ def get_messages(test_case, sub_catalog_name) -> list[dict[str, str]]:
         messages += create_message("tools", test_case["tools"])
         messages += create_message("user", test_case["user_message"])
         messages += create_message("assistant", test_case["assistant_message"])
-    print('Messages are')
-    print(messages)
     return messages
@@ -53,7 +52,9 @@ def get_evaluated_component(sub_catalog_name, criteria_name):
     component = None
     if sub_catalog_name == "harmful_content_in_user_prompt":
         component = "user"
-    elif sub_catalog_name == "harmful_content_in_assistant_response" or sub_catalog_name == "risks_in_agentic_workflows":
         component = "assistant"
     elif sub_catalog_name == "rag_hallucination_risks":
         if criteria_name == "context_relevance":

 def create_message(role, content):
     return [{"role": role, "content": content}]
 def get_messages(test_case, sub_catalog_name) -> list[dict[str, str]]:
     messages = []
         messages += create_message("tools", test_case["tools"])
         messages += create_message("user", test_case["user_message"])
         messages += create_message("assistant", test_case["assistant_message"])
     return messages
     component = None
     if sub_catalog_name == "harmful_content_in_user_prompt":
         component = "user"
+    elif (
+        sub_catalog_name == "harmful_content_in_assistant_response" or sub_catalog_name == "risks_in_agentic_workflows"
+    ):
         component = "assistant"
     elif sub_catalog_name == "rag_hallucination_risks":
         if criteria_name == "context_relevance":