Spaces:

eaglelandsonce
/

hhem

Runtime error

File size: 12,211 Bytes

import streamlit as st
import requests
import json
import os
import pandas as pd
from sentence_transformers import CrossEncoder
import numpy as np
import re

from textwrap import dedent
import google.generativeai as genai


# Tool import
from crewai.tools.gemini_tools import GeminiSearchTools
from crewai.tools.mixtral_tools import MixtralSearchTools
from crewai.tools.zephyr_tools import ZephyrSearchTools
from crewai.tools.phi2_tools import Phi2SearchTools


# Google Langchain
from langchain_google_genai import GoogleGenerativeAI

#Crew imports
from crewai import Agent, Task, Crew, Process

# Retrieve the Google API key from the GOOGLE_API_KEY environment variable.
GOOGLE_AI_STUDIO = os.environ.get('GOOGLE_API_KEY')

# Fail fast when the key is missing or empty. The message names the variable
# that is actually read above (it previously pointed at GOOGLE_AI_STUDIO2,
# which this file never consults).
if not GOOGLE_AI_STUDIO:
    raise ValueError("API key not found. Please set the GOOGLE_API_KEY environment variable.")

# Shared LLM handle passed as `llm=` to every CrewAI agent defined below.
gemini_llm = GoogleGenerativeAI(model="gemini-pro", google_api_key=GOOGLE_AI_STUDIO)

# CrewAI +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

def crewai_process_gemini(research_topic):
    """Summarize ``research_topic`` with a one-agent crew using the Gemini search tool.

    A single agent (driven by the module-level ``gemini_llm`` and equipped
    with ``GeminiSearchTools.gemini_search``) receives one
    "Summarize <topic>" task, and the crew is run sequentially.

    Args:
        research_topic: Text interpolated into the task description.

    Returns:
        The value returned by ``Crew.kickoff()``.
    """
    summary_agent = Agent(
        role='Summary Evaluator',
        # The goal used to be a sentence about managing anxiety in group
        # therapy -- a leftover from an unrelated persona that contradicted
        # the role and the task. It now matches the sibling process functions.
        goal='Evaluate the summary using the HHEM-Victara Tuner',
        backstory="""Skilled in running query evaluation""",
        verbose=True,
        allow_delegation=False,
        llm=gemini_llm,
        tools=[GeminiSearchTools.gemini_search],
    )

    # The only task: summarize the topic supplied by the caller.
    summary_task = Task(
        description=f"""Summarize {research_topic}""",
        agent=summary_agent,
    )

    # One agent, one task, executed with the sequential process.
    crew = Crew(
        agents=[summary_agent],
        tasks=[summary_task],
        verbose=2,
        process=Process.sequential,
    )

    return crew.kickoff()



def crewai_process_mixtral_crazy(research_topic):
    """Run a one-agent, one-task crew that summarizes ``research_topic``.

    The agent is driven by the module-level ``gemini_llm`` and is handed the
    ``MixtralSearchTools.mixtral_crazy`` tool.

    Args:
        research_topic: Text interpolated into the task description.

    Returns:
        The value returned by ``Crew.kickoff()``.
    """
    # The lone agent; delegation is switched off.
    evaluator = Agent(
        role='Summary Evaluator',
        goal='Evaluate the summary using the HHEM-Victara Tuner',
        backstory="Skilled in running query evaluation",
        verbose=True,
        allow_delegation=False,
        llm=gemini_llm,
        tools=[MixtralSearchTools.mixtral_crazy],
    )

    # Its single assignment: summarize the caller's topic.
    summarize = Task(
        description=f"Summarize {research_topic}",
        agent=evaluator,
    )

    # Assemble the crew (sequential process, verbosity 2) and run it at once.
    return Crew(
        agents=[evaluator],
        tasks=[summarize],
        verbose=2,
        process=Process.sequential,
    ).kickoff()


def crewai_process_mixtral_normal(research_topic):
    """Summarize ``research_topic`` via a single agent holding the ``mixtral_normal`` tool.

    Args:
        research_topic: Text interpolated into the task description.

    Returns:
        The value returned by ``Crew.kickoff()``.
    """
    # Agent configuration gathered in one place before construction.
    agent_settings = dict(
        role='Summary Evaluator',
        goal='Evaluate the summary using the HHEM-Victara Tuner',
        backstory="Skilled in running query evaluation",
        verbose=True,
        allow_delegation=False,
        llm=gemini_llm,  # module-level LLM handle
        tools=[MixtralSearchTools.mixtral_normal],
    )
    summarizer = Agent(**agent_settings)

    # One task bound to that agent.
    summary_task = Task(
        agent=summarizer,
        description=f"Summarize {research_topic}",
    )

    # Sequential crew made of exactly that agent and task.
    crew = Crew(
        process=Process.sequential,
        verbose=2,
        tasks=[summary_task],
        agents=[summarizer],
    )
    outcome = crew.kickoff()
    return outcome


def crewai_process_zephyr_normal(research_topic):
    """Kick off a sequential crew whose only agent summarizes ``research_topic``.

    The agent uses the module-level ``gemini_llm`` and the
    ``ZephyrSearchTools.zephyr_normal`` tool.

    Args:
        research_topic: Text interpolated into the task description.

    Returns:
        The value returned by ``Crew.kickoff()``.
    """
    zephyr_tools = [ZephyrSearchTools.zephyr_normal]

    zephyr_agent = Agent(
        role='Summary Evaluator',
        goal='Evaluate the summary using the HHEM-Victara Tuner',
        backstory="Skilled in running query evaluation",
        verbose=True,
        allow_delegation=False,
        llm=gemini_llm,
        tools=zephyr_tools,
    )

    # The crew is built around a single summarization task.
    tasks = [Task(description=f"Summarize {research_topic}", agent=zephyr_agent)]
    crew = Crew(agents=[zephyr_agent], tasks=tasks, verbose=2, process=Process.sequential)

    return crew.kickoff()


def crewai_process_phi2(research_topic):
    """Summarize ``research_topic`` with a one-agent crew using the Phi-2 search tool.

    A single agent (driven by the module-level ``gemini_llm`` and equipped
    with ``Phi2SearchTools.phi2_search``) receives one "Summarize <topic>"
    task, and the crew is run sequentially.

    Args:
        research_topic: Text interpolated into the task description.

    Returns:
        The value returned by ``Crew.kickoff()``.
    """
    summary_agent = Agent(
        # The role used to be 'Emily Mental Patient Graphic Designer Anxiety',
        # a persona string left over from unrelated code. It now matches the
        # role used by every sibling process function.
        role='Summary Evaluator',
        goal='Evaluate the summary using the HHEM-Victara Tuner',
        backstory="""Skilled in running query evaluation""",
        verbose=True,
        allow_delegation=False,
        llm=gemini_llm,
        tools=[Phi2SearchTools.phi2_search],
    )

    # The only task: summarize the topic supplied by the caller.
    summary_task = Task(
        description=f"""Summarize {research_topic}""",
        agent=summary_agent,
    )

    # One agent, one task, executed with the sequential process.
    crew = Crew(
        agents=[summary_agent],
        tasks=[summary_task],
        verbose=2,
        process=Process.sequential,
    )

    return crew.kickoff()



# Credentials ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Vectara credentials, read once from the environment. Each lookup uses
# os.environ[...] so a missing variable raises KeyError immediately; the
# variables are read in the order corpus id, customer id, API key.
corpus_id, customer_id, api_key = (
    os.environ[variable]
    for variable in ("VECTARA_CORPUS_ID", "VECTARA_CUSTOMER_ID", "VECTARA_API_KEY")
)


# Get Data +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


def get_post_headers() -> dict:
    """Build the headers attached to each Vectara POST request.

    Returns:
        A new dict carrying the module-level ``api_key`` and ``customer_id``
        plus a JSON ``Content-Type``.
    """
    headers = {}
    headers["x-api-key"] = api_key
    headers["customer-id"] = customer_id
    headers["Content-Type"] = "application/json"
    return headers

def query_vectara(query: str, filter_str="", lambda_val=0.0) -> str:
    """Ask Vectara for a summary of ``query`` and return the cleaned summary text.

    Args:
        query: Query text to send.
        filter_str: Optional metadata filter; only included in the request
            when non-empty.
        lambda_val: Value sent as the lexical interpolation ``lambda``.

    Returns:
        The first summary text of the first response set, with bracketed
        numeric citation markers such as ``[1]`` or ``[2,3]`` removed.
        An empty string when the HTTP status is not 200 (the failure is
        reported through ``st.error``).
    """
    # Corpus selector built from the module-level credentials; the metadata
    # filter key is appended only when a filter was supplied.
    corpus_key = {
        "customerId": customer_id,
        "corpusId": corpus_id,
        "lexicalInterpolationConfig": {"lambda": lambda_val},
        **({"metadataFilter": filter_str} if filter_str else {}),
    }

    context_config = {"sentencesBefore": 2, "sentencesAfter": 2}
    summary_request = {
        "responseLang": "eng",
        "maxSummarizedResults": 5,
        "summarizerPromptName": "vectara-summary-ext-v1.2.0",
    }
    query_entry = {
        "query": query,
        "start": 0,
        "numResults": 10,
        "contextConfig": context_config,
        "corpusKey": [corpus_key],
        "summary": [summary_request],
    }
    payload = {"query": [query_entry]}

    response = requests.post(
        "https://api.vectara.io/v1/query",
        headers=get_post_headers(),
        data=json.dumps(payload),
        timeout=130,
    )

    if response.status_code == 200:
        summary_text = response.json()["responseSet"][0]["summary"][0]["text"]
        # Drop citation markers: "[", a number, up to five more ",number", "]".
        return re.sub(r'\[\d+(,\d+){0,5}\]', '', summary_text)

    st.error(f"Query failed (code {response.status_code}, reason {response.reason}, details {response.text})")
    return ""



# Initialize the HHEM model +++++++++++++++++++++++++++++++++++++++++++++++
@st.cache_resource
def _load_hhem_model():
    """Construct the HHEM cross-encoder once and reuse it across reruns.

    Streamlit re-executes this script on every user interaction; without
    ``st.cache_resource`` the CrossEncoder would be rebuilt on each rerun.

    Returns:
        The ``CrossEncoder`` for 'vectara/hallucination_evaluation_model'.
    """
    return CrossEncoder('vectara/hallucination_evaluation_model')


# Module-level handle used by compute_hhem_scores().
model = _load_hhem_model()

# Score every text against the summary with the HHEM model
def compute_hhem_scores(texts, summary):
    """Return the module-level ``model``'s predictions for each (text, summary) pair.

    Every entry of ``texts`` is paired with the same ``summary`` (text first,
    summary second) and all pairs go to ``model.predict`` in a single call;
    its result is returned unchanged.
    """
    return model.predict([[passage, summary] for passage in texts])

# Define the Vectara query function
def vectara_query(query: str, config: dict):
    """Query Vectara and return the matched passages plus the generated summary.

    Args:
        query: Query text to send.
        config: Must contain ``api_key``, ``customer_id`` and ``corpus_id``.
            May contain ``lambda_val`` (defaults to 0.5) and ``top_k``
            (defaults to 10).

    Returns:
        A ``(results, summary)`` tuple. ``results`` is a list of
        ``[text, score]`` pairs taken from the first response set and
        ``summary`` is the text of its first summary. When the HTTP status is
        not 200 the failure is reported via ``st.error`` and ``([], "")`` is
        returned.

    Raises:
        requests.exceptions.RequestException: Propagated from
            ``requests.post``, including a timeout after 130 seconds.
    """
    corpus_key = [{
        "customerId": config["customer_id"],
        "corpusId": config["corpus_id"],
        "lexicalInterpolationConfig": {"lambda": config.get("lambda_val", 0.5)},
    }]
    data = {
        "query": [{
            "query": query,
            "start": 0,
            "numResults": config.get("top_k", 10),
            "contextConfig": {
                "sentencesBefore": 2,
                "sentencesAfter": 2,
            },
            "corpusKey": corpus_key,
            "summary": [{
                "responseLang": "eng",
                "maxSummarizedResults": 5,
            }]
        }]
    }

    headers = {
        "x-api-key": config["api_key"],
        "customer-id": config["customer_id"],
        "Content-Type": "application/json",
    }
    response = requests.post(
        headers=headers,
        url="https://api.vectara.io/v1/query",
        data=json.dumps(data),
        # Without a timeout requests waits indefinitely and would freeze the
        # app on a stalled connection; 130 s matches query_vectara().
        timeout=130,
    )
    if response.status_code != 200:
        st.error(f"Query failed (code {response.status_code}, reason {response.reason}, details {response.text})")
        return [], ""

    result = response.json()
    responses = result["responseSet"][0]["response"]
    summary = result["responseSet"][0]["summary"][0]["text"]

    # Keep only the passage text and its relevance score for each hit.
    res = [[r['text'], r['score']] for r in responses]
    return res, summary


# Create the main app with four tabs
tab1, tab2, tab3, tab4 = st.tabs([
    "Synthetic Data",
    "Data Query",
    "HHEM-Victara Query Tuner",
    "Model Evaluation",
])

# Tab 1: a header and a link button to the synthetic clinical data GPT.
with tab1:
    st.header("Synthetic Data")
    st.link_button(
        label="Create Synthetic Medical Data",
        url="https://chat.openai.com/g/g-XyHciw52w-synthetic-clinical-data",
    )

# Tab 2: a header and a link button to the explore/summarize GPT.
with tab2:
    st.header("Data Query")
    st.link_button(
        label="Query & Summarize Data",
        url="https://chat.openai.com/g/g-9tWqg4gRY-explore-summarize-medical-data",
    )
   
with tab3:

    st.header("HHEM-Victara Query Tuner")

    # User inputs: the query text, the lexical interpolation lambda and the
    # number of results to request.
    query = st.text_area("Enter your text for query tuning", "", height=75)
    lambda_val = st.slider("Lambda Value", min_value=0.0, max_value=1.0, value=0.5)
    top_k = st.number_input("Top K Results", min_value=1, max_value=50, value=10)

    if st.button("Query Vectara"):
        if not query.strip():
            # Do not send an empty query to the API; this mirrors the input
            # check performed in the Model Evaluation tab.
            st.warning("Please enter a query.")
        else:
            config = {
                "api_key": os.environ.get("VECTARA_API_KEY", ""),
                "customer_id": os.environ.get("VECTARA_CUSTOMER_ID", ""),
                "corpus_id": os.environ.get("VECTARA_CORPUS_ID", ""),
                "lambda_val": lambda_val,
                "top_k": top_k,
            }

            results, summary = vectara_query(query, config)

            if results:
                st.subheader("Summary")
                st.write(summary)

                st.subheader("Top Results")

                # Only the first five passages are scored -- presumably because
                # the request sets maxSummarizedResults to 5; confirm if changed.
                texts = [r[0] for r in results[:5]]

                # Score each passage against the generated summary.
                scores = compute_hhem_scores(texts, summary)

                # Show passages next to their HHEM scores.
                df = pd.DataFrame({'Fact': texts, 'HHEM Score': scores})
                st.dataframe(df)
            else:
                st.write("No results found.")

with tab4:

    st.header("Model Evaluation")

    # Free-text topic handed to the selected CrewAI process.
    research_topic = st.text_input('Enter your research topic:', '')

    # Selectable process names mapped to the functions that implement them;
    # the key order is the order shown in the select box.
    process_runners = {
        'crewai_process_gemini': crewai_process_gemini,
        'crewai_process_mixtral_crazy': crewai_process_mixtral_crazy,
        'crewai_process_mixtral_normal': crewai_process_mixtral_normal,
        'crewai_process_zephyr_normal': crewai_process_zephyr_normal,
        'crewai_process_phi2': crewai_process_phi2,
    }
    process_selection = st.selectbox(
        'Choose the process to run:',
        tuple(process_runners),
    )

    # Run the chosen process on click, but only when a topic was entered.
    if st.button('Run Process'):
        if not research_topic:
            st.warning('Please enter a research topic.')
        else:
            result = process_runners[process_selection](research_topic)
            st.write(result)