hhem / app.py
eaglelandsonce's picture
Update app.py
5b06c04 verified
raw
history blame
5.77 kB
import streamlit as st
import requests
import json
import os
import pandas as pd
from sentence_transformers import CrossEncoder
import numpy as np
import re
# Credentials ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Mandatory Vectara settings, read in a fixed order from the process
# environment; a missing variable raises KeyError while the module loads.
corpus_id, customer_id, api_key = (
    os.environ[var_name]
    for var_name in ("VECTARA_CORPUS_ID", "VECTARA_CUSTOMER_ID", "VECTARA_API_KEY")
)
# Get Data +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
def get_post_headers() -> dict:
    """Build the headers attached to every POST request sent to Vectara.

    Returns:
        A new dict carrying the module-level API key and customer id plus a
        JSON content type.
    """
    headers = {}
    headers["x-api-key"] = api_key
    headers["customer-id"] = customer_id
    headers["Content-Type"] = "application/json"
    return headers
def query_vectara(query: str, case_number: int, filter_str="", lambda_val=0.0) -> str:
    """Query the configured Vectara corpus and return the summary text.

    Args:
        query: Free-text question sent to Vectara.
        case_number: Accepted for interface compatibility; it is not used
            when building the request.
        filter_str: Optional metadata filter; left out of the request when
            empty.
        lambda_val: Value forwarded as the ``lambda`` of the lexical
            interpolation config.

    Returns:
        The summary text with bracketed numeric citation markers such as
        ``[1]`` or ``[2,3]`` removed. Returns ``""`` after reporting the
        problem with ``st.error`` when the request cannot be sent, the server
        answers with a non-200 status, or the response has no summary text.
    """
    corpus_key = {
        "customerId": customer_id,
        "corpusId": corpus_id,
        "lexicalInterpolationConfig": {"lambda": lambda_val},
    }
    if filter_str:
        corpus_key["metadataFilter"] = filter_str

    data = {
        "query": [
            {
                "query": query,
                "start": 0,
                "numResults": 10,
                "contextConfig": {
                    "sentencesBefore": 2,
                    "sentencesAfter": 2
                },
                "corpusKey": [corpus_key],
                "summary": [
                    {
                        "responseLang": "eng",
                        "maxSummarizedResults": 5,
                        "summarizerPromptName": "vectara-summary-ext-v1.2.0"
                    },
                ]
            }
        ]
    }

    # Timeouts and connection failures raise instead of returning a response;
    # report them the same way as HTTP errors rather than crashing the page.
    try:
        response = requests.post(
            headers=get_post_headers(),
            url="https://api.vectara.io/v1/query",
            data=json.dumps(data),
            timeout=130,
        )
    except requests.RequestException as exc:
        st.error(f"Query failed (request error: {exc})")
        return ""

    if response.status_code != 200:
        st.error(f"Query failed (code {response.status_code}, reason {response.reason}, details {response.text})")
        return ""

    # A 200 response may still lack a summary (or not be valid JSON); guard
    # the nested lookup so the UI shows a message instead of a traceback.
    try:
        answer = response.json()["responseSet"][0]["summary"][0]["text"]
    except (ValueError, KeyError, IndexError, TypeError):
        answer = None
    if not isinstance(answer, str):
        st.error("Query succeeded but the response contained no summary text.")
        return ""

    # Drop citation markers holding one to six comma-separated numbers.
    return re.sub(r'\[\d+(,\d+){0,5}\]', '', answer)
# Streamlit UI
st.title('Vectara Query Interface')

# Case picker over 2001..2099; each option is displayed as '"Case Number": <n>'.
case_number = st.selectbox(
    "Select Case Number:",
    range(2001, 2100),
    format_func=lambda number: f'"Case Number": {number}',
)

# Free-text question typed by the user.
user_query = st.text_input("Enter your query:", "")

# Sidebar controls forwarded unchanged to query_vectara.
st.sidebar.header("Advanced Options")
filter_str = st.sidebar.text_input("Filter String:", "")
lambda_val = st.sidebar.slider("Lambda Value:", min_value=0.0, max_value=1.0, value=0.025)

if st.button('Search'):
    if not user_query:
        # Nothing to search for: tell the user instead of calling the API.
        st.error("Please enter a query to search.")
    else:
        with st.spinner('Querying Vectara...'):
            output = query_vectara(user_query, case_number, filter_str, lambda_val)
            st.markdown("## Result")
            st.write(output)
# Initialize the HHEM model +++++++++++++++++++++++++++++++++++++++++++++++
@st.cache_resource
def _load_hhem_model():
    """Create the HHEM cross-encoder once and reuse it across script reruns.

    Streamlit re-executes the script on every widget interaction; caching the
    model as a resource avoids constructing it again on each rerun.
    """
    return CrossEncoder('vectara/hallucination_evaluation_model')


# Shared model instance used by compute_hhem_scores.
model = _load_hhem_model()
# Function to compute HHEM scores
def compute_hhem_scores(texts, summary):
    """Score each text against the summary with the HHEM model.

    Args:
        texts: Iterable of source passages.
        summary: Summary text paired with every passage.

    Returns:
        Whatever ``model.predict`` returns for the ``[text, summary]`` pairs,
        one score per passage. When ``texts`` is empty an empty NumPy array is
        returned and the model is not invoked.
    """
    pairs = [[text, summary] for text in texts]
    if not pairs:
        # Nothing to score: skip the model call entirely.
        return np.array([], dtype=np.float32)
    return model.predict(pairs)
# Define the Vectara query function
def vectara_query(query: str, config: dict):
    """Query Vectara and return the matched passages together with the summary.

    Args:
        query: Free-text question sent to Vectara.
        config: Request settings. ``"customer_id"``, ``"corpus_id"`` and
            ``"api_key"`` are required; ``"lambda_val"`` (default ``0.5``) and
            ``"top_k"`` (default ``10``) are optional.

    Returns:
        A ``(results, summary)`` tuple where ``results`` is a list of
        ``[text, score]`` pairs and ``summary`` is the summary text. Returns
        ``([], "")`` after reporting the problem with ``st.error`` when the
        request cannot be sent, the server answers with a non-200 status, or
        the response does not have the expected structure.

    Raises:
        KeyError: If a required key is missing from ``config``.
    """
    corpus_key = [{
        "customerId": config["customer_id"],
        "corpusId": config["corpus_id"],
        "lexicalInterpolationConfig": {"lambda": config.get("lambda_val", 0.5)},
    }]
    data = {
        "query": [{
            "query": query,
            "start": 0,
            "numResults": config.get("top_k", 10),
            "contextConfig": {
                "sentencesBefore": 2,
                "sentencesAfter": 2,
            },
            "corpusKey": corpus_key,
            "summary": [{
                "responseLang": "eng",
                "maxSummarizedResults": 5,
            }]
        }]
    }
    headers = {
        "x-api-key": config["api_key"],
        "customer-id": config["customer_id"],
        "Content-Type": "application/json",
    }

    # Without a timeout requests can wait indefinitely; use the same limit as
    # query_vectara and report transport failures instead of crashing.
    try:
        response = requests.post(
            headers=headers,
            url="https://api.vectara.io/v1/query",
            data=json.dumps(data),
            timeout=130,
        )
    except requests.RequestException as exc:
        st.error(f"Query failed (request error: {exc})")
        return [], ""

    if response.status_code != 200:
        st.error(f"Query failed (code {response.status_code}, reason {response.reason}, details {response.text})")
        return [], ""

    # Guard the nested lookups: a 200 response may still be malformed.
    try:
        response_set = response.json()["responseSet"][0]
        summary = response_set["summary"][0]["text"]
        res = [[r['text'], r['score']] for r in response_set["response"]]
    except (ValueError, KeyError, IndexError, TypeError):
        st.error("Query succeeded but the response did not have the expected structure.")
        return [], ""
    return res, summary
# Streamlit UI setup
st.title("Vectara Content Query Interface")

# User inputs for the passage + HHEM scoring form.
query = st.text_input("Enter your query here", "")
lambda_val = st.slider("Lambda Value", min_value=0.0, max_value=1.0, value=0.5)
top_k = st.number_input("Top K Results", min_value=1, max_value=50, value=10)

if st.button("Query Vectara"):
    if not query.strip():
        # Same validation as the first form: do not call the API without a query.
        st.error("Please enter a query to search.")
    else:
        config = {
            "api_key": os.environ.get("VECTARA_API_KEY", ""),
            "customer_id": os.environ.get("VECTARA_CUSTOMER_ID", ""),
            "corpus_id": os.environ.get("VECTARA_CORPUS_ID", ""),
            "lambda_val": lambda_val,
            "top_k": top_k,
        }
        results, summary = vectara_query(query, config)
        if results:
            st.subheader("Summary")
            st.write(summary)
            st.subheader("Top Results")
            # Only the first five passages are scored against the summary.
            texts = [r[0] for r in results[:5]]
            # One HHEM score per passage, paired with the summary.
            scores = compute_hhem_scores(texts, summary)
            # Show passages and their scores side by side.
            df = pd.DataFrame({'Fact': texts, 'HHEM Score': scores})
            st.dataframe(df)
        else:
            st.write("No results found.")