# hhem / app.py
import streamlit as st
import requests
import json
import os
import pandas as pd
from sentence_transformers import CrossEncoder
import numpy as np
import re
# Credentials ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
corpus_id = os.environ['VECTARA_CORPUS_ID']
customer_id = os.environ['VECTARA_CUSTOMER_ID']
api_key = os.environ['VECTARA_API_KEY']
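# The three values above are expected as environment variables (for example, as
# Space secrets on Hugging Face). A hypothetical local setup, for illustration only:
#   export VECTARA_CUSTOMER_ID=1234567890
#   export VECTARA_CORPUS_ID=3
#   export VECTARA_API_KEY=zqt_xxxxxxxxxxxxxxxx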
# Get Data +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
def get_post_headers() -> dict:
    """Returns headers that should be attached to each post request."""
    return {
        "x-api-key": api_key,
        "customer-id": customer_id,
        "Content-Type": "application/json",
    }
def query_vectara(query: str, filter_str="", lambda_val=0.0) -> str:
    """Runs a Vectara query and returns the generated summary text."""
    corpus_key = {
        "customerId": customer_id,
        "corpusId": corpus_id,
        "lexicalInterpolationConfig": {"lambda": lambda_val},
    }
    if filter_str:
        corpus_key["metadataFilter"] = filter_str

    data = {
        "query": [
            {
                "query": query,
                "start": 0,
                "numResults": 10,
                "contextConfig": {
                    "sentencesBefore": 2,
                    "sentencesAfter": 2
                },
                "corpusKey": [corpus_key],
                "summary": [
                    {
                        "responseLang": "eng",
                        "maxSummarizedResults": 5,
                        "summarizerPromptName": "vectara-summary-ext-v1.2.0"
                    },
                ]
            }
        ]
    }

    response = requests.post(
        "https://api.vectara.io/v1/query",
        headers=get_post_headers(),
        data=json.dumps(data),
        timeout=130,
    )
    if response.status_code != 200:
        st.error(f"Query failed (code {response.status_code}, reason {response.reason}, details {response.text})")
        return ""

    result = response.json()
    answer = result["responseSet"][0]["summary"][0]["text"]
    # Strip citation markers such as [1] or [2,3] from the summary text.
    return re.sub(r'\[\d+(,\d+){0,5}\]', '', answer)
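# Minimal usage sketch for query_vectara (kept as a comment so it does not run
# on every Streamlit rerun); the metadata filter expression is hypothetical:
#   summary_text = query_vectara(
#       "What does HHEM measure?",
#       filter_str="doc.lang = 'eng'",   # illustrative filter, not from this app
#       lambda_val=0.025,
#   )
#   st.write(summary_text)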
# Initialize the HHEM model +++++++++++++++++++++++++++++++++++++++++++++++
model = CrossEncoder('vectara/hallucination_evaluation_model')
# Function to compute HHEM scores
def compute_hhem_scores(texts, summary):
    """Scores each (source text, summary) pair with the HHEM cross-encoder."""
    pairs = [[text, summary] for text in texts]
    scores = model.predict(pairs)
    return scores
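# Sketch of how the scores read (assumption: the model returns one value per
# [source, summary] pair, with higher values meaning the summary is better
# supported by the source text):
#   scores = compute_hhem_scores(
#       ["Paris is the capital of France.", "Berlin is the capital of France."],
#       "The capital of France is Paris.",
#   )
#   # -> roughly [high, low]; exact values depend on the model version.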
# Define the Vectara query function
def vectara_query(query: str, config: dict):
    """Runs a Vectara query and returns (scored results, summary text)."""
    corpus_key = [{
        "customerId": config["customer_id"],
        "corpusId": config["corpus_id"],
        "lexicalInterpolationConfig": {"lambda": config.get("lambda_val", 0.5)},
    }]
    data = {
        "query": [{
            "query": query,
            "start": 0,
            "numResults": config.get("top_k", 10),
            "contextConfig": {
                "sentencesBefore": 2,
                "sentencesAfter": 2,
            },
            "corpusKey": corpus_key,
            "summary": [{
                "responseLang": "eng",
                "maxSummarizedResults": 5,
            }]
        }]
    }

    headers = {
        "x-api-key": config["api_key"],
        "customer-id": config["customer_id"],
        "Content-Type": "application/json",
    }
    response = requests.post(
        headers=headers,
        url="https://api.vectara.io/v1/query",
        data=json.dumps(data),
        timeout=130,  # match the timeout used in query_vectara above
    )
    if response.status_code != 200:
        st.error(f"Query failed (code {response.status_code}, reason {response.reason}, details {response.text})")
        return [], ""

    result = response.json()
    responses = result["responseSet"][0]["response"]
    summary = result["responseSet"][0]["summary"][0]["text"]
    res = [[r['text'], r['score']] for r in responses]
    return res, summary
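# Minimal sketch of how vectara_query is used further below (hypothetical
# config values; the real ones come from the environment variables above):
#   cfg = {"api_key": "...", "customer_id": "...", "corpus_id": "...",
#          "lambda_val": 0.5, "top_k": 10}
#   results, summary = vectara_query("What is HHEM?", cfg)
#   # results: list of [text, score] pairs; summary: generated answer string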
# Create the main app with three tabs
tab1, tab2, tab3 = st.tabs(["Synthetic Data", "Data Query", "HHEM-Vectara Query Tuner"])

with tab1:
    st.header("Synthetic Data")
    # Placeholder for Synthetic Data functionality
    st.write("Here you can generate or manage synthetic data.")

with tab2:
    st.header("Data Query")
    # Placeholder for Data Query functionality
    st.write("Here you can perform data queries.")

    # Example of a simple query input; a unique key avoids a DuplicateWidgetID
    # clash with the identically labeled input in tab 3.
    query_input = st.text_input("Enter your query here", key="data_query_input")
    if st.button("Execute Query"):
        # Placeholder for query execution logic
        st.write(f"Executing query: {query_input}")
with tab3:
    st.header("HHEM-Vectara Query Tuner")

    # Streamlit UI setup
    st.title("HHEM-Vectara Query Tuning")

    # User inputs
    query = st.text_input("Enter your query here", "", key="tuner_query_input")
    lambda_val = st.slider("Lambda Value", min_value=0.0, max_value=1.0, value=0.5)
    top_k = st.number_input("Top K Results", min_value=1, max_value=50, value=10)

    if st.button("Query Vectara"):
        config = {
            "api_key": os.environ.get("VECTARA_API_KEY", ""),
            "customer_id": os.environ.get("VECTARA_CUSTOMER_ID", ""),
            "corpus_id": os.environ.get("VECTARA_CORPUS_ID", ""),
            "lambda_val": lambda_val,
            "top_k": top_k,
        }
        results, summary = vectara_query(query, config)

        if results:
            st.subheader("Summary")
            st.write(summary)

            st.subheader("Top Results")
            # Extract texts from results
            texts = [r[0] for r in results[:5]]

            # Compute HHEM scores
            scores = compute_hhem_scores(texts, summary)

            # Prepare and display the dataframe
            df = pd.DataFrame({'Fact': texts, 'HHEM Score': scores})
            st.dataframe(df)
        else:
            st.write("No results found.")