ChenyuRabbitLove's picture
feat:change search transcript content api return format
abab449
import os
import json
import openai
import pandas as pd
import numpy as np
from openai.embeddings_utils import distances_from_embeddings
# Read the OpenAI credential from the environment once, at import time, and
# hand it to the openai module. The value is None when the variable is unset.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
openai.api_key = OPENAI_API_KEY
def clear_state(chatbot, *args):
    """Call ``chatbot.clear_state(*args)`` and return its result.

    Args:
        chatbot: Object exposing a ``clear_state`` method.
        *args: Positional arguments forwarded unchanged.

    Returns:
        Whatever ``chatbot.clear_state`` returns.
    """
    return chatbot.clear_state(*args)
def send_system_nofification(chatbot, *args):
    """Call ``chatbot.send_system_nofification(*args)`` and return its result.

    The spelling "nofification" is kept on purpose: it is both this
    function's public name and the name of the method it delegates to.

    Args:
        chatbot: Object exposing a ``send_system_nofification`` method.
        *args: Positional arguments forwarded unchanged.

    Returns:
        Whatever ``chatbot.send_system_nofification`` returns.
    """
    return chatbot.send_system_nofification(*args)
def build_knowledge_base(chatbot, *args):
    """Call ``chatbot.build_knowledge_base(*args)`` and return its result.

    Args:
        chatbot: Object exposing a ``build_knowledge_base`` method.
        *args: Positional arguments forwarded unchanged.

    Returns:
        Whatever ``chatbot.build_knowledge_base`` returns.
    """
    return chatbot.build_knowledge_base(*args)
def change_md(chatbot, *args):
    """Call ``chatbot.change_md(*args)`` and return its result.

    Args:
        chatbot: Object exposing a ``change_md`` method.
        *args: Positional arguments forwarded unchanged.

    Returns:
        Whatever ``chatbot.change_md`` returns.
    """
    return chatbot.change_md(*args)
def get_index_file(chatbot, *args):
    """Call ``chatbot.get_index_file(*args)`` and return its result.

    Args:
        chatbot: Object exposing a ``get_index_file`` method.
        *args: Positional arguments forwarded unchanged.

    Returns:
        Whatever ``chatbot.get_index_file`` returns.
    """
    return chatbot.get_index_file(*args)
def user(chatbot, *args):
    """Call ``chatbot.user(*args)`` and return its result.

    Args:
        chatbot: Object exposing a ``user`` method.
        *args: Positional arguments forwarded unchanged.

    Returns:
        Whatever ``chatbot.user`` returns.
    """
    return chatbot.user(*args)
def bot(chatbot, *args):
    """Call ``chatbot.bot(*args)`` and return its result.

    Args:
        chatbot: Object exposing a ``bot`` method.
        *args: Positional arguments forwarded unchanged.

    Returns:
        Whatever ``chatbot.bot`` returns.
    """
    return chatbot.bot(*args)
def video_bot(video_chatbot, *args):
    """Call ``video_chatbot.answer_question(*args)`` and return its result.

    Args:
        video_chatbot: Object exposing an ``answer_question`` method.
        *args: Positional arguments forwarded unchanged.

    Returns:
        Whatever ``video_chatbot.answer_question`` returns.
    """
    return video_chatbot.answer_question(*args)
def search_transcript_content(transcript_id, user_question):
    """Rank one transcript's text rows by similarity to a question.

    Reads ``transcript.csv``, keeps the rows whose ``uid`` equals
    *transcript_id*, embeds *user_question* with ``text-embedding-ada-002``
    and orders the rows by cosine distance to that embedding.

    Args:
        transcript_id: Value compared for equality against the ``uid`` column.
        user_question: Text to embed and search for.

    Returns:
        A JSON string. On a match it is ``{"success": true, "result": [...]}``
        where ``result`` lists every ``text`` value of the transcript, closest
        first. It is ``{"success": false, "result": null}`` when the
        transcript has no rows or when even the closest row has a cosine
        distance greater than 0.2.
    """
    not_found = json.dumps({"success": False, "result": None})

    transcript_db = pd.read_csv("transcript.csv")
    # .copy() detaches the filtered rows from the full table so the column
    # assignments below write to this frame and not to a slice of another one.
    transcript_db = transcript_db[transcript_db["uid"] == transcript_id].copy()

    # No rows for this transcript: report "not found" instead of failing on
    # the first-row lookup below, and skip the embedding request entirely.
    if transcript_db.empty:
        return not_found

    user_q_emb = openai.Embedding.create(
        input=user_question, engine="text-embedding-ada-002"
    )["data"][0]["embedding"]

    # NOTE(review): eval() executes whatever is stored in the "embedding"
    # column, so this is only safe while transcript.csv is a trusted file.
    # If the column always holds plain list literals, ast.literal_eval or
    # json.loads would be a safer parser - confirm the stored format first.
    transcript_db["embedding"] = (
        transcript_db["embedding"].apply(eval).apply(np.array)
    )
    transcript_db["distance"] = distances_from_embeddings(
        user_q_emb,
        transcript_db["embedding"].values,
        distance_metric="cosine",
    )
    transcript_db = transcript_db.sort_values(by="distance", ascending=True)

    # After sorting, the first row is the best match; if it is still too far
    # away, nothing in this transcript is considered relevant.
    if transcript_db["distance"].iloc[0] > 0.2:
        return not_found

    # Return every text row of the transcript, closest first, as JSON.
    result = {
        "success": True,
        "result": transcript_db["text"].tolist(),
    }
    return json.dumps(result)