"""Chatbot callback wrappers and an embedding-based transcript search helper."""

import ast
import json
import os

import numpy as np
import openai
import pandas as pd
from openai.embeddings_utils import distances_from_embeddings

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
openai.api_key = OPENAI_API_KEY

# Embedding model used to embed the user's question.
_EMBEDDING_ENGINE = "text-embedding-ada-002"
# CSV file read on every search; the code below uses its "uid", "embedding"
# and "text" columns.
_TRANSCRIPT_CSV = "transcript.csv"
# If even the closest transcript row is farther than this cosine distance,
# the search is reported as unsuccessful.
_MAX_COSINE_DISTANCE = 0.2


def clear_state(chatbot, *args):
    """Forward ``*args`` to ``chatbot.clear_state`` and return its result."""
    return chatbot.clear_state(*args)


def send_system_nofification(chatbot, *args):
    """Forward ``*args`` to ``chatbot.send_system_nofification``.

    NOTE(review): "nofification" is misspelled, but the name is kept because
    it is both this function's public name and the attribute looked up on
    ``chatbot``; renaming either side alone would break callers.
    """
    return chatbot.send_system_nofification(*args)


def build_knowledge_base(chatbot, *args):
    """Forward ``*args`` to ``chatbot.build_knowledge_base`` and return its result."""
    return chatbot.build_knowledge_base(*args)


def change_md(chatbot, *args):
    """Forward ``*args`` to ``chatbot.change_md`` and return its result."""
    return chatbot.change_md(*args)


def get_index_file(chatbot, *args):
    """Forward ``*args`` to ``chatbot.get_index_file`` and return its result."""
    return chatbot.get_index_file(*args)


def user(chatbot, *args):
    """Forward ``*args`` to ``chatbot.user`` and return its result."""
    return chatbot.user(*args)


def bot(chatbot, *args):
    """Forward ``*args`` to ``chatbot.bot`` and return its result."""
    return chatbot.bot(*args)


def video_bot(video_chatbot, *args):
    """Forward ``*args`` to ``video_chatbot.answer_question`` and return its result."""
    return video_chatbot.answer_question(*args)


def search_transcript_content(transcript_id, user_question: str) -> str:
    """Rank one transcript's rows by similarity to a question.

    The question is embedded through the OpenAI embeddings API, the rows of
    ``transcript.csv`` whose ``uid`` equals ``transcript_id`` are selected,
    and those rows are ordered by cosine distance to the question embedding
    (closest first).

    Args:
        transcript_id: Value compared for equality against the ``uid`` column.
        user_question: Text to embed and search for.

    Returns:
        A JSON string. ``{"success": false, "result": null}`` when the
        transcript has no rows or its closest row is farther than the
        distance threshold (0.2). Otherwise
        ``{"success": true, "result": [...]}``, where the list holds the
        ``text`` of *every* row of the transcript, nearest first (rows are
        not filtered individually by the threshold).

    Raises:
        Whatever ``openai.Embedding.create`` or ``pandas.read_csv`` raise
        (for example, on API failure or a missing CSV file), and
        ``ValueError``/``SyntaxError`` if a stored embedding is not a valid
        Python literal.
    """
    no_match = json.dumps({"success": False, "result": None})

    response = openai.Embedding.create(
        input=user_question, engine=_EMBEDDING_ENGINE
    )
    question_embedding = response["data"][0]["embedding"]

    transcript_db = pd.read_csv(_TRANSCRIPT_CSV)
    # Copy the filtered rows so the column assignments below act on an
    # independent frame rather than a view of the full table.
    transcript_db = transcript_db[transcript_db["uid"] == transcript_id].copy()

    # Unknown transcript id: nothing to rank. Without this guard the
    # threshold check below would raise IndexError on an empty frame.
    if transcript_db.empty:
        return no_match

    # SECURITY: embeddings are stored as stringified lists. They were
    # previously parsed with eval(), which would execute arbitrary code
    # placed in the CSV; literal_eval accepts only Python literals.
    transcript_db["embedding"] = (
        transcript_db["embedding"].apply(ast.literal_eval).apply(np.array)
    )
    transcript_db["distance"] = distances_from_embeddings(
        question_embedding,
        transcript_db["embedding"].values,
        distance_metric="cosine",
    )
    transcript_db = transcript_db.sort_values(by="distance", ascending=True)

    # After sorting, row 0 is the best match; if even that is too far away,
    # treat the whole search as a miss.
    if transcript_db["distance"].values[0] > _MAX_COSINE_DISTANCE:
        return no_match

    # return transcript_db["text"] in json format
    result = {
        "success": True,
        "result": transcript_db["text"].tolist(),
    }
    return json.dumps(result)