Spaces:
Runtime error
Runtime error
File size: 1,874 Bytes
69bac50 abab449 69bac50 b95388b 26f62c4 b95388b 26f62c4 b95388b 26f62c4 b95388b 26f62c4 b95388b 26f62c4 b95388b 26f62c4 b95388b 26f62c4 e5d2292 66b707b e5d2292 69bac50 abab449 69bac50 abab449 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
import ast
import json
import os

import numpy as np
import openai
import pandas as pd
from openai.embeddings_utils import distances_from_embeddings
# Read the OpenAI API key from the environment (None when the variable is
# unset) and hand it to the openai module so later API calls can use it.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
openai.api_key = OPENAI_API_KEY
def clear_state(chatbot, *args):
    """Forward ``args`` to ``chatbot.clear_state`` and return its result."""
    handler = chatbot.clear_state
    return handler(*args)
def send_system_nofification(chatbot, *args):
    """Forward ``args`` to ``chatbot.send_system_nofification`` and return its result."""
    # The spelling mirrors the method name exposed by the chatbot object.
    handler = chatbot.send_system_nofification
    return handler(*args)
def build_knowledge_base(chatbot, *args):
    """Forward ``args`` to ``chatbot.build_knowledge_base`` and return its result."""
    handler = chatbot.build_knowledge_base
    return handler(*args)
def change_md(chatbot, *args):
    """Forward ``args`` to ``chatbot.change_md`` and return its result."""
    handler = chatbot.change_md
    return handler(*args)
def get_index_file(chatbot, *args):
    """Forward ``args`` to ``chatbot.get_index_file`` and return its result."""
    handler = chatbot.get_index_file
    return handler(*args)
def user(chatbot, *args):
    """Forward ``args`` to ``chatbot.user`` and return its result."""
    handler = chatbot.user
    return handler(*args)
def bot(chatbot, *args):
    """Forward ``args`` to ``chatbot.bot`` and return its result."""
    handler = chatbot.bot
    return handler(*args)
def video_bot(video_chatbot, *args):
    """Forward ``args`` to ``video_chatbot.answer_question`` and return its result."""
    handler = video_chatbot.answer_question
    return handler(*args)
def search_transcript_content(transcript_id, user_question):
    """Rank the stored chunks of one transcript against a user question.

    Rows of ``transcript.csv`` whose ``uid`` equals ``transcript_id`` are
    selected, the question is embedded with ``text-embedding-ada-002``, and
    the cosine distance between the question and each row's stored
    ``embedding`` is computed.

    Args:
        transcript_id: Value matched against the ``uid`` column.
        user_question: Question text to embed and compare.

    Returns:
        A JSON string. ``{"success": false, "result": null}`` when no row
        matches ``transcript_id`` or when even the closest row is farther
        than the 0.2 distance cut-off. Otherwise
        ``{"success": true, "result": [...]}`` holding the ``text`` of every
        selected row, ordered from closest to farthest.
    """
    max_distance = 0.2  # cut-off applied to the closest row only
    no_match = json.dumps({"success": False, "result": None})

    transcript_db = pd.read_csv("transcript.csv")
    # .copy() gives an independent frame, so the column assignments below do
    # not write into a slice of the full table (SettingWithCopyWarning).
    transcript_db = transcript_db[transcript_db["uid"] == transcript_id].copy()
    if transcript_db.empty:
        # Nothing to compare against: report failure instead of raising
        # IndexError further down, and skip the embedding API call.
        return no_match

    user_q_emb = openai.Embedding.create(
        input=user_question, engine="text-embedding-ada-002"
    )["data"][0]["embedding"]

    # NOTE(security): the original called eval() on the CSV cells, which
    # executes arbitrary code if the file is tampered with. literal_eval only
    # parses Python literals (assumes embeddings are stored as stringified
    # lists of numbers -- TODO confirm against the code that writes the CSV).
    transcript_db["embedding"] = (
        transcript_db["embedding"].apply(ast.literal_eval).apply(np.array)
    )
    transcript_db["distance"] = distances_from_embeddings(
        user_q_emb,
        transcript_db["embedding"].values,
        distance_metric="cosine",
    )
    transcript_db = transcript_db.sort_values(by="distance", ascending=True)

    if transcript_db["distance"].iloc[0] > max_distance:
        return no_match

    # Every text of the transcript is returned, not just those under the
    # cut-off; the cut-off only decides whether the transcript is relevant.
    return json.dumps(
        {"success": True, "result": transcript_db["text"].tolist()}
    )