File size: 1,874 Bytes
69bac50
abab449
69bac50
 
 
 
 
 
 
 
 
b95388b
 
 
26f62c4
b95388b
 
 
26f62c4
b95388b
 
 
26f62c4
b95388b
 
 
26f62c4
b95388b
 
 
26f62c4
b95388b
 
 
26f62c4
b95388b
26f62c4
e5d2292
66b707b
e5d2292
 
69bac50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
abab449
 
 
 
 
69bac50
abab449
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import ast
import json
import os

import numpy as np
import openai
import pandas as pd
from openai.embeddings_utils import distances_from_embeddings

# Pull the API key from the process environment (None when the variable is
# not set) and register it on the openai module.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
openai.api_key = OPENAI_API_KEY

def clear_state(chatbot, *args):
    """Call ``chatbot.clear_state`` with ``args`` and return its result."""
    handler = chatbot.clear_state
    return handler(*args)


def send_system_nofification(chatbot, *args):
    """Call ``chatbot.send_system_nofification`` with ``args`` and return its result."""
    handler = chatbot.send_system_nofification
    return handler(*args)


def build_knowledge_base(chatbot, *args):
    """Call ``chatbot.build_knowledge_base`` with ``args`` and return its result."""
    handler = chatbot.build_knowledge_base
    return handler(*args)


def change_md(chatbot, *args):
    """Call ``chatbot.change_md`` with ``args`` and return its result."""
    handler = chatbot.change_md
    return handler(*args)


def get_index_file(chatbot, *args):
    """Call ``chatbot.get_index_file`` with ``args`` and return its result."""
    handler = chatbot.get_index_file
    return handler(*args)


def user(chatbot, *args):
    """Call ``chatbot.user`` with ``args`` and return its result."""
    handler = chatbot.user
    return handler(*args)


def bot(chatbot, *args):
    """Call ``chatbot.bot`` with ``args`` and return its result."""
    handler = chatbot.bot
    return handler(*args)


def video_bot(video_chatbot, *args):
    """Call ``video_chatbot.answer_question`` with ``args`` and return its result."""
    answer = video_chatbot.answer_question
    return answer(*args)

def search_transcript_content(
    transcript_id,
    user_question,
    *,
    max_distance=0.2,
    transcript_path="transcript.csv",
):
    """Rank one transcript's rows by similarity to a question.

    Reads ``transcript_path``, keeps the rows whose ``uid`` equals
    ``transcript_id``, embeds ``user_question`` with
    ``text-embedding-ada-002`` and sorts the rows by cosine distance between
    the question embedding and each row's stored embedding.

    Args:
        transcript_id: Value compared against the ``uid`` column.
        user_question: Text that is embedded and compared with the stored
            embeddings.
        max_distance: Largest cosine distance the closest row may have for
            the search to count as a success. Defaults to ``0.2``.
        transcript_path: CSV file providing the ``uid``, ``embedding`` and
            ``text`` columns. Defaults to ``"transcript.csv"``.

    Returns:
        A JSON string. ``{"success": false, "result": null}`` when no row
        matches ``transcript_id`` or the closest row is farther than
        ``max_distance``; otherwise ``{"success": true, "result": [...]}``
        where the list holds the ``text`` value of every row of that
        transcript, closest first.
    """
    no_match = json.dumps({"success": False, "result": None})

    transcript_db = pd.read_csv(transcript_path)
    # .copy() detaches the filtered rows so the column assignments below do
    # not write into a slice of the full frame (SettingWithCopyWarning).
    transcript_db = transcript_db[transcript_db["uid"] == transcript_id].copy()

    # An unknown transcript previously crashed with IndexError on
    # ``.values[0]``; report it as a failed search instead, and do so before
    # spending an embedding request on it.
    if transcript_db.empty:
        return no_match

    user_q_emb = openai.Embedding.create(
        input=user_question, engine="text-embedding-ada-002"
    )["data"][0]["embedding"]

    # The embedding column holds Python-literal strings. They were parsed
    # with eval(); ast.literal_eval accepts the same literals but cannot
    # execute arbitrary code that ends up in the CSV.
    transcript_db["embedding"] = transcript_db["embedding"].apply(
        lambda raw: np.array(ast.literal_eval(raw))
    )

    transcript_db["distance"] = distances_from_embeddings(
        user_q_emb,
        transcript_db["embedding"].values,
        distance_metric="cosine",
    )
    transcript_db = transcript_db.sort_values(by="distance", ascending=True)

    # Only the best match is compared with the threshold; when it passes,
    # every row of the transcript is returned in ranked order.
    if transcript_db["distance"].iloc[0] > max_distance:
        return no_match

    return json.dumps(
        {
            "success": True,
            "result": transcript_db["text"].tolist(),
        }
    )