first commit
- app.py +75 -0
- backend_functions.py +160 -0
- prompt_general.txt +15 -0
- prompt_standalone_message.txt +10 -0
- requirements.txt +5 -0
- utils.py +14 -0
app.py
ADDED
@@ -0,0 +1,75 @@
import gradio as gr
import os
import time

from utils import make_invisible, make_visible
from backend_functions import get_answer_text

from dotenv import load_dotenv
load_dotenv()


with gr.Blocks() as main_app:
    with gr.Tab('Chatbot'):
        user_id = gr.State('')  # id used to find the chat in the database

        chat = gr.Chatbot(label="Chatbot Crunchyroll")

        messages = gr.State([])

        with gr.Row():
            text = gr.Textbox(label='Write your question')

        with gr.Row():
            with gr.Column():
                button_text = gr.Button(value='Submit text')
            with gr.Column():
                clear_button = gr.ClearButton([chat, messages])

    # with gr.Tab('Ventana Pruebas'):
    #     with gr.Row():
    #         with gr.Column():
    #             button_show = gr.Button(value="Mostrar texto")
    #         with gr.Column():
    #             button_hidden = gr.Button(value="Ocultar texto")

    #     with gr.Row(visible=False) as first_row:
    #         text = gr.Textbox(value="Hola mundo")


    # Actions

    # button_show.click(
    #     fn=make_visible,
    #     inputs=None,
    #     outputs=first_row
    # )

    # button_hidden.click(
    #     fn=make_invisible,
    #     inputs=None,
    #     outputs=first_row
    # )

    text.submit(
        fn=get_answer_text,
        inputs=[text, chat, messages],
        outputs=[chat]
    ).then(
        lambda: None, None, [text]
    )

    button_text.click(
        fn=get_answer_text,
        inputs=[text, chat, messages],
        outputs=[chat]
    ).then(
        lambda: None, None, [text]
    )


main_app.launch(debug=True, auth=(os.environ.get('SPACE_USERNAME'), os.environ.get('SPACE_PASSWORD')))
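Note: both app.py and backend_functions.py read their configuration from environment variables via load_dotenv(). A minimal local .env sketch, using only the variable names referenced in this commit (all values are placeholders, and the model id is an assumed example):

# .env -- placeholders only, never commit real credentials
OPENAI_API_KEY=...
MODEL_OPENAI=gpt-3.5-turbo   # assumed example; any chat-completions model id
PINECONE_API_TOKEN=...
PINECONE_ENV=...
PINECONE_HOST=...
DB_USER_NAME=...
DB_PASSWORD=...
SPACE_USERNAME=...   # basic-auth credentials for main_app.launch()
SPACE_PASSWORD=...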
backend_functions.py
ADDED
@@ -0,0 +1,160 @@
import gradio as gr
import random
import os
from openai import OpenAI
from pinecone import Pinecone
import uuid
from pymongo.mongo_client import MongoClient
from dotenv import load_dotenv

load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
MODEL_OPENAI = os.getenv("MODEL_OPENAI")

PINECONE_API_TOKEN = os.getenv("PINECONE_API_TOKEN")
PINECONE_ENVIRONMENT = os.getenv("PINECONE_ENV")
PINECONE_HOST = os.getenv("PINECONE_HOST")

DB_USER_NAME = os.getenv("DB_USER_NAME")
DB_PASSWORD = os.getenv("DB_PASSWORD")


# Chat
openai_client = OpenAI(api_key=OPENAI_API_KEY)

# Vector store
pc = Pinecone(api_key=PINECONE_API_TOKEN)
index = pc.Index(host=PINECONE_HOST)

# Database
uri = f"mongodb+srv://{DB_USER_NAME}:{DB_PASSWORD}@cluster-rob01.3fpztfw.mongodb.net/?retryWrites=true&w=majority&appName=cluster-rob01"
client = MongoClient(uri)
db = client["ChatCrunchyroll"]
collection = db["history_msg"]


def _save_history_msg():
    # Stub: persisting messages to the MongoDB collection is not implemented yet
    return None


def _add_question_vectorstore(question: str, response: str):
    # Upserts a question/answer pair into Pinecone (not called anywhere yet)
    vector_id = str(uuid.uuid4())
    vector_embedding = _call_embedding(question)
    vector_metadata = {
        'question': question,
        'text': response
    }
    index.upsert([(vector_id, vector_embedding, vector_metadata)])


def _update_elements(question, chatbot, output, history_messages):
    # Appends the new exchange to both the visible chat and the raw message history
    chatbot.append([question, output])

    history_messages.append({'role': 'user', 'content': question})
    history_messages.append({'role': 'assistant', 'content': output})

    return chatbot


def _query_pinecone(embedding):
    results = index.query(
        vector=embedding,
        top_k=10,
        include_metadata=True,
    )

    final_results = ''
    for result in results['matches']:
        final_results += f"{result['metadata']['text']}\n"

    return final_results


def _general_prompt(context):
    with open("prompt_general.txt", "r") as file:
        file_prompt = file.read().replace("\n", "")

    context_prompt = file_prompt.replace('CONTEXT', context)
    print(context_prompt)
    print("--------------------")

    return context_prompt


def _call_embedding(text: str):
    response = openai_client.embeddings.create(
        input=text,
        model='text-embedding-ada-002'
    )
    return response.data[0].embedding


def _call_gpt(prompt: str, message: str):
    response = openai_client.chat.completions.create(
        model=MODEL_OPENAI,
        temperature=0.2,
        messages=[
            {'role': 'system', 'content': prompt},
            {'role': 'user', 'content': message}
        ]
    )
    return response.choices[0].message.content


def _call_gpt_standalone(prompt: str):
    response = openai_client.chat.completions.create(
        model=MODEL_OPENAI,
        temperature=0.2,
        messages=[
            {'role': 'system', 'content': prompt},
        ]
    )
    return response.choices[0].message.content


def _get_standalone_question(question, history_messages):
    with open("prompt_standalone_message.txt", "r") as file:
        file_prompt_standalone = file.read().replace("\n", "")

    history = ''
    for i, msg in enumerate(history_messages):
        try:
            if i == 0:
                continue  # Omit the prompt
            if i % 2 == 0:
                history += f'user: {msg["content"]}\n'
            else:
                history += f'assistant: {msg["content"]}\n'
        except Exception as e:
            print(e)

    prompt_standalone = file_prompt_standalone.replace('HISTORY', history).replace('QUESTION', question)
    standalone_msg_q = _call_gpt_standalone(prompt_standalone)
    print(standalone_msg_q)
    print("------------------")

    return standalone_msg_q


def get_answer_text(question: str, chatbot: list[tuple[str, str]], history_messages):
    """
    Gets the chatbot's answer to the user's question and updates the chat state.
    """
    if len(chatbot) == 8:
        # Hard stop: after 8 exchanges the bot closes the conversation
        message_output = 'Un placer haberte ayudado, hasta luego!'
    else:
        standalone_msg_q = _get_standalone_question(question, history_messages)  # create standalone question or message
        output_embedding = _call_embedding(standalone_msg_q)  # create embedding of standalone question or message
        best_results = _query_pinecone(output_embedding)  # get nearest embeddings
        final_context_prompt = _general_prompt(best_results)  # create context/general prompt
        message_output = _call_gpt(final_context_prompt, question)

    if "Respuesta:" in message_output:
        message_output = message_output.replace("Respuesta:", "")  # assign the result; str.replace does not mutate in place

    print(history_messages)

    return _update_elements(question, chatbot, message_output, history_messages)
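Note: a minimal smoke-test sketch for the retrieval pipeline above, run outside Gradio (assumes a populated .env and Pinecone index; the question is just an example):

if __name__ == '__main__':
    chatbot_state = []   # list of [user, assistant] pairs, as rendered by gr.Chatbot
    history_state = []   # role/content dicts accumulated by _update_elements
    chatbot_state = get_answer_text('¿Qué anime de ficción me recomiendas?', chatbot_state, history_state)
    print(chatbot_state[-1][1])   # assistant's reply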
prompt_general.txt
ADDED
@@ -0,0 +1,15 @@
Tu objetivo es responder las preguntas que tengan los usuarios, en máximo 1 párrafo de entre 100 y 200 caracteres, acerca de películas o series animadas de Crunchyroll.
Tu nombre es Roll, no tienes edad, eres amigable y cordial.
Tono de Comunicación: Tu tono debe ser amigable, cordial y casual. Debes comunicarte de manera natural y fácil de entender. Usa palabras simples y un lenguaje que se sienta cercano. Además, no repitas información que ya diste anteriormente.
Objetivo de Roll: Tu objetivo es brindar información y recomendaciones sobre películas o series animadas de Crunchyroll.
Si te preguntan sobre situaciones donde se requiera ver los animes o películas, debes redirigirlos a https://www.crunchyroll.com/es/ y especificar que no puedes ver videos, ya que solamente eres un asistente.
Instrucciones para tener cualquier conversación. Debes hacer siempre lo siguiente:
Describe las series/películas según su información, calificaciones, puntuaciones o categorías, además de dónde se pueden ver (enlaces).
Nunca hagas una pregunta cuya respuesta sea sí o no; haz preguntas de múltiples opciones.
Para información sobre dónde encontrar una lista completa de las series o películas, redirige al usuario a la página: https://www.crunchyroll.com/es/videos/alphabetical
Si te preguntan sobre mangas (palabra japonesa para designar las historietas en general), responde que actualmente Crunchyroll solo cuenta con noticias sobre mangas y redirige al usuario a la página: https://www.crunchyroll.com/es/news/manga
Para responder las preguntas, utiliza la siguiente información. Si la información dada no es suficiente, no inventes información y dile al usuario que visite esta página: https://www.crunchyroll.com/es/
=========
Contexto:
CONTEXT
=========
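Note: _general_prompt in backend_functions.py flattens this template to a single line and substitutes the literal CONTEXT token with the concatenated 'text' metadata of the top Pinecone matches. A toy sketch of that substitution (the context string here is invented for illustration):

with open('prompt_general.txt') as file:
    template = file.read().replace('\n', '')
system_prompt = template.replace('CONTEXT', 'Serie X: anime de ficción, puntuación 4.5 ...')  # toy context string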
prompt_standalone_message.txt
ADDED
@@ -0,0 +1,10 @@
You are a standalone question-maker. Given the following chat history and follow-up message, rephrase the follow-up message into a standalone question (sometimes the follow-up is not a question, so create a standalone phrase), in Spanish. In the standalone message you must include all the information known about the customer so far, all the important nouns, and what they are looking for. In cases where you think it is useful, include the best recommendation for the customer/user. For context, the conversation is about Crunchyroll, the streaming service for anime series and movies.
For example, if the user says "Quiero ver un anime de ficción, ¿cuál me recomiendas?" then the standalone phrase should be something like "animes en la categoría ficción o con descripción sobre ficción", or if the user says "He escuchado que One Piece es un excelente anime pero muy largo y no vale la pena ¿Me lo recomiendas?", then the standalone question should be something like "¿cuál es la descripción de One Piece, sus categorías y cuál es su puntuación?".
There might be moments when there isn't a question; in those cases, return a standalone phrase: for example, if the user says "hola" (or something similar) then the output would be "el usuario está saludando", or if the user says "gracias" or "es muy útil" (or something similar) then the output would be a phrase showing that the user is grateful and what they are grateful for, or if the user says "yes" then it would be a phrase encapsulating its relationship to the previous question or phrase.
Your response cannot be more than 100 words.

Chat History:

HISTORY
Follow-up message: QUESTION
Standalone message:
requirements.txt
ADDED
@@ -0,0 +1,5 @@
gradio==4.19.2
python-dotenv==0.20.0
pinecone-client==3.1.0
openai==1.13.3
pymongo==4.6.2
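Note: on Hugging Face Spaces these pinned dependencies are installed automatically at build time; for a local run, pip install -r requirements.txt in a fresh virtual environment is enough.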
utils.py
ADDED
@@ -0,0 +1,14 @@
import gradio as gr


def make_invisible():
    """
    Hides a row.
    """
    # Gradio 4.x removed gr.Row.update(); returning a configured component applies the update
    return gr.Row(visible=False)


def make_visible():
    """
    Shows a row.
    """
    return gr.Row(visible=True)


# NOTE: these helpers are currently referenced only by the commented-out
# 'Ventana Pruebas' tab in app.py.