Spaces:
Runtime error
Runtime error
param4374416
committed on
Commit
•
c97b223
1
Parent(s):
14c5b18
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from huggingface_hub import InferenceClient
|
3 |
+
from typing import List, Tuple
|
4 |
+
import fitz # PyMuPDF
|
5 |
+
from sentence_transformers import SentenceTransformer, util
|
6 |
+
import numpy as np
|
7 |
+
import faiss
|
8 |
+
|
9 |
+
# Hugging Face Hub inference client bound to the zephyr-7b-beta chat model.
# Created once at module level; `respond` streams chat completions through it.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
|
10 |
+
|
11 |
+
class MyApp:
    """Page-level retrieval over a single PDF.

    On construction the PDF is read page by page, every page's text is
    embedded with a sentence-transformer model, and the embeddings are stored
    in a flat L2 FAISS index. `search_documents` then returns the text of the
    pages closest to a query.

    Attributes:
        documents: list of ``{"page": int, "content": str}`` dicts, one per
            PDF page (1-based page numbers).
        embeddings: float32 matrix of page embeddings, or ``None`` when
            nothing has been indexed.
        index: FAISS index over ``embeddings``, or ``None`` when nothing has
            been indexed.
    """

    # Checkpoint used for both indexing and querying; the two must match so
    # that query and page vectors live in the same space.
    EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'

    # Sentinel result returned when retrieval yields nothing usable.
    NO_RESULTS_MESSAGE = "No relevant documents found."

    def __init__(self, pdf_path: str = "YOURPDFFILE") -> None:
        """Load ``pdf_path`` and build the vector index.

        Args:
            pdf_path: Path of the PDF to index. The default is the original
                placeholder and must be replaced with (or overridden by) a
                real file path for the app to start.
        """
        self.documents = []
        self.embeddings = None
        self.index = None
        self._model = None  # lazily created by _get_model()
        self.load_pdf(pdf_path)
        self.build_vector_db()

    def _get_model(self):
        """Return the shared embedding model, loading it on first use."""
        # getattr keeps this safe for instances created without __init__.
        model = getattr(self, "_model", None)
        if model is None:
            model = SentenceTransformer(self.EMBEDDING_MODEL_NAME)
            self._model = model
        return model

    def load_pdf(self, file_path: str) -> None:
        """Extracts text from a PDF file and stores it in the app's documents.

        Args:
            file_path: Path of the PDF to read.
        """
        # The context manager closes the document even if extraction fails.
        with fitz.open(file_path) as doc:
            self.documents = [
                {"page": page_num, "content": page.get_text()}
                for page_num, page in enumerate(doc, start=1)
            ]
        print("PDF processed successfully!")

    def build_vector_db(self) -> None:
        """Builds a vector database using the content of the PDF.

        When there are no documents the index is left as ``None`` so that
        `search_documents` can answer with the fallback message instead of
        failing on an empty embedding matrix.
        """
        if not self.documents:
            self.embeddings = None
            self.index = None
            print("No document content to index; vector database not built.")
            return

        # Generate embeddings for all document contents
        embeddings = self._get_model().encode(
            [doc["content"] for doc in self.documents]
        )
        # FAISS expects a contiguous float32 matrix.
        self.embeddings = np.ascontiguousarray(embeddings, dtype="float32")
        # Create a FAISS index and add the embeddings to it
        self.index = faiss.IndexFlatL2(self.embeddings.shape[1])
        self.index.add(self.embeddings)
        print("Vector database built successfully!")

    def search_documents(self, query: str, k: int = 3) -> List[str]:
        """Searches for relevant documents using vector similarity.

        Args:
            query: Free-text query to embed and look up.
            k: Maximum number of pages to return.

        Returns:
            The text of up to ``k`` nearest pages, closest first, or a
            one-element list holding the fallback message when nothing can be
            retrieved.
        """
        index = getattr(self, "index", None)
        documents = getattr(self, "documents", None) or []

        # Never ask for more neighbours than there are pages.
        k = min(k, len(documents))
        if index is None or k <= 0:
            return [self.NO_RESULTS_MESSAGE]

        # Generate an embedding for the query
        query_embedding = self._get_model().encode([query])
        # Perform a search in the FAISS index
        _distances, indices = index.search(
            np.array(query_embedding, dtype="float32"), k
        )
        # FAISS pads missing neighbours with -1; without this filter a
        # negative label would silently select the last page.
        results = [
            documents[int(i)]["content"]
            for i in indices[0]
            if 0 <= int(i) < len(documents)
        ]
        return results if results else [self.NO_RESULTS_MESSAGE]
|
50 |
+
|
51 |
+
# Module-level retrieval backend used by `respond`. Constructing it runs
# MyApp.__init__, which loads the PDF and builds the FAISS index at import time.
app = MyApp()
|
52 |
+
|
53 |
+
def respond(
    message: str,
    history: List[Tuple[str, str]],
    system_message: str = "",
    max_tokens: int = 512,
    temperature: float = 0.7,
    top_p: float = 0.95,
):
    """Stream a chat reply to ``message``, grounded in retrieved PDF pages.

    The trailing parameters have defaults because the chat UI calls this
    function with only ``(message, history)`` when no additional inputs are
    configured.

    Args:
        message: The user's latest message.
        history: Earlier ``(user, assistant)`` turns; empty entries are skipped.
        system_message: System prompt. When empty, the built-in Music Guru
            prompt is used.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Yields:
        The reply accumulated so far, once per streamed token.
    """
    if not system_message:
        system_message = "You are a Music Guru specializing in English pop music. You recommend upbeat tunes for energy boosts, soothing melodies for relaxation, and nostalgic songs for reflective moments. Share your mood or preferences, and let me suggest the perfect English pop track for you!"
    messages = [{"role": "system", "content": system_message}]

    for user_turn, assistant_turn in history or []:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})

    messages.append({"role": "user", "content": message})

    # RAG - Retrieve relevant documents
    retrieved_docs = app.search_documents(message)
    context = "\n".join(retrieved_docs)
    messages.append({"role": "system", "content": "Relevant documents: " + context})

    response = ""
    # `chunk` (not `message`) so the user's message is not shadowed.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        # Streamed chunks may carry no text (e.g. the final one); skipping them
        # avoids `str += None`.
        if not token:
            continue
        response += token
        yield response
|
88 |
+
|
89 |
+
# Build the Gradio UI: a heading, a disclaimer, and the chat interface wired to
# `respond`.
# NOTE(review): the heading and disclaimer describe a DBT chatbot, while the
# title and the system prompt in `respond` describe a pop-music advisor.
# Confirm which theme is intended and align the texts.
with gr.Blocks() as demo:
    gr.Markdown("🧘♀️ **Dialectical Behaviour Therapy**")
    gr.Markdown(
        "‼️Disclaimer: This chatbot is based on a DBT exercise book that is publicly available. "
        "We are not medical practitioners, and the use of this chatbot is at your own responsibility.‼️"
    )

    chatbot = gr.ChatInterface(
        respond,
        # Each example is a one-element list: the text placed in the message box.
        examples=[
            ["I'm in the mood for some upbeat music to energize me."],
            ["Could you recommend a relaxing English pop song to unwind?"],
            ["What's a good English pop song for when I'm feeling nostalgic?"],
            ["What are some DBT skills for managing anxiety?"],
            ["Can you explain mindfulness in DBT?"],
            ["I am interested in DBT exercises"],
            ["I feel restless. Please help me."],
            ["I have destructive thoughts coming to my mind repetitively."],
        ],
        title='English Pop Music Advisor Chatbot 🎵',
    )

# Launch the web server only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()
|