# Source snapshot: commit 7d38514, file size 3,680 bytes.
import os
import gradio as gr
import fitz # PyMuPDF
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss
from typing import List
from google.generativeai import GenerativeModel, configure, types
# Configure the Gemini client with the API key read from the environment
# (the key is None when the GOOGLE_API_KEY variable is not set).
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
configure(api_key=GOOGLE_API_KEY)
class MyApp:
    """In-memory PDF retrieval helper: page texts, their embeddings and a FAISS index.

    Intended flow: ``load_pdfs`` -> ``build_vector_db`` -> ``search_documents``.
    """

    def __init__(self):
        # One dict per extracted page: {"source", "page", "content"}.
        self.documents = []
        # Embedding matrix and FAISS index; both stay None until build_vector_db runs.
        self.embeddings = None
        self.index = None
        self.model = SentenceTransformer('all-MiniLM-L6-v2')

    def load_pdfs(self, files):
        """Load and extract text from the provided PDF files.

        Args:
            files: Iterable of uploaded files. Each item may be a path string
                or an object exposing the path as ``.name``. ``None`` or an
                empty iterable simply clears the loaded documents.
        """
        self.documents = []
        # A previously built index refers to positions in the old page list;
        # drop it so searches cannot return pages from a stale upload.
        self.embeddings = None
        self.index = None
        for file in files or []:
            file_path = getattr(file, "name", file)
            source = os.path.basename(str(file_path))
            # The context manager closes the PDF even if a page fails to parse.
            with fitz.open(file_path) as doc:
                for page_num, page in enumerate(doc, start=1):
                    text = page.get_text()
                    if not text.strip():
                        # Pages without extractable text add nothing to retrieval.
                        continue
                    self.documents.append(
                        {"source": source, "page": page_num, "content": text}
                    )
        print("PDFs processed successfully.")

    def build_vector_db(self):
        """Build a vector database using the content of the PDFs.

        Returns:
            A human-readable status message.
        """
        if not self.documents:
            return "No documents to process."
        texts = [doc["content"] for doc in self.documents]
        # FAISS only accepts C-contiguous float32 matrices.
        self.embeddings = np.ascontiguousarray(
            self.model.encode(texts, show_progress_bar=True), dtype=np.float32
        )
        self.index = faiss.IndexFlatL2(self.embeddings.shape[1])
        self.index.add(self.embeddings)
        return "Vector database built successfully!"

    def search_documents(self, query: str, k: int = 3) -> List[str]:
        """Search for relevant documents using vector similarity.

        Args:
            query: Free-text query to embed and look up.
            k: Maximum number of page texts to return.

        Returns:
            Up to ``k`` page texts in the order the index returns them, or a
            single-element list with a status message when no index exists.
        """
        if self.index is None:
            return ["Vector database is not ready."]
        # Never ask for more neighbours than there are pages.
        k = min(k, len(self.documents))
        if k <= 0:
            return []
        query_embedding = np.asarray(
            self.model.encode([query], show_progress_bar=False), dtype=np.float32
        )
        _, neighbours = self.index.search(query_embedding, k)
        # FAISS pads missing results with -1, which would otherwise index the
        # last page through Python's negative indexing.
        return [
            self.documents[int(i)]["content"]
            for i in neighbours[0]
            if 0 <= int(i) < len(self.documents)
        ]
# Module-level MyApp instance; the upload_files, build_vector_db and
# answer_query callbacks all operate on it.
app = MyApp()
def upload_files(files):
    """Gradio callback: extract text from the uploaded PDFs into the shared app.

    Args:
        files: Value of the upload component; ``None`` or empty when the
            selection has been cleared.

    Returns:
        A status message for the UI.
    """
    if not files:
        # The change event also fires when the upload is cleared; there is
        # nothing to parse in that case.
        return "No files uploaded."
    try:
        app.load_pdfs(files)
    except Exception as e:
        # UI boundary: report unreadable/corrupt files as text rather than
        # raising, matching how answer_query reports its errors.
        return f"An error occurred while processing the files: {str(e)}"
    return "Files uploaded and processed. Ready to build vector database."
def build_vector_db():
    """Gradio callback: build the vector index on the shared app and return its status text."""
    status_message = app.build_vector_db()
    return status_message
def answer_query(query):
    """Gradio callback: answer ``query`` with Gemini, grounded in the retrieved PDF pages.

    Args:
        query: The user's question as typed into the query box.

    Returns:
        The generated answer, or a status/error message as plain text.
    """
    if not query or not query.strip():
        return "Please enter a query."
    if app.index is None:
        # Return the status directly instead of forwarding it to the model
        # as if it were document content.
        return "Vector database is not ready."
    results = app.search_documents(query)
    if not results:
        return "No results found."
    # The prompt must carry both the retrieved pages and the question itself;
    # sending only the pages gives the model nothing to answer.
    context = "\n\n".join(results)
    prompt = (
        "Answer the question using the context below.\n\n"
        f"Context:\n{context}\n\n"
        f"Question: {query.strip()}"
    )
    # Generate a response using the generative model
    model = GenerativeModel("gemini-1.5-pro-latest")
    generation_config = types.GenerationConfig(
        temperature=0.7,
        max_output_tokens=150,
    )
    try:
        response = model.generate_content(prompt, generation_config=generation_config)
        response_text = response.text if hasattr(response, "text") else "No response generated."
    except Exception as e:
        # UI boundary: surface API failures as text instead of raising.
        response_text = f"An error occurred while generating the response: {str(e)}"
    return response_text
# Gradio UI: upload PDFs, build the vector index, then query it.
with gr.Blocks() as demo:
    gr.Markdown("# 🧘♀️ **Dialectical Behaviour Therapy Chatbot**")
    gr.Markdown("Upload your PDFs and interact with the content using AI.")
    with gr.Row():
        # File extensions must include the leading dot to be used as a filter.
        upload_btn = gr.Files(label="Upload PDFs", file_types=[".pdf"])
        upload_status = gr.Textbox()
    with gr.Row():
        db_btn = gr.Button("Build Vector Database")
        db_status = gr.Textbox()
    with gr.Row():
        query_input = gr.Textbox(label="Enter your query")
        submit_btn = gr.Button("Submit")
    # answer_query returns a plain string, so it is shown in a read-only text
    # box; a Chatbot component expects a chat history instead.
    response_display = gr.Textbox(label="Response", lines=8, interactive=False)

    upload_btn.change(upload_files, inputs=[upload_btn], outputs=[upload_status])
    db_btn.click(build_vector_db, outputs=[db_status])
    submit_btn.click(answer_query, inputs=[query_input], outputs=[response_display])

# Start the server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()