# RAGBOT
#
# Running
#
# File size: 3,680 Bytes
#
# 7d38514

import os
import gradio as gr
import fitz  # PyMuPDF
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss
from typing import List
from google.generativeai import GenerativeModel, configure, types

# Read the Gemini API key from the environment and hand it to the client library.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
configure(api_key=GOOGLE_API_KEY)

class MyApp:
    """Hold extracted PDF page text and a FAISS index for searching it.

    State is filled in stages: ``load_pdfs`` populates ``documents``,
    ``build_vector_db`` populates ``embeddings`` and ``index``, and
    ``search_documents`` queries the index.
    """

    def __init__(self):
        # One entry per PDF page: {"page": <1-based page number>, "content": <text>}.
        self.documents = []
        self.embeddings = None
        self.index = None
        self.model = SentenceTransformer('all-MiniLM-L6-v2')

    def load_pdfs(self, files):
        """Load and extract text from the provided PDF files.

        Previously loaded pages are replaced, and any index built from them
        is discarded so that search results can never point at stale pages.

        Args:
            files: Iterable of uploads. Each item is either a filesystem path
                (``str`` / ``os.PathLike``) or an object exposing the path as
                ``.name``. ``None`` is treated as "no files".
        """
        self.documents = []
        # Index rows are positions in the old document list; drop them together.
        self.embeddings = None
        self.index = None
        for file in files or []:
            if isinstance(file, (str, os.PathLike)):
                file_path = file
            else:
                file_path = file.name
            # The context manager closes the PDF even if a page fails to parse.
            with fitz.open(file_path) as doc:
                for page_num, page in enumerate(doc, start=1):
                    self.documents.append(
                        {"page": page_num, "content": page.get_text()}
                    )
        print("PDFs processed successfully.")

    def build_vector_db(self):
        """Build a vector database using the content of the PDFs.

        Returns:
            A human-readable status message.
        """
        if not self.documents:
            return "No documents to process."
        texts = [doc["content"] for doc in self.documents]
        encoded = self.model.encode(texts, show_progress_bar=True)
        # FAISS only accepts float32 matrices.
        self.embeddings = np.array(encoded, dtype="float32")
        self.index = faiss.IndexFlatL2(self.embeddings.shape[1])
        self.index.add(self.embeddings)
        return "Vector database built successfully!"

    def search_documents(self, query: str, k: int = 3) -> List[str]:
        """Search for relevant documents using vector similarity.

        Args:
            query: Free-text query to embed and look up.
            k: Maximum number of page texts to return.

        Returns:
            Up to ``k`` page texts, nearest first. When no index has been
            built yet, a single-element list holding a "not ready" message.
        """
        if self.index is None:
            return ["Vector database is not ready."]
        if k <= 0:
            return []
        query_embedding = np.array(
            self.model.encode([query], show_progress_bar=False), dtype="float32"
        )
        _, neighbours = self.index.search(query_embedding, k)
        # FAISS pads the result with -1 when fewer than k vectors are stored;
        # without the bounds check those would silently select the last page.
        return [
            self.documents[i]["content"]
            for i in neighbours[0]
            if 0 <= i < len(self.documents)
        ]

# Module-level application instance used by the callback functions below.
app = MyApp()

def upload_files(files):
    """Gradio callback: extract text from the uploaded PDFs.

    Args:
        files: The uploaded files, or ``None`` / an empty list when the
            upload component has been cleared.

    Returns:
        A status message for display.
    """
    # The change event also fires when the selection is cleared; there is
    # nothing to parse in that case, so report it instead of failing.
    if not files:
        return "No files uploaded."
    app.load_pdfs(files)
    return "Files uploaded and processed. Ready to build vector database."

def build_vector_db():
    """Gradio callback: build the vector index and pass back its status message."""
    status = app.build_vector_db()
    return status

def answer_query(query):
    """Answer a question from the indexed PDF content using the Gemini model.

    The most similar pages are retrieved from the vector index and sent to
    the model together with the question itself.

    Args:
        query: The user's question.

    Returns:
        The generated answer, or a short status/error message when the query
        is blank, the index is not built, nothing is retrieved, or the model
        call fails.
    """
    if not query or not query.strip():
        return "Please enter a query."
    # Stop here rather than forwarding the "not ready" placeholder to the
    # model as if it were document context.
    if app.index is None:
        return "Vector database is not ready."

    results = app.search_documents(query)
    if not results:
        return "No results found."

    # The model needs the question as well as the retrieved passages.
    context = "\n\n".join(results)
    prompt = (
        "Answer the question using only the context below.\n\n"
        f"Context:\n{context}\n\n"
        f"Question: {query}"
    )

    # Generate a response using the generative model
    model = GenerativeModel("gemini-1.5-pro-latest")
    generation_config = types.GenerationConfig(
        temperature=0.7,
        max_output_tokens=150
    )
    try:
        response = model.generate_content(prompt, generation_config=generation_config)
        response_text = response.text if hasattr(response, "text") else "No response generated."
    except Exception as e:
        # Callback boundary: surface the failure in the UI instead of crashing.
        response_text = f"An error occurred while generating the response: {str(e)}"

    return response_text

# Build the Gradio interface: upload PDFs, build the index, then ask questions.
with gr.Blocks() as demo:
    gr.Markdown("# 🧘‍♀️ **Dialectical Behaviour Therapy Chatbot**")
    gr.Markdown("Upload your PDFs and interact with the content using AI.")

    with gr.Row():
        # File-type filters must be dotted extensions; a bare "pdf" is read
        # as a MIME category and matches no files.
        upload_btn = gr.Files(label="Upload PDFs", file_types=[".pdf"])
        upload_status = gr.Textbox()

    with gr.Row():
        db_btn = gr.Button("Build Vector Database")
        db_status = gr.Textbox()

    with gr.Row():
        query_input = gr.Textbox(label="Enter your query")
        submit_btn = gr.Button("Submit")
        # answer_query returns a plain string, which a Chatbot component
        # (expecting a list of messages) cannot render.
        response_display = gr.Textbox(label="Response")

    upload_btn.change(upload_files, inputs=[upload_btn], outputs=[upload_status])
    db_btn.click(build_vector_db, outputs=[db_status])
    submit_btn.click(answer_query, inputs=[query_input], outputs=[response_display])

demo.launch()