import os
from typing import List, Tuple

import faiss
import fitz  # PyMuPDF
import google.generativeai as genai
import gradio as gr
import numpy as np
from sentence_transformers import SentenceTransformer

# Configure the Gemini client from the environment.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)


class MyApp:
    """Holds the app's state: the PDF text, its embeddings, and the FAISS index."""

    def __init__(self) -> None:
        self.documents = []
        self.embeddings = None
        self.index = None
        # Load the embedding model once and reuse it for indexing and search.
        self.model = SentenceTransformer("all-MiniLM-L6-v2")
        self.load_pdf("THEDIA1.pdf")
        self.build_vector_db()

    def load_pdf(self, file_path: str) -> None:
        """Extracts text from a PDF file and stores it, page by page, in self.documents."""
        doc = fitz.open(file_path)
        self.documents = []
        for page_num in range(len(doc)):
            page = doc[page_num]
            text = page.get_text()
            self.documents.append({"page": page_num + 1, "content": text})
        print("PDF processed successfully!")

    def build_vector_db(self) -> None:
        """Builds a FAISS index over SentenceTransformer embeddings of each page."""
        embeddings = self.model.encode([doc["content"] for doc in self.documents])
        self.embeddings = np.array(embeddings, dtype="float32")
        self.index = faiss.IndexFlatL2(self.embeddings.shape[1])
        self.index.add(self.embeddings)
        print("Vector database built successfully!")

    def search(self, query: str, top_k: int = 5) -> List[Tuple[int, str]]:
        """Returns the (page, content) pairs most similar to the query."""
        query_embedding = self.model.encode([query])
        distances, indices = self.index.search(
            np.array(query_embedding, dtype="float32"), top_k
        )
        return [
            (self.documents[idx]["page"], self.documents[idx]["content"])
            for idx in indices[0]
        ]

    def generate_response(self, query: str) -> str:
        """Generates a response to the query using the Gemini model."""
        if not GOOGLE_API_KEY:
            raise ValueError("GOOGLE_API_KEY is not set. Please set it up.")
        generation_config = genai.types.GenerationConfig(
            temperature=0.7, max_output_tokens=512
        )
        model = genai.GenerativeModel("gemini-1.5-pro-latest")
        response = model.generate_content([query], generation_config=generation_config)
        # GenerateContentResponse exposes the generated text via the .text property.
        return response.text if response else "No response generated."


# Gradio UI setup for interaction
def main():
    app = MyApp()

    def handle_query(query):
        search_results = app.search(query)
        response = app.generate_response(query)
        # Return one value per output component: JSON search results, then the text response.
        return search_results, response

    gr.Interface(
        fn=handle_query,
        inputs=gr.Textbox(placeholder="Enter your query here"),
        outputs=[
            gr.JSON(label="Search Results"),
            gr.Textbox(label="Generated Response"),
        ],
        title="Dialectical Behavioral Exercise with Gemini",
        description="This app uses Google Gemini to generate responses based on document content.",
    ).launch()


if __name__ == "__main__":
    main()