|
import os |
|
import gradio as gr |
|
import google.generativeai as genai |
|
from typing import List, Tuple |
|
import fitz |
|
from sentence_transformers import SentenceTransformer |
|
import numpy as np |
|
import faiss |
|
|
|
|
|
# Gemini API key, read once at import time from the process environment.
# The value is None when the variable is not set.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

# Hand the key (possibly None) to the Gemini client library.
genai.configure(api_key=GOOGLE_API_KEY)
|
|
|
|
|
class MyApp:
    """Page-level semantic search over one PDF plus Gemini text generation.

    On construction the app reads ``THEDIA1.pdf``, embeds the text of every
    page with a SentenceTransformer model and stores the vectors in a FAISS
    ``IndexFlatL2`` index.
    """

    # SentenceTransformer model used for both page and query embeddings.
    EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
    # Gemini model used by generate_response().
    GENERATION_MODEL_NAME = "gemini-1.5-pro-latest"

    # Cached SentenceTransformer instance; created on first use so the model
    # is loaded once instead of once per query.
    _encoder = None

    def __init__(self) -> None:
        """Load the bundled PDF and build the vector index for it."""
        self.documents = []     # list of {"page": int, "content": str}
        self.embeddings = None  # float32 matrix, one row per document
        self.index = None       # FAISS index over self.embeddings
        self.load_pdf("THEDIA1.pdf")
        self.build_vector_db()

    def _get_encoder(self):
        """Return the cached SentenceTransformer, loading it on first use."""
        if self._encoder is None:
            self._encoder = SentenceTransformer(self.EMBEDDING_MODEL_NAME)
        return self._encoder

    def load_pdf(self, file_path: str) -> None:
        """Extract the text of every page of a PDF into ``self.documents``.

        Each entry is ``{"page": <1-based page number>, "content": <text>}``.
        Any previously built index is discarded because it would refer to the
        old pages; call ``build_vector_db()`` afterwards to rebuild it.

        Args:
            file_path: Path of the PDF file to read.
        """
        # The context manager closes the PDF even if text extraction fails.
        with fitz.open(file_path) as doc:
            pages = [
                {"page": page_number, "content": page.get_text()}
                for page_number, page in enumerate(doc, start=1)
            ]

        # Assign only after a complete read so a failure part-way through
        # leaves the previously loaded documents untouched.
        self.documents = pages
        self.embeddings = None
        self.index = None
        print("PDF processed successfully!")

    def build_vector_db(self) -> None:
        """Embed all loaded documents and index them with FAISS.

        When no documents are loaded there is nothing to embed, so the index
        is left unset and ``search()`` returns no results.
        """
        if not self.documents:
            self.embeddings = None
            self.index = None
            return

        vectors = self._get_encoder().encode(
            [doc["content"] for doc in self.documents]
        )
        self.embeddings = np.ascontiguousarray(vectors, dtype="float32")
        self.index = faiss.IndexFlatL2(self.embeddings.shape[1])
        self.index.add(self.embeddings)
        print("Vector database built successfully!")

    def search(self, query: str, top_k: int = 5) -> List[Tuple[int, str]]:
        """Return the pages most similar to ``query``.

        Args:
            query: Free-text search query.
            top_k: Maximum number of pages to return.

        Returns:
            A list of ``(page_number, page_text)`` tuples in the order the
            index reports them. The list is empty when no index has been
            built or ``top_k`` is not positive.
        """
        if self.index is None or not self.documents or top_k <= 0:
            return []

        # Never ask for more neighbours than there are documents.
        k = min(top_k, len(self.documents))
        query_vector = np.ascontiguousarray(
            self._get_encoder().encode([query]), dtype="float32"
        )
        _, indices = self.index.search(query_vector, k)

        results = []
        for raw_idx in indices[0]:
            idx = int(raw_idx)
            # Skip out-of-range ids (e.g. -1 placeholders) instead of letting
            # a negative index silently select the last document.
            if 0 <= idx < len(self.documents):
                doc = self.documents[idx]
                results.append((doc["page"], doc["content"]))
        return results

    def generate_response(self, query: str) -> str:
        """Generate a Gemini answer for ``query``.

        Args:
            query: Prompt text sent to the model as the only content item.

        Returns:
            The generated text, or ``"No response generated."`` when the
            response carries no text.

        Raises:
            ValueError: If ``GOOGLE_API_KEY`` is not set.
        """
        if not GOOGLE_API_KEY:
            raise ValueError("GOOGLE_API_KEY is not set. Please set it up.")

        generation_config = genai.types.GenerationConfig(
            temperature=0.7,
            max_output_tokens=512,
        )
        model = genai.GenerativeModel(self.GENERATION_MODEL_NAME)
        response = model.generate_content(
            [query], generation_config=generation_config
        )

        # The response object is not subscriptable; its text is exposed via
        # the ``text`` accessor, which raises ValueError when the response
        # holds no usable text part.
        try:
            text = response.text
        except ValueError:
            return "No response generated."
        return text if text else "No response generated."
|
|
|
|
|
def main():
    """Build the document index and launch the Gradio query interface.

    The interface takes one text query and shows two outputs: the pages
    returned by ``MyApp.search`` and the text returned by
    ``MyApp.generate_response``.
    """
    app = MyApp()

    def handle_query(query):
        """Return ``(search_results, response)`` for one user query."""
        search_results = app.search(query)
        response = app.generate_response(query)
        # Two output components are declared below, so Gradio expects one
        # positional value per component (JSON first, then Textbox) rather
        # than a dict keyed by label strings.
        return search_results, response

    gr.Interface(
        fn=handle_query,
        inputs=gr.Textbox(placeholder="Enter your query here"),
        outputs=[
            gr.JSON(label="Search Results"),
            gr.Textbox(label="Generated Response"),
        ],
        title="Dialectical Behavioral Exercise with Gemini",
        description="This app uses Google Gemini to generate responses based on document content.",
    ).launch()


if __name__ == "__main__":
    main()
|
|