import os

import gradio as gr
from langchain_community.embeddings import HuggingFaceEmbeddings
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.llms.ollama import Ollama

# Set up Ollama: install it, start the server in the background, and pull the model
os.system('curl -fsSL https://ollama.com/install.sh | sh')
os.system('ollama serve &')
os.system('sleep 5')  # give the server a moment to start before pulling
os.system('ollama pull llama3.2')

# Initialize embeddings and LLM.
# LlamaIndex can wrap LangChain embeddings automatically, which requires the
# llama-index-embeddings-langchain integration to be installed.
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")
llama = Ollama(
    model="llama3.2",
    request_timeout=1000,  # seconds; local generation can be slow
)


def initialize_index():
    """Initialize the vector store index from PDF files in the data directory."""
    # Load documents from the data directory
    loader = SimpleDirectoryReader(
        input_dir="data",
        required_exts=[".pdf"],
    )
    documents = loader.load_data()

    # Build the vector index with the HuggingFace embeddings
    index = VectorStoreIndex.from_documents(
        documents,
        embed_model=embeddings,
    )

    # Return a query engine that answers with Llama
    return index.as_query_engine(llm=llama)


# Initialize the query engine at startup
query_engine = initialize_index()


def process_query(
    message: str,
    history: list[tuple[str, str]],
) -> str:
    """Process a query using the RAG system."""
    try:
        # Query the engine and return the full answer as text
        response = query_engine.query(message)
        return str(response)
    except Exception as e:
        return f"Error processing query: {e}"


# Create the Gradio interface.
# The retry/undo/clear button arguments below apply to Gradio 4.x; they were
# removed from ChatInterface in Gradio 5.
demo = gr.ChatInterface(
    process_query,
    title="PDF Question Answering with RAG + Llama",
    description="Ask questions about the content of the loaded PDF documents using the Llama model.",
    examples=[
        ["What is a computer?"],
    ],
    cache_examples=False,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
)

if __name__ == "__main__":
    demo.launch()
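
# Usage sketch (assumed package names; adjust or pin versions as needed):
#   pip install "gradio<5" llama-index llama-index-llms-ollama \
#       llama-index-embeddings-langchain langchain-community sentence-transformers
# Place your PDF files in ./data, then run this script. The Gradio chat UI
# answers questions grounded in those documents via the local Ollama model.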