import os

import gradio as gr
from langchain_community.embeddings import HuggingFaceEmbeddings
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.llms.ollama import Ollama

# Set up Ollama: install it, start the server in the background, and pull the model
os.system('curl -fsSL https://ollama.com/install.sh | sh')
os.system('ollama serve &')
os.system('sleep 5')  # give the server a moment to start before pulling
os.system('ollama pull llama3.2')

# Initialize embeddings and LLM.
# LlamaIndex can wrap LangChain embeddings automatically, which requires the
# llama-index-embeddings-langchain integration to be installed.
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")
llama = Ollama(
    model="llama3.2",
    request_timeout=1000,  # seconds; local generation can be slow
)


def initialize_index():
    """Initialize the vector store index from PDF files in the data directory."""
    # Load documents from the data directory
    loader = SimpleDirectoryReader(
        input_dir="data",
        required_exts=[".pdf"],
    )
    documents = loader.load_data()

    # Build the vector index with the HuggingFace embeddings
    index = VectorStoreIndex.from_documents(
        documents,
        embed_model=embeddings,
    )

    # Return a query engine that answers with Llama
    return index.as_query_engine(llm=llama)


# Initialize the query engine at startup
query_engine = initialize_index()


def process_query(
    message: str,
    history: list[tuple[str, str]],
) -> str:
    """Process a query using the RAG system."""
    try:
        # Query the engine and return the full answer as text
        response = query_engine.query(message)
        return str(response)
    except Exception as e:
        return f"Error processing query: {e}"


# Create the Gradio interface.
# The retry/undo/clear button arguments below apply to Gradio 4.x; they were
# removed from ChatInterface in Gradio 5.
demo = gr.ChatInterface(
    process_query,
    title="PDF Question Answering with RAG + Llama",
    description="Ask questions about the content of the loaded PDF documents using the Llama model.",
    examples=[
        ["What is a computer?"],
    ],
    cache_examples=False,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
)

if __name__ == "__main__":
    demo.launch()
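
# Usage sketch (assumed package names; adjust or pin versions as needed):
#   pip install "gradio<5" llama-index llama-index-llms-ollama \
#       llama-index-embeddings-langchain langchain-community sentence-transformers
# Place your PDF files in ./data, then run this script. The Gradio chat UI
# answers questions grounded in those documents via the local Ollama model.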