import time
import psutil
import glob
import PyPDF2
#import chromadb  # imported later, after the sqlite3 swap inside RAG_Chain
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    AutoModelForCausalLM,
    pipeline,
)
from transformers import LlamaTokenizer, LlamaForCausalLM, BitsAndBytesConfig
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from llama_cpp import Llama

# Path to the local GGUF model (used by the commented-out instantiation below)
model_path = "/home/mona/Downloads/Pubmed_model_GGUF"


def RAG_Chain(pdf_file, question, llama_model):
    # Extract the raw text from every page of the PDF
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    doc = ""
    for page in pdf_reader.pages:
        doc += page.extract_text()

    # Check if any text was extracted
    if not doc:
        raise ValueError("No documents found. Please check the PDF directory path.")

    # Split the loaded document into overlapping chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_text(doc)

    # Create HuggingFace embeddings for the vector store
    embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"  # efficient model suitable for most tasks
    embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

    # Swap the stdlib sqlite3 for pysqlite3 so Chroma gets a recent SQLite build,
    # then clear any cached Chroma client before creating a new store
    __import__("pysqlite3")
    import sys
    sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")
    import chromadb
    chromadb.api.client.SharedSystemClient.clear_system_cache()

    vectorstore = Chroma.from_texts(texts=splits, embedding=embeddings)

    # Define the retriever using Chroma
    retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

    # Retrieve relevant documents
    retrieved_docs = retriever.get_relevant_documents(question)
    if not retrieved_docs:
        return "No relevant information found in the documents."

    # Format the retrieved chunks into a single context block
    formatted_context = "\n\n".join(d.page_content for d in retrieved_docs)

    # Prepare the prompt for the LLM
    formatted_prompt = (
        f"Answer the question based on the context below.\n\n"
        f"Context:\n{formatted_context}\n\nQuestion: {question}\n\nAnswer:"
    )

    answer = llama_model(formatted_prompt)
    return answer["choices"][0]["text"]


# Instantiate the Llama model using the gguf file
'''
llama_model = Llama(
    model_path,
    n_ctx=2048,        # Context length
    #n_threads=8,      # Number of CPU threads to use
    temperature=0.7,   # Sampling temperature
    n_gpu_layers=2
)
'''
# Generate the answer
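# --- Usage sketch (a minimal, assumption-based example, not part of the original script) ---
# Shows how the commented-out Llama instantiation and RAG_Chain might be wired together.
# The PDF path and question below are placeholders; model_path is assumed to point at an
# actual .gguf file on disk.
if __name__ == "__main__":
    llama_model = Llama(
        model_path,      # local GGUF model file (assumed)
        n_ctx=2048,      # context window
        n_gpu_layers=2,  # offload a few layers to the GPU if available
    )
    answer = RAG_Chain(
        pdf_file="paper.pdf",  # placeholder PDF path
        question="What is the main finding of this paper?",  # placeholder question
        llama_model=llama_model,
    )
    print(answer)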