import time
import psutil
import glob
import PyPDF2
#import chromadb  # imported later, after the sqlite3 swap inside RAG_Chain
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    AutoModelForCausalLM,
    pipeline,
)
from transformers import LlamaTokenizer, LlamaForCausalLM, BitsAndBytesConfig
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from llama_cpp import Llama

# Path to the local GGUF model (used by the commented-out instantiation below)
model_path = "/home/mona/Downloads/Pubmed_model_GGUF"


def RAG_Chain(pdf_file, question, llama_model):
    # Extract the raw text from every page of the PDF
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    doc = ""
    for page in pdf_reader.pages:
        doc += page.extract_text()

    # Check if any text was extracted
    if not doc:
        raise ValueError("No documents found. Please check the PDF directory path.")

    # Split the loaded document into overlapping chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_text(doc)

    # Create HuggingFace embeddings for the vector store
    embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"  # efficient model suitable for most tasks
    embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

    # Swap the stdlib sqlite3 for pysqlite3 so Chroma gets a recent SQLite build,
    # then clear any cached Chroma client before creating a new store
    __import__("pysqlite3")
    import sys
    sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")
    import chromadb
    chromadb.api.client.SharedSystemClient.clear_system_cache()

    vectorstore = Chroma.from_texts(texts=splits, embedding=embeddings)

    # Define the retriever using Chroma
    retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

    # Retrieve relevant documents
    retrieved_docs = retriever.get_relevant_documents(question)
    if not retrieved_docs:
        return "No relevant information found in the documents."

    # Format the retrieved chunks into a single context block
    formatted_context = "\n\n".join(d.page_content for d in retrieved_docs)

    # Prepare the prompt for the LLM
    formatted_prompt = (
        f"Answer the question based on the context below.\n\n"
        f"Context:\n{formatted_context}\n\nQuestion: {question}\n\nAnswer:"
    )

    answer = llama_model(formatted_prompt)
    return answer["choices"][0]["text"]


# Instantiate the Llama model using the gguf file
'''
llama_model = Llama(
    model_path,
    n_ctx=2048,        # Context length
    #n_threads=8,      # Number of CPU threads to use
    temperature=0.7,   # Sampling temperature
    n_gpu_layers=2
)
'''
# Generate the answer
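# --- Usage sketch (a minimal, assumption-based example, not part of the original script) ---
# Shows how the commented-out Llama instantiation and RAG_Chain might be wired together.
# The PDF path and question below are placeholders; model_path is assumed to point at an
# actual .gguf file on disk.
if __name__ == "__main__":
    llama_model = Llama(
        model_path,      # local GGUF model file (assumed)
        n_ctx=2048,      # context window
        n_gpu_layers=2,  # offload a few layers to the GPU if available
    )
    answer = RAG_Chain(
        pdf_file="paper.pdf",  # placeholder PDF path
        question="What is the main finding of this paper?",  # placeholder question
        llama_model=llama_model,
    )
    print(answer)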