import os

import PyPDF2
import gradio as gr
from huggingface_hub import hf_hub_download
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.llms import LlamaCpp
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain_community.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from sentence_transformers import SentenceTransformer, util
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Customized file paths
pdf_files = ["CV_Vidhi_Parikh.pdf"]

# Function to extract documents from PDF files
def extract_documents_from_pdf(pdf_files):
    documents = []
    metadata = []
    content = []
    for pdf in pdf_files:
        pdf_reader = PyPDF2.PdfReader(pdf)
        for index, page in enumerate(pdf_reader.pages):
            document_page = {
                'title': pdf + " page " + str(index + 1),
                'content': page.extract_text(),
            }
            documents.append(document_page)
    for doc in documents:
        content.append(doc["content"])
        metadata.append({"title": doc["title"]})
    print("Documents extracted from PDF files.")
    return content, metadata

# Function to split documents into text chunks
def split_documents_into_chunks(content, metadata):
    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=512,
        chunk_overlap=256,
    )
    split_documents = text_splitter.create_documents(content, metadatas=metadata)
    print(f"Documents split into {len(split_documents)} passages.")
    return split_documents

# Function to ingest split documents into the vector database
def ingest_into_vector_database(split_documents):
    embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
    database = FAISS.from_documents(split_documents, embeddings)
    DB_PATH = 'vectorstore/vector_database'
    database.save_local(DB_PATH)
    return database

# Customized conversation template
template = """[INST] As an AI, provide accurate and relevant information based on the provided document. Your responses should adhere to the following guidelines:
- Answer the question based on the provided documents.
- Be concise and factual, limited to 50 words and 2-3 sentences. Begin your response without using introductory phrases like yes, no, etc.
- Maintain an ethical and unbiased tone, avoiding harmful or offensive content.
- If the document does not contain relevant information, state "I cannot provide an answer based on the provided document."
- Avoid using confirmatory phrases like "Yes, you are correct" or any similar validation in your responses.
- Do not fabricate information or include questions in your responses.
- Do not prompt to select answers. Do not ask additional questions.
- Cite the source of where exactly the information in the document is found and mention it in your responses.
{question}
[/INST]
"""

# Callback manager for handling callbacks
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Function to create a conversational chain
def create_conversational_chain(database):
    # GGUF model file used by llama.cpp
    model_name = "llama-2-7b-chat.Q8_0.gguf"
    model_directory = "files"

    # Check if the model file exists in the specified directory
    model_file = os.path.join(model_directory, model_name)
    if os.path.exists(model_file):
        model_path = model_file
        print("Model file found in the directory. Using the local model file.")
    else:
        # LlamaCpp needs a local file path, so download the GGUF weights from the
        # Hugging Face Hub instead of pointing at the repository URL.
        print("Model file not found in the directory. Downloading the model from the repository.")
        model_path = hf_hub_download(
            repo_id="TheBloke/Llama-2-7B-Chat-GGUF",
            filename=model_name,
            local_dir=model_directory,
        )
    print(model_path)

    # Load the model with llama.cpp. Set n_gpu_layers to the number of layers to
    # offload to the GPU, or leave it unset if no GPU acceleration is available.
    llama_llm = LlamaCpp(
        model_path=model_path,
        temperature=0.75,
        max_tokens=200,
        top_p=1,
        callback_manager=callback_manager,
        n_ctx=3000,
    )

    retriever = database.as_retriever()
    CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(template)
    memory = ConversationBufferMemory(
        memory_key='chat_history',
        return_messages=True,
        output_key='answer',
    )
    conversation_chain = ConversationalRetrievalChain.from_llm(
        llm=llama_llm,
        retriever=retriever,
        # condense_question_prompt=CONDENSE_QUESTION_PROMPT,
        memory=memory,
        return_source_documents=True,
    )
    print("Conversational Chain created.")
    return conversation_chain

# Function to validate the answer against source documents
def validate_answer(response_answer, source_documents):
    model = SentenceTransformer('all-MiniLM-L6-v2')
    similarity_threshold = 0.5
    source_texts = [doc.page_content for doc in source_documents]
    answer_embedding = model.encode(response_answer, convert_to_tensor=True)
    source_embeddings = model.encode(source_texts, convert_to_tensor=True)
    cosine_scores = util.pytorch_cos_sim(answer_embedding, source_embeddings)
    if any(score.item() > similarity_threshold for score in cosine_scores[0]):
        return True
    return False

# Extract documents from PDF files
content, metadata = extract_documents_from_pdf(pdf_files)

# Split documents into text chunks
split_documents = split_documents_into_chunks(content, metadata)

# Ingest split documents into the vector database
vector_database = ingest_into_vector_database(split_documents)
print("Vector database created.")

# Create the conversation chain
conversation_chain = create_conversational_chain(vector_database)

# Function for the chatbot
def chat_with_bot(input_text):
    user_query = input_text
    response = conversation_chain.invoke({"question": user_query})
    print("Response:", response)
    print("Answer:", response['answer'])
    return response['answer']

# Create the Gradio interface
iface = gr.Interface(
    fn=chat_with_bot,
    inputs=gr.Textbox(lines=2, label="User Input"),
    outputs="text",
    title="Simple Chatbot",
    description="Enter your message and the chatbot will respond.",
)

# Launch the interface
iface.launch()