import streamlit as st
import os
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.memory import ConversationBufferMemory
from langchain_community.llms import HuggingFaceEndpoint


from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import tempfile
import chromadb

chromadb.api.client.SharedSystemClient.clear_system_cache()

st.title("💬 CV Q&A Chatbot")
st.write("Ask any questions about your CV")


"""
This chatbot can take a CV you provide and answer to questions about the CV.  
Here are two ways you can use this app : 
1. Use the bot on your CV to evaluate if your CV is easy to understand. 
2. Implement your own bot (by taking the [code](https://huggingface.co/spaces/Lauredecaudin/resume_guide/blob/main/pages/4-💬%20Create%20your%20own%20bot%20(developers).py) accessible from this project), Then you can embedd the bot on your website (if you have one), or deploy the app on Streamlit or create an app in Spaces in HuggingFace like this one. 


*In this example chatbot, we're using mistralai/Mixtral-8x7B-Instruct-v0.1 with LangChain 🤝 to discuss with your CV.*
"""


# Function to load and split the PDF document
def load_doc(list_file_path, chunk_size, chunk_overlap):
    loaders = [PyPDFLoader(x) for x in list_file_path]
    pages = []
    for loader in loaders:
        pages.extend(loader.load())
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, 
        chunk_overlap=chunk_overlap
    )
    doc_splits = text_splitter.split_documents(pages)
    return doc_splits

# Function to create the vector database
def create_db(splits, collection_name):
    embedding = HuggingFaceEmbeddings()
    vectordb = Chroma.from_documents(
        documents=splits,
        embedding=embedding,
        collection_name=collection_name,
        persist_directory="./chroma_db",
    )
    return vectordb

# Function to initialize the LLM chain
def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db):


    llm = HuggingFaceEndpoint(
        repo_id=llm_model,  
        temperature = temperature,
        max_new_tokens = max_tokens,
        top_k = top_k,
        load_in_8bit = True,
    )
    
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key='answer',
        return_messages=True
    )
    retriever=vector_db.as_retriever()
    
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm,
        retriever=retriever,
        chain_type="stuff", 
        memory=memory,
        # combine_docs_chain_kwargs={"prompt": your_prompt})
        return_source_documents=True,
        #return_generated_question=False,
        verbose=False,
    )
    
    return qa_chain

# Function to handle the conversation
def conversation(qa_chain, message, history):
    # Generate response using QA chain
    response = qa_chain({"question": message, "chat_history": history})
    response_answer = response["answer"]
    # Update chat history
    new_history = history + [(message, response_answer)]
    return new_history, response_answer

# Initialize session state variables
if 'llm_chain' not in st.session_state:
    st.session_state['llm_chain'] = None
if 'vector_db' not in st.session_state:
    st.session_state['vector_db'] = None
if 'chat_history' not in st.session_state:
    st.session_state['chat_history'] = []


# Function to reset the conversation
def reset_conversation():
    st.session_state['chat_history'] = []
    st.session_state['llm_chain'] = None
    st.session_state['vector_db'] = None


# File uploader for PDF document
file = st.file_uploader("Upload your CV", type=["pdf"])

if file is not None and st.session_state['llm_chain'] is None:
    with st.spinner("Processing document..."):
        # Save the uploaded file to a temporary location
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
            tmp_file.write(file.read())
            tmp_file_path = tmp_file.name
        # Load document and create splits
        doc_splits = load_doc([tmp_file_path], chunk_size=600, chunk_overlap=50)
        # Create vector database
        vector_db = create_db(doc_splits, collection_name="my_collection")
        # Initialize LLM chain
        llm_chain = initialize_llmchain(
            llm_model="mistralai/Mixtral-8x7B-Instruct-v0.1",
            temperature=0.7,
            max_tokens=1024,
            top_k=3,
            vector_db=vector_db
        )
        # Store in session state
        st.session_state['llm_chain'] = llm_chain
        st.session_state['vector_db'] = vector_db
        st.session_state['chat_history'] = []
    st.success("Document processed successfully!")


if "messages" not in st.session_state.keys(): # Initialize the chat message history
    st.session_state.messages = [
        {"role": "assistant", "content": "Ask me a question about the Resume you uploaded !"}
    ]

st.write("Please upload your CV to start the chatbot.")

for message in st.session_state.messages: # Display the prior chat messages
    st.chat_message(message["role"]).write(message["content"])

if prompt := st.chat_input(placeholder="Your question"): # Prompt for user input and save to chat history
    if not st.session_state.get('llm_chain'):
        st.info("Please upload your CV to continue.")
        st.stop()
        
    st.session_state.messages.append({"role": "user", "content": prompt})
    st.chat_message("user").write(prompt)

    with st.chat_message("assistant"):
        st.session_state['chat_history'], response_answer = conversation(
            st.session_state['llm_chain'],
            prompt,
            st.session_state['chat_history']
        )
        st.session_state.messages.append({"role": "assistant", "content": response_answer})
        st.write(response_answer)