import streamlit as st
import tempfile

import chromadb
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.vectorstores import Chroma

# Clear Chroma's shared client cache so Streamlit reruns don't collide with a
# stale system client (a known workaround for Chroma inside Streamlit apps).
chromadb.api.client.SharedSystemClient.clear_system_cache()

st.title("💬 CV Q&A Chatbot")
st.write("Ask any questions about your CV")

"""
This chatbot takes a CV you provide and answers questions about it. Here are two ways you can use this app:

1. Use the bot on your CV to evaluate whether your CV is easy to understand.
2. Implement your own bot by taking the [code](https://huggingface.co/spaces/Lauredecaudin/resume_guide/blob/main/pages/4-💬%20Create%20your%20own%20bot%20(developers).py) accessible from this project. You can then embed the bot on your website (if you have one), deploy the app on Streamlit, or create a Hugging Face Space like this one.

*In this example chatbot, we're using mistralai/Mixtral-8x7B-Instruct-v0.1 with LangChain 🤝 to chat with your CV.*
"""

# Load the PDF document(s) and split them into overlapping chunks
def load_doc(list_file_path, chunk_size, chunk_overlap):
    loaders = [PyPDFLoader(x) for x in list_file_path]
    pages = []
    for loader in loaders:
        pages.extend(loader.load())
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    doc_splits = text_splitter.split_documents(pages)
    return doc_splits

# Create the vector database from the document chunks
def create_db(splits, collection_name):
    embedding = HuggingFaceEmbeddings()
    vectordb = Chroma.from_documents(
        documents=splits,
        embedding=embedding,
        collection_name=collection_name,
        persist_directory="./chroma_db",
    )
    return vectordb

# Initialize the conversational retrieval chain on top of the vector store
def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db):
    # Note: load_in_8bit from the original code is dropped here; it is a local
    # transformers/bitsandbytes loading flag, not a hosted-endpoint parameter.
    llm = HuggingFaceEndpoint(
        repo_id=llm_model,
        temperature=temperature,
        max_new_tokens=max_tokens,
        top_k=top_k,
    )
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        return_messages=True,
    )
    retriever = vector_db.as_retriever()
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm,
        retriever=retriever,
        chain_type="stuff",
        memory=memory,
        # combine_docs_chain_kwargs={"prompt": your_prompt},
        return_source_documents=True,
        # return_generated_question=False,
        verbose=False,
    )
    return qa_chain

# Run one turn of the conversation through the QA chain
def conversation(qa_chain, message, history):
    # Generate a response using the QA chain
    response = qa_chain({"question": message, "chat_history": history})
    response_answer = response["answer"]
    # Update the chat history
    new_history = history + [(message, response_answer)]
    return new_history, response_answer

# Initialize session state variables
if "llm_chain" not in st.session_state:
    st.session_state["llm_chain"] = None
if "vector_db" not in st.session_state:
    st.session_state["vector_db"] = None
if "chat_history" not in st.session_state:
    st.session_state["chat_history"] = []

# Reset the conversation and drop the processed document
def reset_conversation():
    st.session_state["chat_history"] = []
    st.session_state["llm_chain"] = None
    st.session_state["vector_db"] = None

# File uploader for the PDF document
file = st.file_uploader("Upload your CV", type=["pdf"])
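# The original script defines reset_conversation() but never wires it up.
# One plausible hookup (an assumption, not in the source) would be a sidebar
# button; it is left commented out to preserve the original behavior:
# st.sidebar.button("Reset conversation", on_click=reset_conversation)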
type=["pdf"]) if file is not None and st.session_state['llm_chain'] is None: with st.spinner("Processing document..."): # Save the uploaded file to a temporary location with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file: tmp_file.write(file.read()) tmp_file_path = tmp_file.name # Load document and create splits doc_splits = load_doc([tmp_file_path], chunk_size=600, chunk_overlap=50) # Create vector database vector_db = create_db(doc_splits, collection_name="my_collection") # Initialize LLM chain llm_chain = initialize_llmchain( llm_model="mistralai/Mixtral-8x7B-Instruct-v0.1", temperature=0.7, max_tokens=1024, top_k=3, vector_db=vector_db ) # Store in session state st.session_state['llm_chain'] = llm_chain st.session_state['vector_db'] = vector_db st.session_state['chat_history'] = [] st.success("Document processed successfully!") if "messages" not in st.session_state.keys(): # Initialize the chat message history st.session_state.messages = [ {"role": "assistant", "content": "Ask me a question about the Resume you uploaded !"} ] st.write("Please upload your CV to start the chatbot.") for message in st.session_state.messages: # Display the prior chat messages st.chat_message(message["role"]).write(message["content"]) if prompt := st.chat_input(placeholder="Your question"): # Prompt for user input and save to chat history if not st.session_state.get('llm_chain'): st.info("Please upload your CV to continue.") st.stop() st.session_state.messages.append({"role": "user", "content": prompt}) st.chat_message("user").write(prompt) with st.chat_message("assistant"): st.session_state['chat_history'], response_answer = conversation( st.session_state['llm_chain'], prompt, st.session_state['chat_history'] ) st.session_state.messages.append({"role": "assistant", "content": response_answer}) st.write(response_answer)