import os

import PyPDF2
import gradio as gr
from huggingface_hub import hf_hub_download
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.llms import LlamaCpp
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain_community.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from sentence_transformers import SentenceTransformer, util
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Customized file paths
pdf_files = ["CV_Vidhi_Parikh.pdf"]

# Function to extract documents from PDF files
def extract_documents_from_pdf(pdf_files):
    documents = []
    metadata = []
    content = []
    for pdf in pdf_files:
        pdf_reader = PyPDF2.PdfReader(pdf)
        for index, page in enumerate(pdf_reader.pages):
            document_page = {
                'title': pdf + " page " + str(index + 1),
                'content': page.extract_text(),
            }
            documents.append(document_page)
    for doc in documents:
        content.append(doc["content"])
        metadata.append({"title": doc["title"]})
    print("Documents extracted from PDF files.")
    return content, metadata

# Function to split documents into text chunks
def split_documents_into_chunks(content, metadata):
    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=512,
        chunk_overlap=256,
    )
    split_documents = text_splitter.create_documents(content, metadatas=metadata)
    print(f"Documents split into {len(split_documents)} passages.")
    return split_documents

# Function to ingest split documents into the vector database
def ingest_into_vector_database(split_documents):
    embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
    database = FAISS.from_documents(split_documents, embeddings)
    DB_PATH = 'vectorstore/vector_database'
    database.save_local(DB_PATH)
    return database

# Customized conversation template
template = """[INST] As an AI, provide accurate and relevant information based on the provided document. Your responses should adhere to the following guidelines:
- Answer the question based on the provided documents.
- Be concise and factual, limited to 50 words and 2-3 sentences. Begin your response without using introductory phrases like yes, no, etc.
- Maintain an ethical and unbiased tone, avoiding harmful or offensive content.
- If the document does not contain relevant information, state "I cannot provide an answer based on the provided document."
- Avoid using confirmatory phrases like "Yes, you are correct" or any similar validation in your responses.
- Do not fabricate information or include questions in your responses.
- Do not prompt to select answers. Do not ask additional questions.
- Cite the source of where exactly the information in the document is found and mention it in your responses.
{question}
[/INST]
"""

# Callback manager for handling callbacks
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Function to create a conversational chain
def create_conversational_chain(database):
    # GGUF model file used by llama.cpp
    model_name = "llama-2-7b-chat.Q8_0.gguf"
    model_directory = "files"

    # Check if the model file exists in the specified directory
    model_file = os.path.join(model_directory, model_name)
    if os.path.exists(model_file):
        model_path = model_file
        print("Model file found in the directory. Using the local model file.")
    else:
        # LlamaCpp needs a local file path, so download the GGUF weights from the
        # Hugging Face Hub instead of pointing at the repository URL.
        print("Model file not found in the directory. Downloading the model from the repository.")
        model_path = hf_hub_download(
            repo_id="TheBloke/Llama-2-7B-Chat-GGUF",
            filename=model_name,
            local_dir=model_directory,
        )
    print(model_path)

    # Load the model with llama.cpp. Set n_gpu_layers to the number of layers to
    # offload to the GPU, or leave it unset if no GPU acceleration is available.
    llama_llm = LlamaCpp(
        model_path=model_path,
        temperature=0.75,
        max_tokens=200,
        top_p=1,
        callback_manager=callback_manager,
        n_ctx=3000,
    )

    retriever = database.as_retriever()
    CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(template)
    memory = ConversationBufferMemory(
        memory_key='chat_history',
        return_messages=True,
        output_key='answer',
    )
    conversation_chain = ConversationalRetrievalChain.from_llm(
        llm=llama_llm,
        retriever=retriever,
        # condense_question_prompt=CONDENSE_QUESTION_PROMPT,
        memory=memory,
        return_source_documents=True,
    )
    print("Conversational Chain created.")
    return conversation_chain

# Function to validate the answer against source documents
def validate_answer(response_answer, source_documents):
    model = SentenceTransformer('all-MiniLM-L6-v2')
    similarity_threshold = 0.5
    source_texts = [doc.page_content for doc in source_documents]
    answer_embedding = model.encode(response_answer, convert_to_tensor=True)
    source_embeddings = model.encode(source_texts, convert_to_tensor=True)
    cosine_scores = util.pytorch_cos_sim(answer_embedding, source_embeddings)
    if any(score.item() > similarity_threshold for score in cosine_scores[0]):
        return True
    return False

# Extract documents from PDF files
content, metadata = extract_documents_from_pdf(pdf_files)

# Split documents into text chunks
split_documents = split_documents_into_chunks(content, metadata)

# Ingest split documents into the vector database
vector_database = ingest_into_vector_database(split_documents)
print("Vector database created.")

# Create the conversation chain
conversation_chain = create_conversational_chain(vector_database)

# Function for the chatbot
def chat_with_bot(input_text):
    user_query = input_text
    response = conversation_chain.invoke({"question": user_query})
    print("Response:", response)
    print("Answer:", response['answer'])
    return response['answer']

# Create the Gradio interface
iface = gr.Interface(
    fn=chat_with_bot,
    inputs=gr.Textbox(lines=2, label="User Input"),
    outputs="text",
    title="Simple Chatbot",
    description="Enter your message and the chatbot will respond.",
)

# Launch the interface
iface.launch()