# import dependencies
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
import os
import gradio as gr
#from google.colab import drive
import chromadb
from langchain.llms import HuggingFacePipeline
from langchain.document_loaders import TextLoader, PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

# specify the Hugging Face model name
model_name = "anakin87/zephyr-7b-alpha-sharded"
#https://huggingface.co/anakin87/zephyr-7b-alpha-sharded
#HuggingFaceH4/zephyr-7b-alpha
#https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha

# function for loading the 4-bit quantized model
def load_quantized_model(model_name: str):
    """
    Load the model quantized to 4 bits with bitsandbytes.

    :param model_name: Name or path of the model to be loaded.
    :return: Loaded quantized model.
    """
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    # quantization_config already requests 4-bit loading, so passing
    # load_in_4bit=True to from_pretrained again is redundant
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        quantization_config=bnb_config,
    )
    return model

# function for initializing the tokenizer
def initialize_tokenizer(model_name: str):
    """
    Initialize the tokenizer with the specified model_name.

    :param model_name: Name or path of the model for tokenizer initialization.
    :return: Initialized tokenizer.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokenizer.bos_token_id = 1  # set beginning-of-sentence token id
    return tokenizer

# load model
model = load_quantized_model(model_name)

# initialize tokenizer
tokenizer = initialize_tokenizer(model_name)

# specify stop token ids
stop_token_ids = [0]

# load pdf files; pdf_files must point at a directory containing your PDFs
pdf_files = "pdfs/"  # placeholder path, adjust to your PDF directory
loader = PyPDFDirectoryLoader(pdf_files)
documents = loader.load()

# split the documents into small chunks
# change chunk_size and chunk_overlap as needed
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
all_splits = text_splitter.split_documents(documents)

# specify embedding model (using a Hugging Face sentence-transformers model)
embedding_model_name = "sentence-transformers/all-mpnet-base-v2"
#model_kwargs = {"device": "cuda"}
#embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name, model_kwargs=model_kwargs)
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

# embed the document chunks and persist them in a Chroma vector store
vectordb = Chroma.from_documents(documents=all_splits, embedding=embeddings, persist_directory="chroma_db")

# specify the retriever
retriever = vectordb.as_retriever()
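
# optional sanity check (illustrative sketch, not part of the original script):
# query the retriever directly to confirm the PDFs were embedded and that the
# vector store returns relevant chunks before wiring it into a chain;
# sample_query is a hypothetical placeholder, use a question about your PDFs
sample_query = "What is this document about?"
retrieved_docs = retriever.get_relevant_documents(sample_query)
for doc in retrieved_docs:
    source = doc.metadata.get("source", "unknown")
    print(f"{source}: {doc.page_content[:200]}")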