Spaces:

binqiangliu
/

Zephyr7BAlpha

Runtime error

App Files Files Community

Zephyr7BAlpha / app.py

binqiangliu

Update app.py

3c0fc42 about 1 year ago

raw

history blame

2.94 kB

	# import dependencies
	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

	import os
	import gradio as gr
	#from google.colab import drive

	import chromadb
	from langchain.llms import HuggingFacePipeline
	from langchain.document_loaders import TextLoader
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain.embeddings import HuggingFaceEmbeddings
	from langchain.vectorstores import Chroma
	from langchain import HuggingFacePipeline
	from langchain.document_loaders import PyPDFDirectoryLoader
	from langchain.chains import ConversationalRetrievalChain
	from langchain.memory import ConversationBufferMemory

	# Hugging Face Hub model id; the same id is used below to load both the
	# model weights and the tokenizer.
	model_name = "anakin87/zephyr-7b-alpha-sharded"
	# Model card: https://huggingface.co/anakin87/zephyr-7b-alpha-sharded

	# Alternative checkpoint kept for reference: HuggingFaceH4/zephyr-7b-alpha
	# Model card: https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha

	# function for loading 4-bit quantized model
	def load_quantized_model(model_name: str):
	    """
	    Load a causal language model quantized to 4-bit with bitsandbytes.

	    The quantization settings (4-bit NF4 weights, double quantization,
	    bfloat16 compute dtype) are carried entirely by the
	    ``BitsAndBytesConfig``. ``load_in_4bit`` is intentionally NOT repeated
	    as a keyword argument to ``from_pretrained``: newer transformers
	    releases raise a ``ValueError`` when it is passed together with
	    ``quantization_config``, and older ones ignore it in favour of the
	    config, so dropping it is safe on both.

	    :param model_name: Name or path of the model to be loaded.
	    :return: Loaded quantized model.
	    """
	    bnb_config = BitsAndBytesConfig(
	        load_in_4bit=True,
	        bnb_4bit_use_double_quant=True,
	        bnb_4bit_quant_type="nf4",
	        bnb_4bit_compute_dtype=torch.bfloat16,
	    )

	    return AutoModelForCausalLM.from_pretrained(
	        model_name,
	        torch_dtype=torch.bfloat16,
	        quantization_config=bnb_config,
	    )

	# function for initializing tokenizer
	def initialize_tokenizer(model_name: str):
	    """
	    Build the tokenizer that belongs to ``model_name``.

	    :param model_name: Name or path of the model whose tokenizer is loaded.
	    :return: Tokenizer with its beginning-of-sentence token id set to 1.
	    """
	    tok = AutoTokenizer.from_pretrained(model_name)
	    # Pin the BOS token id explicitly instead of relying on the loaded config.
	    tok.bos_token_id = 1
	    return tok

	# Load the quantized model as a module-level side effect: this runs when
	# the script is imported or started, before anything below it.
	model = load_quantized_model(model_name)

	# Build the tokenizer from the same model id so it matches the model.
	tokenizer = initialize_tokenizer(model_name)

	# Token ids treated as stop tokens. Not consumed in the visible part of the
	# file; presumably used by generation code further down (TODO confirm).
	stop_token_ids = [0]

	# Directory that is scanned for PDF files. It has to be defined before the
	# loader is built; without this assignment the script stops with a
	# NameError on `pdf_files`.
	# NOTE(review): the default folder name is a placeholder assumption; set the
	# PDF_FILES_DIR environment variable (or change the default) to the folder
	# that actually contains the PDFs.
	pdf_files = os.environ.get("PDF_FILES_DIR", "pdf_files")

	# load every PDF found in that directory
	loader = PyPDFDirectoryLoader(pdf_files)
	documents = loader.load()

	# split the documents into small, overlapping chunks
	# (change chunk_size and chunk_overlap as needed)
	text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
	all_splits = text_splitter.split_documents(documents)

	# specify embedding model (using huggingface sentence transformer)
	embedding_model_name = "sentence-transformers/all-mpnet-base-v2"
	# No device is requested here, so the library default is used; pass
	# model_kwargs={"device": "cuda"} to HuggingFaceEmbeddings to request a GPU.
	embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

	# embed the document chunks and store them in a Chroma collection that is
	# persisted under the "chroma_db" directory
	vectordb = Chroma.from_documents(
	    documents=all_splits,
	    embedding=embeddings,
	    persist_directory="chroma_db",
	)

	# expose the vector store as a retriever
	retriever = vectordb.as_retriever()