Spaces:

binqiangliu
/

Zephyr7BAlpha

Runtime error

App Files Files Community

Zephyr7BAlpha / app.py

binqiangliu

Update app.py

22c11b2 over 1 year ago

raw

history blame

2.1 kB

	# import dependencies
	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

	import os
	import gradio as gr
	#from google.colab import drive

	import chromadb
	from langchain.llms import HuggingFacePipeline
	from langchain.document_loaders import TextLoader
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain.embeddings import HuggingFaceEmbeddings
	from langchain.vectorstores import Chroma
	from langchain import HuggingFacePipeline
	from langchain.document_loaders import PyPDFDirectoryLoader
	from langchain.chains import ConversationalRetrievalChain
	from langchain.memory import ConversationBufferMemory

	# Hugging Face Hub id of the model; passed to both the model loader and the tokenizer initializer.
	model_name = "anakin87/zephyr-7b-alpha-sharded"
	# Model page: https://huggingface.co/anakin87/zephyr-7b-alpha-sharded

	# Alternative model id (currently not active): HuggingFaceH4/zephyr-7b-alpha
	# Model page: https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha

	# function for loading 4-bit quantized model
	def load_quantized_model(model_name: str):
	    """
	    Load a causal language model quantized to 4 bits.

	    The quantization settings (4-bit NF4, double quantization, bfloat16
	    compute dtype) are supplied exclusively through a ``BitsAndBytesConfig``.

	    :param model_name: Name or path of the model to be loaded.
	    :return: Loaded quantized model.
	    """
	    bnb_config = BitsAndBytesConfig(
	        load_in_4bit=True,
	        bnb_4bit_use_double_quant=True,
	        bnb_4bit_quant_type="nf4",
	        bnb_4bit_compute_dtype=torch.bfloat16,
	    )

	    # Do not also pass ``load_in_4bit=True`` here: it duplicates what
	    # ``bnb_config`` already states, and newer transformers releases reject
	    # the combination of ``load_in_4bit`` with ``quantization_config``.
	    model = AutoModelForCausalLM.from_pretrained(
	        model_name,
	        torch_dtype=torch.bfloat16,
	        quantization_config=bnb_config,
	    )
	    return model

	# function for initializing the tokenizer
	def initialize_tokenizer(model_name: str):
	    """
	    Build the tokenizer that belongs to ``model_name``.

	    After loading, the beginning-of-sentence token id is overridden to 1.

	    :param model_name: Name or path of the model whose tokenizer is loaded.
	    :return: The tokenizer, with ``bos_token_id`` set to 1.
	    """
	    tok = AutoTokenizer.from_pretrained(model_name)
	    # Force the beginning-of-sentence token id.
	    tok.bos_token_id = 1
	    return tok

	# Load the 4-bit quantized model once, at module import time.
	model = load_quantized_model(model_name)

	# Initialize the tokenizer from the same model id used for the model.
	tokenizer = initialize_tokenizer(model_name)

	# Token ids treated as stop tokens; where this list is consumed is not visible in this part of the file.
	# NOTE(review): id 0 is usually not the end-of-sequence id for Mistral/Llama-family tokenizers — confirm this is the intended stop id.
	stop_token_ids = [0]