from ctransformers import AutoModelForCausalLM
import gradio as gr

greety = """
A special thanks to [TheBloke](https://huggingface.co/TheBloke) for the quantized model and [Gathnex](https://medium.com/@gathnex) for his excellent tutorial.
"""
llm = AutoModelForCausalLM.from_pretrained(
    "dolphin-2.0-mistral-7b.Q4_K_S.gguf",
    model_type='mistral',
    max_new_tokens=1096,
    threads=3,
)
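
# gr.ChatInterface calls this with the user's message and the chat history.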
def stream(prompt, history):
    system_prompt = 'You are a helpful AI assistant. Below is an instruction that describes a task. Write a response that appropriately completes the request.'
    start, end = "<|im_start|>", "<|im_end|>"
    # ChatML prompt: system and user turns, then an open assistant turn for the model to complete.
    prompt = f"{start}system\n{system_prompt}{end}\n{start}user\n{prompt.strip()}{end}\n{start}assistant\n"
    return llm(prompt)

css = """
h1 {
  text-align: center;
}

#duplicate-button {
  margin: auto;
  color: whitesmoke;
  background: #1565c0;
}

.contain {
  max-width: 900px;
  margin: auto;
  padding-top: 1.5rem;
}
"""
chat_interface = gr.ChatInterface(
    fn=stream,
    stop_btn=None,
    examples=[
        "What are 'Large Language Models'?",
        "Explain OCEAN personality types",
    ],
)
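
# Page layout: title, duplicate-Space button, the chat interface, and the thank-you note.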
with gr.Blocks(css=css) as demo:
    gr.HTML("<h1><center>Dolphin2.0_x_Mistral Demo</center></h1>")
    gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
    chat_interface.render()
    gr.Markdown(greety)

if __name__ == "__main__":
    demo.queue(max_size=10).launch()