Spaces:

Mat17892
/

iris

Runtime error

iris / app.py

desert

init inference

b97d649 17 days ago

2.52 kB

	import os
	import subprocess
	import gradio as gr
	from huggingface_hub import hf_hub_download

	# Hub repositories that host the GGUF artifacts used by this app
	base_model_repo = "unsloth/Llama-3.2-3B-Instruct-GGUF"
	adapter_repo = "Mat17892/llama_lora_gguf"

	# Fetch the base model file; the result is later passed to llama-cli via `-m`
	print("Downloading base model...")
	base_model_path = hf_hub_download(
	    filename="Llama-3.2-3B-Instruct-Q8_0.gguf",
	    repo_id=base_model_repo,
	)

	# Fetch the LoRA adapter file; the result is later passed to llama-cli via `--lora`
	print("Downloading LoRA adapter...")
	lora_adapter_path = hf_hub_download(
	    filename="llama_lora_adapter.gguf",
	    repo_id=adapter_repo,
	)

	# Function to run `llama-cli` once with the base model and LoRA adapter
	def run_llama_cli(prompt, timeout=600.0):
	    """Generate a completion for *prompt* by invoking the `./llama-cli` binary.

	    The command is passed as an argument list (no shell), so the prompt text
	    is never interpreted by a shell.

	    Args:
	        prompt: Full prompt text forwarded to llama-cli via `--prompt`.
	        timeout: Maximum number of seconds to wait for llama-cli before the
	            child is killed. Pass ``None`` to wait indefinitely.

	    Returns:
	        The stripped standard output of llama-cli on success, or the string
	        "Error: Could not generate response." if the process cannot be
	        started, times out, or exits with a non-zero status.
	    """
	    print("Running inference with llama-cli...")
	    cmd = [
	        "./llama-cli",
	        "-c", "2048",  # Context length
	        # NOTE(review): conversational mode is meant for interactive use; with
	        # stdin closed the CLI may stop before producing an answer. Confirm
	        # against the bundled llama-cli build before changing this flag.
	        "-cnv",  # Enable conversational mode
	        "-m", base_model_path,
	        "--lora", lora_adapter_path,
	        "--prompt", prompt,
	    ]
	    try:
	        result = subprocess.run(
	            cmd,
	            # Without this the child inherits our stdin and can block forever
	            # waiting for interactive input.
	            stdin=subprocess.DEVNULL,
	            stdout=subprocess.PIPE,
	            stderr=subprocess.PIPE,
	            timeout=timeout,
	            check=False,
	        )
	    except subprocess.TimeoutExpired:
	        # subprocess.run has already killed the child at this point.
	        print(f"Exception occurred: llama-cli timed out after {timeout} seconds")
	        return "Error: Could not generate response."
	    except Exception as e:
	        # UI boundary: the caller must always get a string back, e.g. when the
	        # binary is missing or not executable.
	        print(f"Exception occurred: {e}")
	        return "Error: Could not generate response."

	    if result.returncode != 0:
	        print("Error during inference:")
	        print(result.stderr.decode(errors="replace"))
	        return "Error: Could not generate response."

	    # errors="replace" keeps the answer even if the output contains a byte
	    # sequence that is not valid UTF-8.
	    return result.stdout.decode(errors="replace").strip()

	# Gradio event handler for one chat turn
	def chatbot_fn(user_input, chat_history):
	    """Run one chat turn and return the updated conversation.

	    Args:
	        user_input: Text the user just entered.
	        chat_history: Sequence of ``(user_message, ai_message)`` pairs from
	            earlier turns; ``None`` is treated as an empty history.

	    Returns:
	        A ``(history, history)`` tuple: the same updated list twice, one for
	        the chatbot display and one for the stored state. The list passed in
	        is not modified.
	    """
	    # Work on a copy so the caller's list is never mutated in place.
	    history = list(chat_history or [])

	    # Nothing to send: skip the (expensive) model call and leave the
	    # conversation unchanged instead of appending an empty turn.
	    if not user_input or not user_input.strip():
	        return history, history

	    # Build the full chat history as the prompt, ending with the new message
	    turns = [f"User: {user}\nAI: {ai}\n" for user, ai in history]
	    turns.append(f"User: {user_input}\nAI:")
	    prompt = "".join(turns)

	    # Generate response using llama-cli
	    response = run_llama_cli(prompt)

	    history.append((user_input, response))
	    return history, history

	# Build the Gradio UI
	with gr.Blocks() as demo:
	    gr.Markdown("# 🦙 LLaMA Chatbot with Base Model and LoRA Adapter")
	    chatbot = gr.Chatbot(label="Chat with the Model")

	    # Input row: a wide message box next to a narrow send button
	    with gr.Row():
	        with gr.Column(scale=4):
	            user_input = gr.Textbox(label="Your Message", placeholder="Type a message...")
	        with gr.Column(scale=1):
	            submit_btn = gr.Button("Send")

	    # Conversation state, starting empty
	    chat_history = gr.State([])

	    # Link components: clicking "Send" runs one chat turn
	    submit_btn.click(
	        fn=chatbot_fn,
	        inputs=[user_input, chat_history],
	        outputs=[chatbot, chat_history],
	        show_progress=True,
	    )

	    # Pressing Enter in the textbox runs the same chat turn as the button
	    user_input.submit(
	        fn=chatbot_fn,
	        inputs=[user_input, chat_history],
	        outputs=[chatbot, chat_history],
	        show_progress=True,
	    )

	# Launch the Gradio app
	demo.launch()