import os
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
import torch
from string import Template
from huggingface_hub import login
# Log in to Hugging Face (read the access token from the environment)
login(os.getenv("ACCESS_TOKEN"))  # ACCESS_TOKEN is expected to hold a Hugging Face access token
# Prompt template
prompt_template = Template("Human: ${inst} </s> Assistant: ")
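# For illustration only (hypothetical system message), the template renders the
# conversation seed like this:
#   prompt_template.safe_substitute({"inst": "You are a helpful assistant."})
#   -> "Human: You are a helpful assistant. </s> Assistant: "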
# ๋ชจ๋ธ๊ณผ ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ
model_name = "meta-llama/Llama-3.2-1b-instruct" # ๋ชจ๋ธ ๊ฒฝ๋กœ
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="cpu").eval()
# ์ƒ์„ฑ ์„ค์ • (Gradio UI์—์„œ ์ œ์–ดํ•  ์ˆ˜ ์žˆ๋Š” ๋ณ€์ˆ˜๋“ค)
default_generation_config = GenerationConfig(
temperature=0.1,
top_k=30,
top_p=0.5,
do_sample=True,
num_beams=1,
repetition_penalty=1.1,
min_new_tokens=10,
max_new_tokens=30
)
# ์‘๋‹ต ์ƒ์„ฑ ํ•จ์ˆ˜
def respond(message, history, system_message, max_tokens, temperature, top_p):
# ์ƒ์„ฑ ์„ค์ •
generation_config = GenerationConfig(
**default_generation_config.to_dict() # ๊ธฐ๋ณธ ์„ค์ •๊ณผ ๋ณ‘ํ•ฉ
)
generation_config.max_new_tokens = max_tokens # max_tokens ๋”ฐ๋กœ ์„ค์ •
generation_config.temperature = temperature # temperature ๋”ฐ๋กœ ์„ค์ •
generation_config.top_p = top_p
    # Build the prompt from the system message and the conversation history
    prompt = prompt_template.safe_substitute({"inst": system_message})
    for user_turn, assistant_turn in history:
        if user_turn:
            prompt += f"Human: {user_turn} </s> Assistant: {assistant_turn} </s> "
    prompt += f"Human: {message} </s> Assistant: "
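    # Illustrative final prompt for one prior exchange (example values assumed):
    #   "Human: <system message> </s> Assistant: Human: hello </s> Assistant: hi! </s> Human: <new message> </s> Assistant: "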
# ๋ชจ๋ธ ์ž…๋ ฅ ์ƒ์„ฑ
inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to(model.device)
response_ids = model.generate(
**inputs,
generation_config=generation_config,
eos_token_id=tokenizer.eos_token_id, # ์ข…๋ฃŒ ํ† ํฐ ์„ค์ •
pad_token_id=tokenizer.eos_token_id # pad_token_id๋„ ์ข…๋ฃŒ ํ† ํฐ์œผ๋กœ ์„ค์ •
)
# ๋ชจ๋ธ ์‘๋‹ต ๋””์ฝ”๋”ฉ
response_text = tokenizer.decode(response_ids[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
# ์‹ค์‹œ๊ฐ„ ์‘๋‹ต์„ ์œ„ํ•œ ๋ถ€๋ถ„์  ํ…์ŠคํŠธ ๋ฐ˜ํ™˜
response = ""
for token in response_text:
response += token
yield response
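    # Note: generation finishes before anything is yielded, so the loop above only
    # simulates streaming. For true token-level streaming, a
    # transformers.TextIteratorStreamer could be passed to model.generate() running
    # in a background thread (not done here).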
# Gradio chat interface
demo = gr.ChatInterface(
respond,
additional_inputs=[
gr.Textbox(value="You are a friendly and knowledgeable assistant who can discuss a wide range of topics related to music, including genres, artists, albums, instruments, and music history.", label="System message"),
gr.Slider(minimum=1, maximum=2048, value=30, step=1, label="Max new tokens"),
gr.Slider(minimum=0.1, maximum=4.0, value=0.1, step=0.1, label="Temperature"),
gr.Slider(minimum=0.1, maximum=1.0, value=0.5, step=0.05, label="Top-p (nucleus sampling)"),
],
)
if __name__ == "__main__":
demo.launch()