"""Gradio app: Korean civil-complaint Q&A over a Hugging Face Inference Endpoint.

A pickled (vectorizer, model) pair acts as a gatekeeper deciding whether a
question is one the model can answer; accepted questions are streamed through
an OpenAI-compatible TGI endpoint.
"""

import os
import pickle

import gradio as gr
from openai import OpenAI

# OpenAI-compatible client pointed at a Hugging Face Inference Endpoint.
# NOTE(review): if the "hf_token" env var is unset, api_key is None and the
# first request fails with an auth error — confirm the deployment sets it.
client = OpenAI(
    base_url="https://yo4x63mj3sbmgpwc.us-east-1.aws.endpoints.huggingface.cloud/v1/",
    api_key=os.environ.get("hf_token"),
)

# SECURITY: pickle.load executes arbitrary code from the file. Only load
# artifacts produced by this project; never a user-supplied pickle.
with open("./question_undetector.pkl", "rb") as f:
    vectorizer, model = pickle.load(f)


def guard_question(question):
    """Return True when the classifier labels *question* as answerable (class 1)."""
    pred = model.predict(vectorizer.transform([question]))
    # Fix: collapse the redundant `if ...: return True else: return False`.
    return pred[0] == 1


def respond(
    지자체,
    제목,
    질문,
    max_tokens,
    temperature,
    top_p,
):
    """Stream an answer for a complaint post (municipality / title / question).

    Yields the accumulated partial response as tokens arrive. If the guard
    classifier rejects the question, yields a fixed refusal message instead.

    Args:
        지자체: municipality name (textbox input).
        제목: post title (textbox input).
        질문: the question body to classify and answer.
        max_tokens / temperature / top_p: sampling controls from the sliders.
    """
    # Guard clause: bail out early on unanswerable questions.
    if not guard_question(질문):
        yield "제가 답할 수 있는 질문이 아닌 것 같습니다. 저는 민원 게시글을 처리할 수 있어요."
        return

    # NOTE(review): "municipality"/"title"/"question" are non-standard chat
    # roles — the TGI endpoint's chat template must define them; verify.
    messages = [
        {"role": "municipality", "content": 지자체},
        {"role": "title", "content": 제목},
        {"role": "question", "content": 질문},
    ]

    chat_completion = client.chat.completions.create(
        model="tgi",
        messages=messages,
        stream=True,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )

    response = ""
    for message in chat_completion:
        token = message.choices[0].delta.content
        # Stream chunks may carry a None/empty delta (e.g. role-only or final
        # chunk); skip those instead of concatenating them.
        if token:
            response += token
            yield response


demo = gr.Interface(
    respond,
    inputs=["textbox", "textbox", "textbox"],
    outputs=["textbox"],
    additional_inputs=[
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.4, step=0.05, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.90,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)

if __name__ == "__main__":
    demo.launch()