nallm-test / app.py
gyulukeyi's picture
added tokenizer definition
7dc524e
raw
history blame
No virus
2.04 kB
import os
import pickle
import gradio as gr
from openai import OpenAI
from kiwipiepy import Kiwi
# Single Kiwi instance shared by every tokenizer call; created once at import.
tagger = Kiwi()


def tokenizer(t):
    """Return the surface form of every token Kiwi finds in *t*.

    NOTE(review): nothing in this file calls this function directly. It is
    presumably looked up by name when the pickled vectorizer is loaded, so
    the name and its module-level placement should be kept -- confirm.

    Args:
        t: Text to tokenize.

    Returns:
        A list with the ``form`` attribute of each token produced by
        ``tagger.tokenize(t)``, in the order they were produced.
    """
    forms = []
    for token in tagger.tokenize(t):
        forms.append(token.form)
    return forms
# OpenAI-compatible client aimed at the Hugging Face endpoint below. The key
# comes from the ``hf_token`` environment variable; ``.get`` yields ``None``
# when that variable is not set.
client = OpenAI(
    api_key=os.environ.get("hf_token"),
    base_url="https://yo4x63mj3sbmgpwc.us-east-1.aws.endpoints.huggingface.cloud/v1/",
)

# Restore the (vectorizer, model) pair that ``guard_question`` relies on.
# NOTE(review): unpickling can execute arbitrary code, so only a trusted
# ``question_undetector.pkl`` should be shipped. The path is relative to the
# current working directory.
with open("./question_undetector.pkl", "rb") as f:
    vectorizer, model = pickle.load(f)
def guard_question(question: str) -> bool:
    """Tell whether the classifier labels *question* as ``1``.

    The text is vectorized with the module-level ``vectorizer`` and then
    classified with the module-level ``model``.

    Args:
        question: Raw question text.

    Returns:
        ``True`` when the predicted label is ``1``, ``False`` for any other
        label.
    """
    # One-element batch in, so the single prediction sits at index 0.
    pred = model.predict(vectorizer.transform([question]))
    # bool() keeps the return a plain built-in bool, exactly what the former
    # ``if ...: return True / else: return False`` produced.
    return bool(pred[0] == 1)
def respond(
    지자체,
    제목,
    질문,
    max_tokens,
    temperature,
    top_p,
):
    """Stream the model's answer to a civil-complaint post.

    This is a generator. While the completion streams in, each yield is the
    whole answer accumulated so far (not just the newest fragment). When
    ``guard_question`` rejects the question, a single fixed refusal message
    is yielded and no request is sent.

    NOTE(review): the three Korean parameter names were restored from
    mis-encoded text. Gradio appears to derive the labels of the unlabeled
    textboxes from these names -- confirm before renaming them.

    Args:
        지자체: Municipality text, sent as the ``municipality`` message.
        제목: Post title, sent as the ``title`` message.
        질문: Question text; screened by ``guard_question`` and, if accepted,
            sent as the ``question`` message.
        max_tokens: Forwarded unchanged to the completion request.
        temperature: Forwarded unchanged to the completion request.
        top_p: Forwarded unchanged to the completion request.

    Yields:
        The refusal message once, or successive, growing prefixes of the
        streamed answer.
    """
    # Guard clause: refuse early so the happy path below stays flat.
    if not guard_question(질문):
        yield "제가 답할 수 있는 질문이 아닌 것 같습니다. 저는 민원 게시글을 처리할 수 있어요."
        return

    # The endpoint is addressed with these three custom roles rather than the
    # usual system/user/assistant ones.
    messages = [
        {"role": "municipality", "content": 지자체},
        {"role": "title", "content": 제목},
        {"role": "question", "content": 질문},
    ]
    chat_completion = client.chat.completions.create(
        model="tgi",
        messages=messages,
        stream=True,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )

    response = ""
    for message in chat_completion:
        # A streamed chunk may carry no choices at all; skip it instead of
        # failing on ``choices[0]``.
        if not message.choices:
            continue
        token = message.choices[0].delta.content
        # ``content`` can be None or empty (e.g. role-only or final chunks).
        if token:
            response += token
            yield response
# Gradio UI around ``respond``: three free-text inputs and one text output,
# plus three sliders listed, in order, for max_tokens, temperature and top_p.
demo = gr.Interface(
    fn=respond,
    inputs=["textbox", "textbox", "textbox"],
    outputs=["textbox"],
    additional_inputs=[
        gr.Slider(
            label="Max new tokens",
            minimum=1,
            maximum=2048,
            step=1,
            value=512,
        ),
        gr.Slider(
            label="Temperature",
            minimum=0.1,
            maximum=1.0,
            step=0.05,
            value=0.4,
        ),
        gr.Slider(
            label="Top-p (nucleus sampling)",
            minimum=0.1,
            maximum=1.0,
            step=0.05,
            value=0.90,
        ),
    ],
)

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()