nallm-test / app.py
gyulukeyi's picture
added tokenizer definition
7dc524e
raw
history blame contribute delete
No virus
2.04 kB
import os
import pickle
import gradio as gr
from openai import OpenAI
from kiwipiepy import Kiwi
# Module-level Kiwi instance (from kiwipiepy), created once at import time and
# shared by the `tokenizer` function.
tagger = Kiwi()
def tokenizer(t):
    """Tokenize ``t`` with the shared Kiwi tagger and return each token's ``form``.

    Nothing in the visible part of this file calls this function directly.
    NOTE(review): presumably the pickled vectorizer refers to it by name and
    needs it defined here to unpickle -- confirm before renaming or removing.
    """
    forms = []
    for morpheme in tagger.tokenize(t):
        forms.append(morpheme.form)
    return forms
# OpenAI-compatible client pointed at a Hugging Face endpoint URL; the access
# token is read from the `hf_token` environment variable (None when unset).
client = OpenAI(
    api_key=os.getenv("hf_token"),
    base_url="https://yo4x63mj3sbmgpwc.us-east-1.aws.endpoints.huggingface.cloud/v1/",
)

# Load the (vectorizer, model) pair used by guard_question.
# NOTE: pickle.load can execute arbitrary code while deserializing, so this
# file must only ever be a trusted artifact shipped with the app.
with open("./question_undetector.pkl", "rb") as f:
    vectorizer, model = pickle.load(f)
def guard_question(question):
    """Return True when the classifier labels ``question`` as class 1.

    The text is vectorized with the module-level ``vectorizer`` and passed to
    ``model.predict``; only the first element of the prediction is inspected.
    ``respond`` forwards the request to the language model only when this
    returns True.
    """
    features = vectorizer.transform([question])
    label = model.predict(features)[0]
    # bool() keeps the return value a plain Python bool, as before.
    return bool(label == 1)
def respond(
    지자체,
    제목,
    질문,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a generated reply for a civil-complaint post.

    The Korean parameter names are restored from mis-encoded text; the garbled
    forms were not valid Python identifiers.
    NOTE(review): Gradio presumably derives the input labels from these
    parameter names -- confirm before renaming them.

    Args:
        지자체: Municipality text, sent with role "municipality".
        제목: Post title, sent with role "title".
        질문: Post body / question, sent with role "question". It is also the
            text checked by ``guard_question``.
        max_tokens: Forwarded to the completion request.
        temperature: Forwarded to the completion request.
        top_p: Forwarded to the completion request.

    Yields:
        The reply accumulated so far (one yield per streamed chunk), or a
        single fixed Korean refusal message when ``guard_question`` rejects
        the question.
    """
    if not guard_question(질문):
        # "This doesn't look like a question I can answer. I can handle
        # civil-complaint posts."
        yield "제가 답할 수 있는 질문이 아닌 것 같습니다. 저는 민원 게시글을 처리할 수 있어요."
        return

    # The endpoint is sent these custom role names as-is.
    messages = [
        {"role": "municipality", "content": 지자체},
        {"role": "title", "content": 제목},
        {"role": "question", "content": 질문},
    ]

    chat_completion = client.chat.completions.create(
        model="tgi",
        messages=messages,
        stream=True,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )

    response = ""
    for message in chat_completion:
        # Some stream chunks carry no choices; indexing [0] on them would
        # raise IndexError and abort the stream.
        if not message.choices:
            continue
        token = message.choices[0].delta.content
        # The delta content can be None/empty (e.g. on the final chunk).
        if token:
            response += token
        yield response
# Gradio UI: three text boxes followed by three sliders, all handled by
# `respond`, with a single text box for the streamed output.
demo = gr.Interface(
    respond,
    inputs=["textbox", "textbox", "textbox"],
    outputs=["textbox"],
    additional_inputs=[
        gr.Slider(label="Max new tokens", minimum=1, maximum=2048, step=1, value=512),
        gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.05, value=0.4),
        gr.Slider(
            label="Top-p (nucleus sampling)",
            minimum=0.1,
            maximum=1.0,
            step=0.05,
            value=0.90,
        ),
    ],
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()