import spaces
import os
from huggingface_hub import Repository
from huggingface_hub import login
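# Optionally clone a private HF dataset repo used to store user feedback.
# HUB_TOKEN and DATASET are expected as environment variables (Space secrets);
# if either is missing, the app runs without feedback logging.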
init_feedback = False
try:
    login(token=os.environ['HUB_TOKEN'])
    repo = Repository(
        local_dir="backend_fn",
        repo_type="dataset",
        clone_from=os.environ['DATASET'],
        token=True,
        git_email='zhiheng_dev@dahreply.ai'
    )
    repo.git_pull()
    init_feedback = True
except Exception:
    # No token/dataset configured (e.g. a local run): continue without feedback.
    pass
import json
import uuid
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from threading import Thread
if init_feedback:
    # feedback() is provided by the dataset repo cloned into backend_fn above.
    from backend_fn.feedback import feedback
from gradio_modal import Modal
"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
model_name = "Merdeka-LLM/merdeka-llm-hr-3b-128k-instruct"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
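# A single module-level streamer is shared by all requests; skip_prompt drops
# the echoed prompt, and timeout=300 stops the iterator from blocking forever
# if generation stalls.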
streamer = TextIteratorStreamer(tokenizer, timeout=300, skip_prompt=True, skip_special_tokens=True)
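# Module-level state for the feedback flow: vote() records the latest chat
# history, the like/dislike flag, and the message index, and submit_feedback()
# ships them off together with a per-session UUID.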
histories = []
action = None
feedback_index = None
session_id = str(uuid.uuid1())
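# Streaming chat handler. @spaces.GPU requests a ZeroGPU device for the
# duration of the call; generation runs in a background thread and tokens are
# yielded to the UI as they arrive.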
@spaces.GPU
def respond(
    message,
    history: list[tuple[str, str]],
    # system_message,
    max_tokens=4096,
    temperature=0.01,
    top_p=0.95,
):
    # Rebuild the conversation as role-tagged messages behind a fixed
    # HR-advisor system prompt.
    messages = [
        {"role": "system", "content": "You are a professional Human Resource advisor who is familiar with HR related Malaysia Law."}
    ]
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})
    messages.append({"role": "user", "content": message})

    response = ""
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    generate_kwargs = dict(
        model_inputs,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        streamer=streamer
    )
    # Run generation off the main thread so the streamer can be consumed here.
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()
    for new_token in streamer:
        # Skip lone '<' chunks (likely fragments of special tokens).
        if new_token != '<':
            response += new_token
            yield response
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
def submit_feedback(value):
    feedback(session_id, json.dumps(histories), value, action, feedback_index)
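# UI: a ChatInterface plus a hidden modal that opens after a like/dislike and
# asks the user for the response they actually expected.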
with gr.Blocks() as demo:
    # Stash the vote details in module-level state; the modal submit sends them.
    def vote(history, data: gr.LikeData):
        global histories
        global action
        global feedback_index
        histories = history
        action = data.liked
        feedback_index = data.index[0]

    with Modal(visible=False) as modal:
        textb = gr.Textbox(
            label='Actual response',
            info='Leave blank if the answer is good enough'
        )
        submit_btn = gr.Button(
            'Submit'
        )
        submit_btn.click(submit_feedback, textb)
        submit_btn.click(lambda: Modal(visible=False), None, modal)
        # Clear the textbox after submission.
        submit_btn.click(lambda: gr.update(value=''), [], [textb])
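    # ChatInterface wires respond() to a standard chat UI; the commented-out
    # sliders below would expose max_tokens/temperature/top_p if re-enabled.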
    ci = gr.ChatInterface(
        respond,
        description='Due to an unknown bug in Gradio, we are unable to expand the conversation section to full height.'
        # fill_height=True
        # additional_inputs=[
        #     # gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        #     gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        #     gr.Slider(minimum=0.1, maximum=4.0, value=0.1, step=0.1, label="Temperature"),
        #     gr.Slider(
        #         minimum=0.1,
        #         maximum=1.0,
        #         value=0.95,
        #         step=0.05,
        #         label="Top-p (nucleus sampling)",
        #     ),
        # ],
    )
    ci.chatbot.show_copy_button = True
    # ci.chatbot.value=[(None,"Hello! I'm here to assist you with understanding the laws and acts of Malaysia.")]
    # ci.chatbot.height=500
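    # Only wire the like handler when feedback logging is available: a vote
    # records its context, then the modal opens to collect a correction.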
    if init_feedback:
        ci.chatbot.like(vote, ci.chatbot, None).then(
            lambda: Modal(visible=True), None, modal
        )
if __name__ == "__main__":
demo.launch(
)