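"""Gradio demo for DiarizationLM GGUF inference on CPU.

Downloads a 4-bit quantized GGUF checkpoint of DiarizationLM (finetuned
from Llama 3) from the Hugging Face Hub and serves it through a streaming
Gradio interface, using GPT4All as the CPU inference backend.
"""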
import gradio as gr
from gpt4all import GPT4All
from huggingface_hub import hf_hub_download
from diarizationlm import utils
title = "💬DiarizationLM GGUF inference on CPU💬"
description = """
A demo of the DiarizationLM model finetuned from Llama 3. In this demo, we run a 4-bit quantized GGUF model on CPU.

To learn more about DiarizationLM, check out our paper: https://arxiv.org/abs/2401.03506
"""
model_path = "models"
model_name = "q4_k_m.gguf"
prompt_suffix = " --> "
completion_suffix = " [eod]"
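# Download the quantized model file from the Hugging Face Hub into ./models.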
hf_hub_download(repo_id="google/DiarizationLM-8b-Fisher-v2",
                filename=model_name,
                local_dir=model_path)
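# Load the GGUF checkpoint with GPT4All; allow_download=False ensures we use
# the file fetched above, and inference runs entirely on CPU.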
print("Start the model init process")
model = GPT4All(model_name=model_name,
                model_path=model_path,
                allow_download=False,
                device="cpu")
print("Finish the model init process")
def generater(prompt):
    """Streams the LLM completion for a DiarizationLM prompt."""
    llm_prompt = prompt + prompt_suffix
    # Heuristic output budget: roughly one token per 3 characters of input,
    # plus 20% headroom, since the completion mirrors the input transcript.
    max_new_tokens = round(len(prompt) / 3.0 * 1.2)
    outputs = []
    for token in model.generate(prompt=llm_prompt,
                                temp=0.1,
                                top_k=50,
                                top_p=0.5,
                                max_tokens=max_new_tokens,
                                streaming=True):
        outputs.append(token)
        completion = "".join(outputs)
        yield completion
        # Stop as soon as the model emits the end-of-document marker.
        if completion.endswith(completion_suffix):
            break
    # Transfer the corrected speaker labels from the completion back onto the
    # original words of the prompt, so the final output preserves the input
    # transcript's text verbatim.
    transferred_completion = utils.transfer_llm_completion(completion, prompt)
    yield transferred_completion
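# Simple text-in / text-out Gradio interface with streaming output.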
demo = gr.Interface(
    fn=generater,
    title=title,
    description=description,
    inputs=["text"],
    outputs=["text"],
    examples=[
        ["<speaker:1> Hello, my name is Tom. May I speak to Laura <speaker:2> please? Hello, this is Laura. <speaker:1> Hi Laura, how are you? This is <speaker:2> Tom. Hi Tom, I haven't seen you for a <speaker:1> while."],
        ["<speaker:1> This demo looks really <speaker:2> good! Thanks, I am glad to hear that."],
    ],
    cache_examples=False,
)
if __name__ == "__main__":
    # Cap the request queue at 3 pending jobs, since inference is CPU-bound.
    demo.queue(max_size=3).launch()