wq2012 committed on
Commit
066ff91
1 Parent(s): 700a4e4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -18
app.py CHANGED
@@ -1,36 +1,49 @@
1
  import gradio as gr
2
  from gpt4all import GPT4All
3
  from huggingface_hub import hf_hub_download
 
4
 
5
  title = "DiarizationLM GGUF inference on CPU"
6
 
7
  description = """
8
- DiarizationLM GGUF inference on CPU
 
 
9
  """
10
 
11
  model_path = "models"
12
  model_name = "q4_k_m.gguf"
 
 
 
13
  hf_hub_download(repo_id="google/DiarizationLM-13b-Fisher-v1", filename=model_name, local_dir=model_path, local_dir_use_symlinks=False)
14
 
15
  print("Start the model init process")
16
- model = GPT4All(model_name=model_name, model_path=model_path, allow_download = False, device="cpu")
 
 
 
17
  print("Finish the model init process")
18
 
19
- model.config["promptTemplate"] = "{0} --> "
20
- model.config["systemPrompt"] = ""
21
- model._is_chat_session_activated = False
22
-
23
- print("Finish the model config process")
24
-
25
- def generater(message, history, temperature, top_p, top_k):
26
- prompt = model.config["promptTemplate"].format(message)
27
  max_new_tokens = round(len(prompt) / 3.0 * 1.2)
28
  outputs = []
29
- for token in model.generate(prompt=prompt, temp=0.0, top_k = 50, top_p = 0.9, max_tokens = max_new_tokens, streaming=True):
 
 
 
 
 
30
  outputs.append(token)
31
- yield "".join(outputs)
32
-
33
-
 
 
 
 
 
34
  def vote(data: gr.LikeData):
35
  if data.liked:
36
  return
@@ -48,17 +61,14 @@ iface = gr.ChatInterface(
48
  chatbot=chatbot,
49
  additional_inputs=[],
50
  examples=[
51
- ["<speaker:1> Hello, how are you doing <speaker:2> today? I am doing well."],
52
  ]
53
  )
54
 
55
- print("Added iface")
56
-
57
  with gr.Blocks() as demo:
58
  chatbot.like(vote, None, None)
59
  iface.render()
60
 
61
- print("Rendered iface")
62
 
63
  if __name__ == "__main__":
64
  demo.queue(max_size=3).launch()
 
1
  import gradio as gr
2
  from gpt4all import GPT4All
3
  from huggingface_hub import hf_hub_download
4
+ from diarizationlm import utils
5
 
6
  title = "DiarizationLM GGUF inference on CPU"
7
 
8
  description = """
9
+ A demo of the DiarizationLM model finetuned from Llama 2. In this demo, we run a 4-bit quantized GGUF model on CPU.
10
+
11
+ To learn more about DiarizationLM, check our paper: https://arxiv.org/abs/2401.03506
12
  """
13
 
14
  model_path = "models"
15
  model_name = "q4_k_m.gguf"
16
+ prompt_suffix = " --> "
17
+ completion_suffix = " [eod]"
18
+
19
  hf_hub_download(repo_id="google/DiarizationLM-13b-Fisher-v1", filename=model_name, local_dir=model_path, local_dir_use_symlinks=False)
20
 
21
  print("Start the model init process")
22
# Load the quantized GGUF model from the local directory. Downloading is
# disabled because the weights were already fetched by hf_hub_download.
# NOTE: the keyword must be spelled `device`; a misspelled keyword would be
# rejected by the GPT4All constructor.
model = GPT4All(
    model_name=model_name,
    model_path=model_path,
    allow_download=False,
    device="cpu",
)
26
  print("Finish the model init process")
27
 
28
def generater(message, history):
    """Stream the DiarizationLM completion for a single chat message.

    Args:
        message: Transcript text entered by the user, e.g. words interleaved
            with speaker tags such as ``<speaker:1>``.
        history: Earlier chat turns; accepted for the chat callback
            signature but not used by this function.

    Yields:
        After every streamed token, the raw completion accumulated so far.
        As soon as the completion ends with ``completion_suffix``, the result
        of ``utils.transfer_llm_completion(completion, message)`` is yielded
        instead and generation stops.
    """
    prompt = message + prompt_suffix
    # Token budget is derived from the prompt length in characters:
    # one third of it, plus 20% headroom.
    max_new_tokens = round(len(prompt) / 3.0 * 1.2)
    outputs = []
    for token in model.generate(
        prompt=prompt,
        temp=0.0,
        top_k=50,
        top_p=0.9,
        max_tokens=max_new_tokens,
        streaming=True,
    ):
        outputs.append(token)
        completion = "".join(outputs)
        # Use the shared module constant rather than a duplicated literal so
        # the end-of-document marker is defined in exactly one place.
        if completion.endswith(completion_suffix):
            yield utils.transfer_llm_completion(completion, message)
            return
        yield completion
46
+
47
  def vote(data: gr.LikeData):
48
  if data.liked:
49
  return
 
61
  chatbot=chatbot,
62
  additional_inputs=[],
63
  examples=[
64
+ ["<speaker:1> Hello, how are you doing <speaker:2> today? I am doing well. What about <speaker:1> you? I'm doing well, too. Thank you."],
65
  ]
66
  )
67
 
 
 
68
  with gr.Blocks() as demo:
69
  chatbot.like(vote, None, None)
70
  iface.render()
71
 
 
72
 
73
  if __name__ == "__main__":
74
  demo.queue(max_size=3).launch()