wq2012 committed
Commit 057549c
Parent: a3df7e0

Update app.py

Files changed (1): app.py (+5 -5)
app.py CHANGED
@@ -6,7 +6,7 @@ from diarizationlm import utils
 title = "DiarizationLM GGUF inference on CPU"
 
 description = """
-A demo of the DiarizationLM model finetuned from Llama 3. In this demo, we run a 4-bit quantized GGUF model on CPU.
+A demo of the DiarizationLM model finetuned from Llama 3. In this demo, we run a 4-bit quantized GGUF model on CPU.
 
 To learn more about DiarizationLM, check our paper: https://arxiv.org/abs/2401.03506
 """
@@ -16,19 +16,19 @@ model_name = "q4_k_m.gguf"
 prompt_suffix = " --> "
 completion_suffix = " [eod]"
 
-hf_hub_download(repo_id="google/DiarizationLM-8b-Fisher-v2", filename=model_name, local_dir=model_path, local_dir_use_symlinks=False)
+hf_hub_download(repo_id="google/DiarizationLM-8b-Fisher-v2", filename=model_name, local_dir=model_path)
 
 print("Start the model init process")
 model = GPT4All(model_name=model_name,
                 model_path=model_path,
-                allow_download = False,
+                allow_download = False,
                 device="cpu")
 print("Finish the model init process")
 
 def generater(message, history):
     prompt = message + prompt_suffix
     max_new_tokens = round(len(prompt) / 3.0 * 1.2)
-    outputs = []
+    outputs = []
     for token in model.generate(prompt=prompt,
                                 temp=0.1,
                                 top_k=50,
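
The one substantive change in the hunk above is dropping local_dir_use_symlinks=False from the download call. Recent huggingface_hub releases deprecate that argument and ignore it: files requested with a local_dir are materialized there as regular files either way, so the shorter call behaves the same. A minimal sketch of the call in isolation; the repo id and filename come from the diff, while the "models" directory is a stand-in assumption:

from huggingface_hub import hf_hub_download

# local_dir_use_symlinks is deprecated and ignored by recent huggingface_hub
# releases: downloads into local_dir are written as real files regardless.
model_file = hf_hub_download(
    repo_id="google/DiarizationLM-8b-Fisher-v2",  # repo used in the diff
    filename="q4_k_m.gguf",                       # model_name in the diff
    local_dir="models",                           # assumption: any writable directory
)
print(model_file)  # absolute path to the downloaded GGUF file
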
@@ -42,7 +42,7 @@ def generater(message, history):
             break
     transferred_completion = utils.transfer_llm_completion(completion, message)
     yield transferred_completion
-
+
 print("Create chatbot")
 chatbot = gr.Chatbot()
 print("Created chatbot")
 