Spaces:

diarizers-community
/

DiarizationLM-GGUF

Running

wq2012 committed on Aug 3

Commit

057549c

•

1 Parent(s): a3df7e0

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -6,7 +6,7 @@ from diarizationlm import utils
 title = "DiarizationLM GGUF inference on CPU"
 description = """
-A demo of the DiarizationLM model finetuned from Llama 3. In this demo, we run a 4-bit quantized GGUF model on CPU.
 To learn more about DiarizationLM, check our paper: https://arxiv.org/abs/2401.03506
 """
@@ -16,19 +16,19 @@ model_name = "q4_k_m.gguf"
 prompt_suffix = " --> "
 completion_suffix = " [eod]"
-hf_hub_download(repo_id="google/DiarizationLM-8b-Fisher-v2", filename=model_name, local_dir=model_path, local_dir_use_symlinks=False)
 print("Start the model init process")
 model = GPT4All(model_name=model_name,
                 model_path=model_path,
-                allow_download = False,
                 device="cpu")
 print("Finish the model init process")
 def generater(message, history):
     prompt = message + prompt_suffix
     max_new_tokens = round(len(prompt) / 3.0 * 1.2)
-    outputs = []
     for token in model.generate(prompt=prompt,
                                 temp=0.1,
                                 top_k=50,
@@ -42,7 +42,7 @@ def generater(message, history):
             break
     transferred_completion = utils.transfer_llm_completion(completion, message)
     yield transferred_completion
 print("Create chatbot")
 chatbot = gr.Chatbot()
 print("Created chatbot")

 title = "DiarizationLM GGUF inference on CPU"
 description = """
+A demo of the DiarizationLM model finetuned from Llama 3. In this demo, we run a 4-bit quantized GGUF model on CPU.
 To learn more about DiarizationLM, check our paper: https://arxiv.org/abs/2401.03506
 """
 prompt_suffix = " --> "
 completion_suffix = " [eod]"
+hf_hub_download(repo_id="google/DiarizationLM-8b-Fisher-v2", filename=model_name, local_dir=model_path)
 print("Start the model init process")
 model = GPT4All(model_name=model_name,
                 model_path=model_path,
+                allow_download = False,
                 device="cpu")
 print("Finish the model init process")
 def generater(message, history):
     prompt = message + prompt_suffix
     max_new_tokens = round(len(prompt) / 3.0 * 1.2)
+    outputs = []
     for token in model.generate(prompt=prompt,
                                 temp=0.1,
                                 top_k=50,
             break
     transferred_completion = utils.transfer_llm_completion(completion, message)
     yield transferred_completion
 print("Create chatbot")
 chatbot = gr.Chatbot()
 print("Created chatbot")