wq2012 committed
Commit c951d7c
1 Parent(s): db9930e

Update app.py

Files changed (1)
  1. app.py +5 -39
app.py CHANGED
@@ -9,26 +9,24 @@ DiarizationLM GGUF inference on CPU
 """
 
 model_path = "models"
-# model_name = "model-unsloth.Q4_K_M.gguf"
-model_name = "model-unsloth.BF16.gguf"
+model_name = "q4_k_m.gguf"
 hf_hub_download(repo_id="google/DiarizationLM-13b-Fisher-v1", filename=model_name, local_dir=model_path, local_dir_use_symlinks=False)
 
 print("Start the model init process")
-model = GPT4All(model_name, model_path, allow_download = False, device="cpu")
+model = GPT4All(model_name=model_name, model_path=model_path, allow_download = False, device="cpu")
 print("Finish the model init process")
 
 model.config["promptTemplate"] = "{0} --> "
 model.config["systemPrompt"] = ""
 model._is_chat_session_activated = False
 
-max_new_tokens = 2048
-
 print("Finish the model config process")
 
 def generater(message, history, temperature, top_p, top_k):
     prompt = model.config["promptTemplate"].format(message)
+    max_new_tokens = round(len(prompt) / 3.0 * 1.2)
     outputs = []
-    for token in model.generate(prompt=prompt, temp=temperature, top_k = top_k, top_p = top_p, max_tokens = max_new_tokens, streaming=True):
+    for token in model.generate(prompt=prompt, temp=0.0, top_k = 50, top_p = 0.9, max_tokens = max_new_tokens, streaming=True):
         outputs.append(token)
         yield "".join(outputs)
 
@@ -43,44 +41,12 @@ print("Create chatbot")
 chatbot = gr.Chatbot()
 print("Created chatbot")
 
-print("Add additional_inputs")
-additional_inputs=[
-    gr.Slider(
-        label="temperature",
-        value=0.0,
-        minimum=0.0,
-        maximum=2.0,
-        step=0.05,
-        interactive=True,
-        info="Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.",
-    ),
-    gr.Slider(
-        label="top_p",
-        value=1.0,
-        minimum=0.0,
-        maximum=1.0,
-        step=0.01,
-        interactive=True,
-        info="0.1 means only the tokens comprising the top 10% probability mass are considered. Suggest set to 1 and use temperature. 1 means 100% and will disable it",
-    ),
-    gr.Slider(
-        label="top_k",
-        value=50,
-        minimum=0,
-        maximum=1000,
-        step=1,
-        interactive=True,
-        info="limits candidate tokens to a fixed number after sorting by probability. Setting it higher than the vocabulary size deactivates this limit.",
-    )
-]
-print("Added additional_inputs")
-
 iface = gr.ChatInterface(
     fn = generater,
     title=title,
     description = description,
     chatbot=chatbot,
-    additional_inputs=additional_inputs,
+    additional_inputs=[],
     examples=[
         ["<speaker:1> Hello, how are you doing <speaker:2> today? I am doing well."],
     ]
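
For context, a minimal sketch of how the length-based token budget introduced in this commit behaves; the 3-characters-per-token ratio and the 1.2 margin come from the diff above, the prompt template and example transcript are the ones already in app.py, and the rationale (the completion roughly mirrors the input transcript with corrected speaker tags) is an assumption, not stated in the commit:

# Sketch: sizing max_new_tokens from the prompt length, as in the updated generater().
# Assumes the "{0} --> " prompt template and the example message from app.py's `examples` list.
promptTemplate = "{0} --> "
message = "<speaker:1> Hello, how are you doing <speaker:2> today? I am doing well."
prompt = promptTemplate.format(message)

# Roughly 3 characters per token, plus a 20% safety margin.
max_new_tokens = round(len(prompt) / 3.0 * 1.2)
print(len(prompt), max_new_tokens)  # 77 characters -> a budget of 31 new tokens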