wq2012 committed on
Commit
33a927c
1 Parent(s): b1e4ee0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -24
app.py CHANGED
@@ -2,15 +2,22 @@ import gradio as gr
2
  from gpt4all import GPT4All
3
  from huggingface_hub import hf_hub_download
4
 
5
- title = "DiarizationLM GGUF inference on CPU"
6
 
7
  description = """
8
- DiarizationLM GGUF inference on CPU
 
 
 
 
 
 
 
 
 
9
  """
10
 
11
- model_path = "model"
12
- # model_name = "model-unsloth.Q4_K_M.gguf"
13
- # hf_hub_download(repo_id="google/DiarizationLM-13b-Fisher-v1", filename=model_name, local_dir=model_path, local_dir_use_symlinks=False)
14
  model_name = "mistral-7b-instruct-v0.1.Q4_K_M.gguf"
15
  hf_hub_download(repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF", filename=model_name, local_dir=model_path, local_dir_use_symlinks=False)
16
 
@@ -18,37 +25,35 @@ print("Start the model init process")
18
  model = model = GPT4All(model_name, model_path, allow_download = False, device="cpu")
19
  print("Finish the model init process")
20
 
21
- model.config["promptTemplate"] = "{0} --> "
22
  model.config["systemPrompt"] = ""
23
  model._is_chat_session_activated = False
24
 
25
  max_new_tokens = 2048
26
 
27
- print("Finish the model config process")
28
-
29
  def generater(message, history, temperature, top_p, top_k):
30
- prompt = model.config["promptTemplate"].format(message)
 
 
 
 
31
  outputs = []
32
  for token in model.generate(prompt=prompt, temp=temperature, top_k = top_k, top_p = top_p, max_tokens = max_new_tokens, streaming=True):
33
  outputs.append(token)
34
  yield "".join(outputs)
35
 
36
-
37
  def vote(data: gr.LikeData):
38
  if data.liked:
39
  return
40
  else:
41
  return
42
 
43
- print("Create chatbot")
44
- chatbot = gr.Chatbot()
45
- print("Created chatbot")
46
 
47
- print("Add additional_inputs")
48
  additional_inputs=[
49
  gr.Slider(
50
  label="temperature",
51
- value=0.0,
52
  minimum=0.0,
53
  maximum=2.0,
54
  step=0.05,
@@ -66,7 +71,7 @@ additional_inputs=[
66
  ),
67
  gr.Slider(
68
  label="top_k",
69
- value=50,
70
  minimum=0,
71
  maximum=1000,
72
  step=1,
@@ -74,7 +79,9 @@ additional_inputs=[
74
  info="limits candidate tokens to a fixed number after sorting by probability. Setting it higher than the vocabulary size deactivates this limit.",
75
  )
76
  ]
77
- print("Added additional_inputs")
 
 
78
 
79
  iface = gr.ChatInterface(
80
  fn = generater,
@@ -83,17 +90,17 @@ iface = gr.ChatInterface(
83
  chatbot=chatbot,
84
  additional_inputs=additional_inputs,
85
  examples=[
86
- ["<speaker:1> Hello, how are you doing <speaker:2> today? I am doing well."],
87
- ]
 
 
 
 
88
  )
89
 
90
- print("Added iface")
91
-
92
- with gr.Blocks() as demo:
93
  chatbot.like(vote, None, None)
94
  iface.render()
95
 
96
- print("Rendered iface")
97
-
98
  if __name__ == "__main__":
99
  demo.queue(max_size=3).launch()
 
2
  from gpt4all import GPT4All
3
  from huggingface_hub import hf_hub_download
4
 
5
+ title = "Mistral-7B-Instruct-GGUF Run On CPU-Basic Free Hardware"
6
 
7
  description = """
8
+ 🔎 [Mistral AI's Mistral 7B Instruct v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) [GGUF format model](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF) , 4-bit quantization balanced quality gguf version, running on CPU. English Only (Also support other languages but the quality's not good). Using [GitHub - llama.cpp](https://github.com/ggerganov/llama.cpp) [GitHub - gpt4all](https://github.com/nomic-ai/gpt4all).
9
+
10
+ 🔨 Running on CPU-Basic free hardware. Suggest duplicating this space to run without a queue.
11
+
12
+ Mistral does not support system prompt symbol (such as ```<<SYS>>```) now, input your system prompt in the first message if you need. Learn more: [Guardrailing Mistral 7B](https://docs.mistral.ai/usage/guardrailing).
13
+ """
14
+
15
+ """
16
+ [Model From TheBloke/Mistral-7B-Instruct-v0.1-GGUF](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF)
17
+ [Mistral-instruct-v0.1 System prompt](https://docs.mistral.ai/usage/guardrailing)
18
  """
19
 
20
+ model_path = "models"
 
 
21
  model_name = "mistral-7b-instruct-v0.1.Q4_K_M.gguf"
22
  hf_hub_download(repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF", filename=model_name, local_dir=model_path, local_dir_use_symlinks=False)
23
 
 
25
  model = model = GPT4All(model_name, model_path, allow_download = False, device="cpu")
26
  print("Finish the model init process")
27
 
28
+ model.config["promptTemplate"] = "[INST] {0} [/INST]"
29
  model.config["systemPrompt"] = ""
30
  model._is_chat_session_activated = False
31
 
32
  max_new_tokens = 2048
33
 
 
 
34
  def generater(message, history, temperature, top_p, top_k):
35
+ prompt = "<s>"
36
+ for user_message, assistant_message in history:
37
+ prompt += model.config["promptTemplate"].format(user_message)
38
+ prompt += assistant_message + "</s>"
39
+ prompt += model.config["promptTemplate"].format(message)
40
  outputs = []
41
  for token in model.generate(prompt=prompt, temp=temperature, top_k = top_k, top_p = top_p, max_tokens = max_new_tokens, streaming=True):
42
  outputs.append(token)
43
  yield "".join(outputs)
44
 
 
45
  def vote(data: gr.LikeData):
46
  if data.liked:
47
  return
48
  else:
49
  return
50
 
51
+ chatbot = gr.Chatbot(avatar_images=('resourse/user-icon.png', 'resourse/chatbot-icon.png'),bubble_full_width = False)
 
 
52
 
 
53
  additional_inputs=[
54
  gr.Slider(
55
  label="temperature",
56
+ value=0.5,
57
  minimum=0.0,
58
  maximum=2.0,
59
  step=0.05,
 
71
  ),
72
  gr.Slider(
73
  label="top_k",
74
+ value=40,
75
  minimum=0,
76
  maximum=1000,
77
  step=1,
 
79
  info="limits candidate tokens to a fixed number after sorting by probability. Setting it higher than the vocabulary size deactivates this limit.",
80
  )
81
  ]
82
+
83
+ character = "Sherlock Holmes"
84
+ series = "Arthur Conan Doyle's novel"
85
 
86
  iface = gr.ChatInterface(
87
  fn = generater,
 
90
  chatbot=chatbot,
91
  additional_inputs=additional_inputs,
92
  examples=[
93
+ ["Hello there! How are you doing?"],
94
+ ["How many hours does it take a man to eat a Helicopter?"],
95
+ ["You are a helpful and honest assistant. Always answer as helpfully as possible. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."],
96
+ ["I want you to act as a spoken English teacher and improver. I will speak to you in English and you will reply to me in English to practice my spoken English. I want you to strictly correct my grammar mistakes, typos, and factual errors. I want you to ask me a question in your reply. Now let's start practicing, you could ask me a question first. Remember, I want you to strictly correct my grammar mistakes, typos, and factual errors."],
97
+ [f"I want you to act like {character} from {series}. I want you to respond and answer like {character} using the tone, manner and vocabulary {character} would use. Do not write any explanations. Only answer like {character}. You must know all of the knowledge of {character}."]
98
+ ]
99
  )
100
 
101
+ with gr.Blocks(css="resourse/style/custom.css") as demo:
 
 
102
  chatbot.like(vote, None, None)
103
  iface.render()
104
 
 
 
105
  if __name__ == "__main__":
106
  demo.queue(max_size=3).launch()