ylacombe committed
Commit 1346128
1 Parent(s): c529a11

Update app.py

Files changed (1):
  1. app.py +108 -62
app.py CHANGED
@@ -6,20 +6,31 @@ import gradio as gr
 import numpy as np
 import torch
 import nltk  # we'll use this to split into sentences
-nltk.download('punkt')
 import uuid
+import soundfile as SF
 
 from TTS.api import TTS
-# By using XTTS you agree to CPML license https://coqui.ai/cpml
-os.environ["COQUI_TOS_AGREED"] = "1"
-
 tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1", gpu=True)
 
-DESCRIPTION = """# Speak with Llama2
-TODO
+title = "Speak with Llama2 70B"
+
+DESCRIPTION = """# Speak with Llama2 70B
+
+This Space demonstrates how to speak to a chatbot, based solely on open-source models.
+It relies on 3 models:
+1. [Whisper-large-v2](https://huggingface.co/spaces/sanchit-gandhi/whisper-large-v2) as an ASR model, to transcribe recorded audio to text. It is called through a [gradio client](https://www.gradio.app/docs/client).
+2. [Llama-2-70b-chat-hf](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) as the chat model. It is also called through a [gradio client](https://www.gradio.app/docs/client).
+3. [Coqui's XTTS](https://huggingface.co/spaces/coqui/xtts) as a TTS model, to generate the chatbot answers. This model is hosted locally.
+
+Note:
+- As a derivative work of [Llama-2-70b-chat](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) by Meta,
+this demo is governed by the original [license](https://huggingface.co/spaces/ysharma/Explore_llamav2_with_TGI/blob/main/LICENSE.txt) and [acceptable use policy](https://huggingface.co/spaces/ysharma/Explore_llamav2_with_TGI/blob/main/USE_POLICY.md).
+- By using this demo you agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml
 """
+css = """.toast-wrap { display: none !important } """
+
 
-CACHE_EXAMPLES = os.getenv("CACHE_EXAMPLES") == "1"
 
 system_message = "\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
 temperature = 0.9
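Aside: the description added above notes that XTTS is the one model hosted locally rather than called over HTTP. A minimal sketch of what that local call boils down to, using only the `TTS.api` calls that appear in this diff (the 24 kHz figure is an assumption about the XTTS v1 config, so the code reads the rate instead of hard-coding it):

```python
# Minimal local XTTS check, mirroring the calls visible in this diff.
from TTS.api import TTS

tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1", gpu=True)

# tts() returns a list of float samples; the sample rate lives in the
# synthesizer config (expected to be 24000 for XTTS v1, but not assumed here).
wav = tts.tts(
    text="Hello, this is a short voice-cloning test.",
    speaker_wav="examples/female.wav",  # reference clip shipped with the Space
    language="en",
)
sampling_rate = tts.synthesizer.tts_config.audio["sample_rate"]
```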
@@ -40,6 +51,7 @@ from gradio_client import Client
 whisper_client = Client("https://sanchit-gandhi-whisper-large-v2.hf.space/")
 text_client = Client("https://ysharma-explore-llamav2-with-tgi.hf.space/")
 
+
 def transcribe(wav_path):
 
     return whisper_client.predict(
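The body of `transcribe()` is cut off at the hunk boundary. Schematically, the `gradio_client` pattern it uses looks like the sketch below; `Client.predict()` is the real API, but the positional arguments passed to the Whisper Space are an assumption, since they are not visible in this diff:

```python
# Hypothetical sketch of the remote ASR call; argument order is an assumption.
from gradio_client import Client

whisper_client = Client("https://sanchit-gandhi-whisper-large-v2.hf.space/")

def transcribe(wav_path):
    # predict() blocks until the hosted Whisper Space returns the transcript.
    return whisper_client.predict(
        wav_path,       # path to the recorded audio file
        "transcribe",   # task name (assumed; defined by that Space's API)
        api_name="/predict",
    )
```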
@@ -52,88 +64,122 @@ def transcribe(wav_path):
 # Chatbot demo with multimodal input (text, markdown, LaTeX, code blocks, image, audio, & video). Plus shows support for streaming text.
 
 
-def add_text(history, text):
-    history = history + [(text, None)]
-    return history, gr.update(value="", interactive=False)
+def add_text(history, text, agree):
+    if agree == True:
+        history = [] if history is None else history
+        history = history + [(text, None)]
+        return history, gr.update(value="", interactive=False)
+    else:
+        gr.Warning("Please accept the Terms & Conditions!")
+        return None, gr.update(value="", interactive=True)
 
-def add_file(history, file):
-    text = transcribe(
-        file
-    )
-
-    history = history + [(text, None)]
-    return history
-
-
-def bot(history):
-
-    history[-1][1] = ""
-    for character in text_client.submit(
-        history,
-        system_message,
-        temperature,
-        4096,
-        temperature,
-        repetition_penalty,
-        api_name="/chat"
-    ):
-        history[-1][1] = character
-        yield history
-
-def generate_speech(history):
-    text_to_generate = history[-1][1]
-    text_to_generate = text_to_generate.replace("\n", " ").strip()
-    text_to_generate = nltk.sent_tokenize(text_to_generate)
-
-    filename = f"{uuid.uuid4()}.wav"
-    sampling_rate = tts.synthesizer.tts_config.audio["sample_rate"]
-    silence = [0] * int(0.25 * sampling_rate)
-
-    for sentence in text_to_generate:
-        # generate speech by cloning a voice using default settings
-        wav = tts.tts(text=sentence,
-                      speaker_wav="examples/female.wav",
-                      speed=1.5,
-                      language="en")
-
-        yield (sampling_rate, np.array(wav))  # np.array(wav + silence))
-
-
-with gr.Blocks() as demo:
+def add_file(history, file, agree):
+    if agree == True:
+        history = [] if history is None else history
+        text = transcribe(
+            file
+        )
+
+        history = history + [(text, None)]
+        return history
+    else:
+        gr.Warning("Please accept the Terms & Conditions!")
+        return None
+
+
+def bot(history, agree, system_prompt=""):
+    if agree == True:
+        history = [] if history is None else history
+
+        if system_prompt == "":
+            system_prompt = system_message
+
+        history[-1][1] = ""
+        for character in text_client.submit(
+            history,
+            system_prompt,
+            temperature,
+            4096,
+            temperature,
+            repetition_penalty,
+            api_name="/chat"
+        ):
+            history[-1][1] = character
+            yield history
+    else:
+        gr.Warning("Please accept the Terms & Conditions!")
+        return None
+
+
+def generate_speech(history, agree):
+    if agree == True:
+        text_to_generate = history[-1][1]
+        text_to_generate = text_to_generate.replace("\n", " ").strip()
+        text_to_generate = nltk.sent_tokenize(text_to_generate)
+
+        filename = f"{uuid.uuid4()}.wav"
+        sampling_rate = tts.synthesizer.tts_config.audio["sample_rate"]
+        silence = [0] * int(0.25 * sampling_rate)
+
+        for sentence in text_to_generate:
+            # generate speech by cloning a voice using default settings
+            wav = tts.tts(text=sentence,
+                          speaker_wav="examples/female.wav",
+                          decoder_iterations=20,
+                          speed=1.2,
+                          language="en")
+
+            yield (sampling_rate, np.array(wav))  # np.array(wav + silence))
+    else:
+        gr.Warning("Please accept the Terms & Conditions!")
+        return None
+
+
+with gr.Blocks(title=title) as demo:
+    gr.Markdown(DESCRIPTION)
+
+    agree = gr.Checkbox(
+        label="Agree",
+        value=False,
+        info="I agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml",
+    )
+
     chatbot = gr.Chatbot(
         [],
         elem_id="chatbot",
+        avatar_images=('examples/lama.jpeg', 'examples/lama2.jpeg'),
         bubble_full_width=False,
-        avatar_images=(None, (os.path.join(os.path.dirname(__file__), "avatar.png"))),
     )
 
     with gr.Row():
         txt = gr.Textbox(
-            scale=4,
+            scale=1,
             show_label=False,
             placeholder="Enter text and press enter, or speak to your microphone",
             container=False,
         )
-        btn = gr.inputs.Audio(source="microphone", type="filepath", optional=True)
+        btn = gr.Audio(source="microphone", type="filepath", scale=2)
 
     with gr.Row():
-        audio = gr.Audio(type="numpy", streaming=True, autoplay=True)
+        audio = gr.Audio(type="numpy", streaming=True, autoplay=True, label="Generated audio response", show_label=True)
+
+    clear_btn = gr.ClearButton([chatbot, audio])
 
-    txt_msg = txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
-        bot, chatbot, chatbot
-    ).then(generate_speech, chatbot, audio)
+    txt_msg = txt.submit(add_text, [chatbot, txt, agree], [chatbot, txt], queue=False).then(
+        bot, [chatbot, agree], chatbot
+    ).then(generate_speech, [chatbot, agree], audio)
 
     txt_msg.then(lambda: gr.update(interactive=True), None, [txt], queue=False)
 
-    file_msg = btn.stop_recording(add_file, [chatbot, btn], [chatbot], queue=False).then(
-        bot, chatbot, chatbot
-    ).then(generate_speech, chatbot, audio)
+    file_msg = btn.stop_recording(add_file, [chatbot, btn, agree], [chatbot], queue=False).then(
+        bot, [chatbot, agree], chatbot
+    ).then(generate_speech, [chatbot, agree], audio)
 
-    #file_msg.then(lambda: gr.update(interactive=True), None, [txt], queue=False)
+    gr.Markdown("""<div style='margin:20px auto;'>
+    <p>By using this demo you agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml</p>
+    </div>""")
 
 demo.queue()
 demo.launch(debug=True)
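For readers unfamiliar with the streaming idiom in `bot()` above: `Client.submit()` returns a `Job` immediately, and iterating over a `Job` yields each partial output a streaming endpoint produces. A minimal sketch, assuming the same `/chat` endpoint signature the app passes (the repetition-penalty value is an assumption; the app reads it from a module-level variable outside these hunks):

```python
# Streaming the Llama-2 Space's /chat endpoint, as bot() does above.
from gradio_client import Client

text_client = Client("https://ysharma-explore-llamav2-with-tgi.hf.space/")

history = [["Hello, who are you?", None]]
job = text_client.submit(
    history,                          # chat history as (user, assistant) pairs
    "You are a helpful assistant.",   # system prompt
    0.9,                              # temperature
    4096,                             # max new tokens
    0.9,                              # the app passes temperature again in this slot
    1.2,                              # repetition penalty (value assumed)
    api_name="/chat",
)
for partial in job:
    # each iteration is the full assistant reply generated so far
    print(partial)
```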
 
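Finally, the UI wiring relies on two Gradio patterns worth isolating: chaining events with `.then()`, and streaming audio out of a generator that yields `(sample_rate, np.ndarray)` chunks into a `gr.Audio(streaming=True)` output, exactly as `generate_speech` does. A self-contained toy version (the component names and the tone generator are illustrative, not from the app):

```python
import numpy as np
import gradio as gr

def add_text(history, text):
    # first link in the chain: record the user turn, clear the textbox
    return (history or []) + [(text, None)], ""

def fake_reply(history):
    # second link: fill in the assistant turn
    history[-1][1] = f"You said: {history[-1][0]}"
    return history

def tone_stream(history):
    # third link: stream audio; each yielded (rate, array) chunk plays on arrival
    sr = 24000
    t = np.linspace(0, 0.3, int(sr * 0.3), endpoint=False)
    for freq in (440, 660, 880):
        yield (sr, (0.2 * np.sin(2 * np.pi * freq * t)).astype(np.float32))

with gr.Blocks() as demo:
    chatbot = gr.Chatbot([])
    txt = gr.Textbox(container=False)
    audio = gr.Audio(type="numpy", streaming=True, autoplay=True)

    # .then() runs each step after the previous one finishes (for generators,
    # after they are exhausted), mirroring the app's submit -> bot -> speech chain.
    txt.submit(add_text, [chatbot, txt], [chatbot, txt]).then(
        fake_reply, chatbot, chatbot
    ).then(tone_stream, chatbot, audio)

demo.queue()  # streaming outputs require the queue
demo.launch()
```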