John Langley committed on
Commit
94648ab
1 Parent(s): f60cc77

trying things with cpu

Browse files
Files changed (1) hide show
  1. app.py +33 -19
app.py CHANGED
@@ -32,13 +32,15 @@ from faster_whisper import WhisperModel
32
  import gradio as gr
33
  from huggingface_hub import hf_hub_download
34
  from llama_cpp import Llama
 
 
35
  #from TTS.tts.configs.xtts_config import XttsConfig
36
  #from TTS.tts.models.xtts import Xtts
37
  #from TTS.utils.generic_utils import get_user_data_dir
38
  #from TTS.utils.manage import ModelManager
39
 
40
  # Local imports
41
- from utils import get_sentence, wave_header_chunk #, generate_speech_for_sentence,
42
 
43
  # Load Whisper ASR model
44
  print("Loading Whisper ASR")
@@ -52,7 +54,19 @@ mistral_llm = Llama(model_path=mistral_model_path,n_gpu_layers=35,max_new_tokens
52
 
53
 
54
  # Load XTTS Model
55
- #print("Loading XTTS model")
 
 
 
 
 
 
 
 
 
 
 
 
56
  #os.environ["COQUI_TOS_AGREED"] = "1"
57
  #tts_model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
58
  #ModelManager().download_model(tts_model_name)
@@ -140,23 +154,23 @@ with gr.Blocks(title="Voice chat with LLM") as demo:
140
  #yield ("", chatbot_history)
141
 
142
  # Helper function to handle the speech generation and yielding process
143
- # def handle_speech_generation(sentence, chatbot_history, chatbot_voice):
144
- # if sentence != "":
145
- # print("Processing sentence")
146
- # generated_speech = generate_speech_for_sentence(chatbot_history, chatbot_voice, sentence, xtts_model, xtts_supported_languages=config.languages, return_as_byte=True)
147
- # if generated_speech is not None:
148
- # _, audio_dict = generated_speech
149
- # yield (sentence, chatbot_history, audio_dict["value"])
150
-
151
- # if initial_greeting:
152
- # # Process only the initial greeting if specified
153
- # for _, sentence in chatbot_history:
154
- # yield from handle_speech_generation(sentence, chatbot_history, chatbot_voice)
155
- # else:
156
- # # Continuously get and process sentences from a generator function
157
- # for sentence, chatbot_history in get_sentence(chatbot_history, mistral_llm):
158
- # print("Inserting sentence to queue")
159
- # yield from handle_speech_generation(sentence, chatbot_history, chatbot_voice)
160
 
161
  txt_msg = txt_box.submit(fn=add_text, inputs=[chatbot, txt_box], outputs=[chatbot, txt_box], queue=False
162
  ).then(fn=generate_speech, inputs=[chatbot,chatbot_voice], outputs=[sentence, chatbot, audio_playback])
 
32
  import gradio as gr
33
  from huggingface_hub import hf_hub_download
34
  from llama_cpp import Llama
35
+ from TTS.api import TTS
36
+ from TTS.utils.manage import ModelManager
37
  #from TTS.tts.configs.xtts_config import XttsConfig
38
  #from TTS.tts.models.xtts import Xtts
39
  #from TTS.utils.generic_utils import get_user_data_dir
40
  #from TTS.utils.manage import ModelManager
41
 
42
  # Local imports
43
+ from utils import get_sentence, wave_header_chunk, generate_speech_for_sentence
44
 
45
  # Load Whisper ASR model
46
  print("Loading Whisper ASR")
 
54
 
55
 
56
  # Load XTTS Model
57
+ print("Loading XTTS model")
58
+ model_names = TTS().list_models()
59
+ print(model_names.__dict__)
60
+ print(model_names.__dir__())
61
+ model_name = "tts_models/multilingual/multi-dataset/xtts_v2" # moved to v2, since xtts_v1 raises a KeyError; presumably v1 can still be selected via an older GitHub release.
62
+
63
+ #m = ModelManager().download_model(model_name)
64
+ #print(m)
65
+ m = model_name
66
+
67
+ xtts_model = TTS(model_name, gpu=False)
68
+ xtts_model.to("cpu") # CPU-only (no NVIDIA GPU, or AMD hardware)
69
+ #tts.to("cuda") # cuda only
70
  #os.environ["COQUI_TOS_AGREED"] = "1"
71
  #tts_model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
72
  #ModelManager().download_model(tts_model_name)
 
154
  #yield ("", chatbot_history)
155
 
156
  # Helper function to handle the speech generation and yielding process
157
+ def handle_speech_generation(sentence, chatbot_history, chatbot_voice):
158
+ if sentence != "":
159
+ print("Processing sentence")
160
+ generated_speech = generate_speech_for_sentence(chatbot_history, chatbot_voice, sentence, xtts_model, xtts_supported_languages=config.languages, return_as_byte=True)
161
+ if generated_speech is not None:
162
+ _, audio_dict = generated_speech
163
+ yield (sentence, chatbot_history, audio_dict["value"])
164
+
165
+ if initial_greeting:
166
+ # Process only the initial greeting if specified
167
+ for _, sentence in chatbot_history:
168
+ yield from handle_speech_generation(sentence, chatbot_history, chatbot_voice)
169
+ else:
170
+ # Continuously get and process sentences from a generator function
171
+ for sentence, chatbot_history in get_sentence(chatbot_history, mistral_llm):
172
+ print("Inserting sentence to queue")
173
+ yield from handle_speech_generation(sentence, chatbot_history, chatbot_voice)
174
 
175
  txt_msg = txt_box.submit(fn=add_text, inputs=[chatbot, txt_box], outputs=[chatbot, txt_box], queue=False
176
  ).then(fn=generate_speech, inputs=[chatbot,chatbot_voice], outputs=[sentence, chatbot, audio_playback])