Spaces:

JohnInizio
/

persona-chat-demo

Runtime error

App Files Files Community

John Langley committed on Aug 12

Commit

94648ab

•

1 Parent(s): f60cc77

trying things with cpu

Browse files

Files changed (1) hide show

app.py +33 -19

app.py CHANGED Viewed

@@ -32,13 +32,15 @@ from faster_whisper import WhisperModel
 import gradio as gr
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 #from TTS.tts.configs.xtts_config import XttsConfig
 #from TTS.tts.models.xtts import Xtts
 #from TTS.utils.generic_utils import get_user_data_dir
 #from TTS.utils.manage import ModelManager
 # Local imports
-from utils import get_sentence, wave_header_chunk #, generate_speech_for_sentence,
 # Load Whisper ASR model
 print("Loading Whisper ASR")
@@ -52,7 +54,19 @@ mistral_llm = Llama(model_path=mistral_model_path,n_gpu_layers=35,max_new_tokens
 # Load XTTS Model
-#print("Loading XTTS model")
 #os.environ["COQUI_TOS_AGREED"] = "1"
 #tts_model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
 #ModelManager().download_model(tts_model_name)
@@ -140,23 +154,23 @@ with gr.Blocks(title="Voice chat with LLM") as demo:
         #yield ("", chatbot_history)
         # Helper function to handle the speech generation and yielding process
-    #    def handle_speech_generation(sentence, chatbot_history, chatbot_voice):
-    #        if sentence != "":
-    #            print("Processing sentence")
-    #            generated_speech = generate_speech_for_sentence(chatbot_history, chatbot_voice, sentence, xtts_model, xtts_supported_languages=config.languages, return_as_byte=True)
-    #            if generated_speech is not None:
-    #                _, audio_dict = generated_speech
-    #                yield (sentence, chatbot_history, audio_dict["value"])
-    #    if initial_greeting:
-    #        # Process only the initial greeting if specified
-    #        for _, sentence in chatbot_history:
-    #            yield from handle_speech_generation(sentence, chatbot_history, chatbot_voice)
-    #    else:
-    #        # Continuously get and process sentences from a generator function
-    #        for sentence, chatbot_history in get_sentence(chatbot_history, mistral_llm):
-    #            print("Inserting sentence to queue")
-    #            yield from handle_speech_generation(sentence, chatbot_history, chatbot_voice)
     txt_msg = txt_box.submit(fn=add_text, inputs=[chatbot, txt_box], outputs=[chatbot, txt_box], queue=False
                              ).then(fn=generate_speech,  inputs=[chatbot,chatbot_voice], outputs=[sentence, chatbot, audio_playback])

 import gradio as gr
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
+from TTS.api import TTS
+from TTS.utils.manage import ModelManager
 #from TTS.tts.configs.xtts_config import XttsConfig
 #from TTS.tts.models.xtts import Xtts
 #from TTS.utils.generic_utils import get_user_data_dir
 #from TTS.utils.manage import ModelManager
 # Local imports
+from utils import get_sentence, wave_header_chunk, generate_speech_for_sentence
 # Load Whisper ASR model
 print("Loading Whisper ASR")
 # Load XTTS Model
+print("Loading XTTS model")
+model_names = TTS().list_models()
+print(model_names.__dict__)
+print(model_names.__dir__())
+model_name = "tts_models/multilingual/multi-dataset/xtts_v2" # Moved to XTTS v2 because xtts_v1 raises a KeyError; presumably v1 can still be selected by using an older GitHub release.
+#m = ModelManager().download_model(model_name)
+#print(m)
+m = model_name
+xtts_model = TTS(model_name, gpu=False)
+xtts_model.to("cpu") # run on CPU: for machines with no GPU or with an AMD GPU
+#tts.to("cuda") # cuda only
 #os.environ["COQUI_TOS_AGREED"] = "1"
 #tts_model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
 #ModelManager().download_model(tts_model_name)
         #yield ("", chatbot_history)
         # Helper function to handle the speech generation and yielding process
+        def handle_speech_generation(sentence, chatbot_history, chatbot_voice):
+            if sentence != "":
+                print("Processing sentence")
+                generated_speech = generate_speech_for_sentence(chatbot_history, chatbot_voice, sentence, xtts_model, xtts_supported_languages=config.languages, return_as_byte=True)
+                if generated_speech is not None:
+                    _, audio_dict = generated_speech
+                    yield (sentence, chatbot_history, audio_dict["value"])
+        if initial_greeting:
+            # Process only the initial greeting if specified
+            for _, sentence in chatbot_history:
+                yield from handle_speech_generation(sentence, chatbot_history, chatbot_voice)
+        else:
+            # Continuously get and process sentences from a generator function
+            for sentence, chatbot_history in get_sentence(chatbot_history, mistral_llm):
+                print("Inserting sentence to queue")
+                yield from handle_speech_generation(sentence, chatbot_history, chatbot_voice)
     txt_msg = txt_box.submit(fn=add_text, inputs=[chatbot, txt_box], outputs=[chatbot, txt_box], queue=False
                              ).then(fn=generate_speech,  inputs=[chatbot,chatbot_voice], outputs=[sentence, chatbot, audio_playback])