Speech-ChatGPT-Speech

Runtime error

App Files Files Community

Yusin committed on Dec 10, 2022

Commit

cda6806

•

1 Parent(s): 16d35dc

Update app.py (#2)

Browse files

- Update app.py (ef8b6c34d6f8d5a5a91b8680044986ea5b383a14)

Files changed (1) hide show

app.py +71 -34

app.py CHANGED Viewed

@@ -1,46 +1,82 @@
 import tempfile
 import gradio as gr
 from neon_tts_plugin_coqui import CoquiTTS
 LANGUAGES = list(CoquiTTS.langs.keys())
 default_lang = "en"
-title = "🐸💬 - NeonAI Coqui AI TTS Plugin"
-description = "🐸💬 - a deep learning toolkit for Text-to-Speech, battle-tested in research and production"
-info = "more info at [Neon Coqui TTS Plugin](https://github.com/NeonGeckoCom/neon-tts-plugin-coqui), [Coqui TTS](https://github.com/coqui-ai/TTS)"
-badge = "https://visitor-badge-reloaded.herokuapp.com/badge?page_id=neongeckocom.neon-tts-plugin-coqui"
 coquiTTS = CoquiTTS()
-def tts(text: str, language: str):
-    print(text, language)
-    # return output
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
-        coquiTTS.get_tts(text, fp, speaker = {"language" : language})
-        return fp.name
 with gr.Blocks() as blocks:
     gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>"
                 + title
                 + "</h1>")
-    gr.Markdown(description)
     with gr.Row():# equal_height=False
         with gr.Column():# variant="panel"
-            textbox = gr.Textbox(
-                label="Input",
-                value=CoquiTTS.langs[default_lang]["sentence"],
-                max_lines=3,
-            )
             radio = gr.Radio(
                 label="Language",
                 choices=LANGUAGES,
@@ -48,20 +84,21 @@ with gr.Blocks() as blocks:
             )
             with gr.Row():# mobile_collapse=False
                 submit = gr.Button("Submit", variant="primary")
         audio = gr.Audio(label="Output", interactive=False)
-    gr.Markdown(info)
-    gr.Markdown("<center>"
-                +f'<img src={badge} alt="visitors badge"/>'
-                +"</center>")
     # actions
     submit.click(
-        tts,
-        [textbox, radio],
-        [audio],
     )
-    radio.change(lambda lang: CoquiTTS.langs[lang]["sentence"], radio, textbox)
-blocks.launch()

 import tempfile
 import gradio as gr
 from neon_tts_plugin_coqui import CoquiTTS
 LANGUAGES = list(CoquiTTS.langs.keys())
 default_lang = "en"
+# ChatGPT
+# os is needed for the environment lookup of the session token below.
+import os
+from pyChatGPT import ChatGPT
+import whisper
+# Whisper "small" checkpoint, loaded once at import time and reused by every request.
+whisper_model = whisper.load_model("small")
+# Default ChatGPT session token, read from the environment (None when the variable is unset).
+session_token = os.environ.get('SessionToken')
+title = "Voice to ChatGPT to Voice"
+#info = "more info at [Neon Coqui TTS Plugin](https://github.com/NeonGeckoCom/neon-tts-plugin-coqui), [Coqui TTS](https://github.com/coqui-ai/TTS)"
+#badge = "https://visitor-badge-reloaded.herokuapp.com/badge?page_id=neongeckocom.neon-tts-plugin-coqui"
 coquiTTS = CoquiTTS()
+# ChatGPT
+def _ask_chatgpt(token, prompt):
+    """Send ``prompt`` to ChatGPT authenticated with ``token`` and return the reply text."""
+    api = ChatGPT(token)
+    resp = api.send_message(prompt)
+    api.refresh_auth()  # refresh the authorization token
+    api.reset_conversation()  # reset the conversation
+    return resp['message']
+def chat_hf(audio, custom_token, language):
+    """Transcribe a recording, send the text to ChatGPT and speak the reply.
+
+    Args:
+        audio: Path of the recorded audio file, forwarded to ``translate``.
+        custom_token: ChatGPT session token supplied by the user; used only
+            when the request made with the module-level ``session_token`` fails.
+        language: Language key passed to the TTS engine as the speaker language.
+
+    Returns:
+        A tuple ``(whisper_text, gpt_response, wav_path)`` where ``wav_path``
+        is the name of the temporary ``.wav`` file holding the spoken reply.
+
+    Raises:
+        Whatever ``translate`` raises, or the error of the second ChatGPT
+        attempt when both tokens fail.
+    """
+    # Transcribe once, outside the retry: a failed ChatGPT call must not
+    # trigger a second Whisper pass.
+    whisper_text = translate(audio)
+    try:
+        gpt_response = _ask_chatgpt(session_token, whisper_text)
+    except Exception:
+        # Narrower than a bare ``except`` so KeyboardInterrupt/SystemExit
+        # still propagate; fall back to the user-provided token.
+        gpt_response = _ask_chatgpt(custom_token, whisper_text)
+    # to voice
+    # delete=False keeps the file on disk after the ``with`` block so the
+    # returned path is still readable by the caller.
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
+        # Speak ChatGPT's answer, not the user's own transcript.
+        coquiTTS.get_tts(gpt_response, fp, speaker = {"language" : language})
+    return whisper_text, gpt_response, fp.name
+# whisper
+def translate(audio):
+    """Transcribe the recording at path ``audio`` with Whisper and return the text.
+
+    Despite its name, this runs Whisper's "transcribe" task, so the text comes
+    back in the spoken language rather than being translated. Progress and the
+    result are printed to stdout.
+    """
+    print("""
+    —
+    Sending audio to Whisper ...
+    —
+    """)
+    audio = whisper.load_audio(audio)
+    # Fit the waveform to the input length the model expects.
+    audio = whisper.pad_or_trim(audio)
+    mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
+    # No separate detect_language() pass: its result was never used, and
+    # decode() detects the language itself when none is set in the options
+    # (it is reported back as ``transcription.language``).
+    transcript_options = whisper.DecodingOptions(task="transcribe", fp16 = False)
+    transcription = whisper.decode(whisper_model, mel, transcript_options)
+    print("language spoken: " + transcription.language)
+    print("transcript: " + transcription.text)
+    print("———————————————————————————————————————————")
+    return transcription.text
 with gr.Blocks() as blocks:
     gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>"
                 + title
                 + "</h1>")
+    #gr.Markdown(description)
     with gr.Row():# equal_height=False
         with gr.Column():# variant="panel"
+            # The microphone recording reaches the callback as a file path.
+            audio_file = gr.inputs.Audio(source="microphone", type="filepath")
+            custom_token = gr.Textbox(label='If it fails, use your own session token', placeholder="your own session token")
             radio = gr.Radio(
                 label="Language",
                 choices=LANGUAGES,
             )
             with gr.Row():# mobile_collapse=False
                 submit = gr.Button("Submit", variant="primary")
+        text1 = gr.Textbox(label="Speech to Text")
+        text2 = gr.Textbox(label="chatGPT response")
         audio = gr.Audio(label="Output", interactive=False)
+    #gr.Markdown(info)
+    #gr.Markdown("<center>"
+    #            +f'<img src={badge} alt="visitors badge"/>'
+    #            +"</center>")
     # actions
     submit.click(
+        chat_hf,
+        # Inputs and outputs are two separate arguments. Without the comma
+        # between them Python subscripts the first list with the second and
+        # raises TypeError while the UI is being built.
+        [audio_file, custom_token, radio],
+        [text1, text2, audio],
     )
+    radio.change(lambda lang: CoquiTTS.langs[lang]["sentence"], radio, text1)
+blocks.launch(debug=True)