mrfakename committed on
Commit
0cc615c
1 Parent(s): ab9dfa8

Sync from GitHub repo

Browse files

This Space is synced from the GitHub repo: https://github.com/SWivid/F5-TTS. Please submit contributions to the Space there.

Files changed (1) hide show
  1. app.py +45 -10
app.py CHANGED
@@ -540,15 +540,19 @@ Have a conversation with an AI using your reference voice!
540
  chatbot_interface = gr.Chatbot(label="Conversation")
541
 
542
  with gr.Row():
543
- with gr.Column():
544
- audio_output_chat = gr.Audio(autoplay=True)
545
  with gr.Column():
546
  audio_input_chat = gr.Microphone(
547
  label="Speak your message",
548
  type="filepath",
549
  )
550
-
551
- clear_btn_chat = gr.Button("Clear Conversation")
 
 
 
 
 
 
552
 
553
  conversation_state = gr.State(
554
  value=[
@@ -561,13 +565,14 @@ Have a conversation with an AI using your reference voice!
561
 
562
  # Modify process_audio_input to use model and tokenizer from state
563
  @gpu_decorator
564
- def process_audio_input(audio_path, history, conv_state):
565
- """Handle audio input from user"""
566
- if not audio_path:
 
567
  return history, conv_state, ""
568
 
569
- text = ""
570
- text = preprocess_ref_audio_text(audio_path, text, clip_short=False)[1]
571
 
572
  if not text.strip():
573
  return history, conv_state, ""
@@ -621,7 +626,7 @@ Have a conversation with an AI using your reference voice!
621
  # Handle audio input
622
  audio_input_chat.stop_recording(
623
  process_audio_input,
624
- inputs=[audio_input_chat, chatbot_interface, conversation_state],
625
  outputs=[chatbot_interface, conversation_state],
626
  ).then(
627
  generate_audio_response,
@@ -633,6 +638,36 @@ Have a conversation with an AI using your reference voice!
633
  audio_input_chat,
634
  )
635
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
636
  # Handle clear button
637
  clear_btn_chat.click(
638
  clear_conversation,
 
540
  chatbot_interface = gr.Chatbot(label="Conversation")
541
 
542
  with gr.Row():
 
 
543
  with gr.Column():
544
  audio_input_chat = gr.Microphone(
545
  label="Speak your message",
546
  type="filepath",
547
  )
548
+ audio_output_chat = gr.Audio(autoplay=True)
549
+ with gr.Column():
550
+ text_input_chat = gr.Textbox(
551
+ label="Type your message",
552
+ lines=1,
553
+ )
554
+ send_btn_chat = gr.Button("Send")
555
+ clear_btn_chat = gr.Button("Clear Conversation")
556
 
557
  conversation_state = gr.State(
558
  value=[
 
565
 
566
  # Modify process_audio_input to use model and tokenizer from state
567
  @gpu_decorator
568
+ def process_audio_input(audio_path, text, history, conv_state):
569
+ """Handle audio or text input from user"""
570
+
571
+ if not audio_path and not text.strip():
572
  return history, conv_state, ""
573
 
574
+ if audio_path:
575
+ text = preprocess_ref_audio_text(audio_path, text)[1]
576
 
577
  if not text.strip():
578
  return history, conv_state, ""
 
626
  # Handle audio input
627
  audio_input_chat.stop_recording(
628
  process_audio_input,
629
+ inputs=[audio_input_chat, text_input_chat, chatbot_interface, conversation_state],
630
  outputs=[chatbot_interface, conversation_state],
631
  ).then(
632
  generate_audio_response,
 
638
  audio_input_chat,
639
  )
640
 
641
+ # Handle text input
642
+ text_input_chat.submit(
643
+ process_audio_input,
644
+ inputs=[audio_input_chat, text_input_chat, chatbot_interface, conversation_state],
645
+ outputs=[chatbot_interface, conversation_state],
646
+ ).then(
647
+ generate_audio_response,
648
+ inputs=[chatbot_interface, ref_audio_chat, ref_text_chat, model_choice_chat, remove_silence_chat],
649
+ outputs=[audio_output_chat],
650
+ ).then(
651
+ lambda: None,
652
+ None,
653
+ text_input_chat,
654
+ )
655
+
656
+ # Handle send button
657
+ send_btn_chat.click(
658
+ process_audio_input,
659
+ inputs=[audio_input_chat, text_input_chat, chatbot_interface, conversation_state],
660
+ outputs=[chatbot_interface, conversation_state],
661
+ ).then(
662
+ generate_audio_response,
663
+ inputs=[chatbot_interface, ref_audio_chat, ref_text_chat, model_choice_chat, remove_silence_chat],
664
+ outputs=[audio_output_chat],
665
+ ).then(
666
+ lambda: None,
667
+ None,
668
+ text_input_chat,
669
+ )
670
+
671
  # Handle clear button
672
  clear_btn_chat.click(
673
  clear_conversation,