DSatishchandra committed on
Commit
46c3d84
1 Parent(s): f0ffae0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -3
app.py CHANGED
@@ -1,10 +1,12 @@
1
  import gradio as gr
2
- from transformers import pipeline, TFAutoModelForSeq2SeqLM, AutoTokenizer
3
  import torch
4
 
5
  # Initialize Hugging Face pipelines
6
  speech_to_text = pipeline("automatic-speech-recognition", model="openai/whisper-large")
7
- text_to_speech = pipeline("text-to-speech", model="facebook/tacotron2", device=0) # Set device to CPU (0) or GPU (cuda)
 
 
8
 
9
  # Function to process speech to text and text to speech
10
  def process_audio(input_audio):
@@ -12,7 +14,7 @@ def process_audio(input_audio):
12
  recognized_text = speech_to_text(input_audio)["text"]
13
  print(f"Recognized text: {recognized_text}")
14
 
15
- # Process the text to speech using Tacotron2 model
16
  audio_response = text_to_speech(recognized_text)
17
  return audio_response, recognized_text
18
 
 
1
  import gradio as gr
2
+ from transformers import pipeline
3
  import torch
4
 
5
  # Initialize Hugging Face pipelines
6
  speech_to_text = pipeline("automatic-speech-recognition", model="openai/whisper-large")
7
+
8
+ # Use a valid TTS model for text-to-speech (VITS model from Hugging Face)
9
+ text_to_speech = pipeline("text-to-speech", model="espnet/kan-bayashi_ljspeech_vits", device=0) # device=0 selects the first CUDA GPU; use device=-1 to run on CPU
10
 
11
  # Function to process speech to text and text to speech
12
  def process_audio(input_audio):
 
14
  recognized_text = speech_to_text(input_audio)["text"]
15
  print(f"Recognized text: {recognized_text}")
16
 
17
+ # Process the text to speech using the TTS model
18
  audio_response = text_to_speech(recognized_text)
19
  return audio_response, recognized_text
20