Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import gradio as gr
|
2 |
from transformers import pipeline, AutoTokenizer
|
3 |
import numpy as np
|
|
|
4 |
|
5 |
# Load the pipeline for speech recognition and translation
|
6 |
pipe = pipeline(
|
@@ -12,9 +13,13 @@ translator = pipeline("text2text-generation", model="Baghdad99/saad-hausa-text-t
|
|
12 |
tts = pipeline("text-to-speech", model="Baghdad99/english_voice_tts")
|
13 |
|
14 |
# Define the function to translate speech
|
15 |
-
def translate_speech(
|
16 |
-
#
|
17 |
-
|
|
|
|
|
|
|
|
|
18 |
|
19 |
# Use the speech recognition pipeline to transcribe the audio
|
20 |
output = pipe(audio_data)
|
@@ -58,7 +63,7 @@ def translate_speech(audio):
|
|
58 |
# Define the Gradio interface
|
59 |
iface = gr.Interface(
|
60 |
fn=translate_speech,
|
61 |
-
inputs=gr.inputs.Audio(
|
62 |
outputs=gr.outputs.Audio(type="numpy"),
|
63 |
title="Hausa to English Translation",
|
64 |
description="Realtime demo for Hausa to English translation using speech recognition and text-to-speech synthesis."
|
|
|
1 |
import gradio as gr
|
2 |
from transformers import pipeline, AutoTokenizer
|
3 |
import numpy as np
|
4 |
+
from pydub import AudioSegment
|
5 |
|
6 |
# Load the pipeline for speech recognition and translation
|
7 |
pipe = pipeline(
|
|
|
13 |
tts = pipeline("text-to-speech", model="Baghdad99/english_voice_tts")
|
14 |
|
15 |
# Define the function to translate speech
|
16 |
+
def translate_speech(audio_file):
|
17 |
+
# Load the uploaded audio with pydub, decoding it as MP3 from the file's path
|
18 |
+
audio = AudioSegment.from_mp3(audio_file.name)
|
19 |
+
|
20 |
+
# Convert the audio to mono and get the raw data
|
21 |
+
audio = audio.set_channels(1)
|
22 |
+
audio_data = np.array(audio.get_array_of_samples())
|
23 |
|
24 |
# Use the speech recognition pipeline to transcribe the audio
|
25 |
output = pipe(audio_data)
|
|
|
63 |
# Define the Gradio interface
|
64 |
iface = gr.Interface(
|
65 |
fn=translate_speech,
|
66 |
+
inputs=gr.inputs.Audio(type="file"), # Pass the upload as a file object; translate_speech opens it via its .name path
|
67 |
outputs=gr.outputs.Audio(type="numpy"),
|
68 |
title="Hausa to English Translation",
|
69 |
description="Realtime demo for Hausa to English translation using speech recognition and text-to-speech synthesis."
|