Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import gradio as gr
|
2 |
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForTextToWaveform
|
3 |
-
import torch
|
4 |
|
5 |
# Load your pretrained models
|
6 |
asr_model = Wav2Vec2ForCTC.from_pretrained("Baghdad99/saad-speech-recognition-hausa-audio-to-text")
|
@@ -25,7 +25,7 @@ def translate_speech(speech):
|
|
25 |
# Transcribe the speech to text
|
26 |
inputs = asr_processor(audio_signal, return_tensors="pt", padding=True)
|
27 |
logits = asr_model(inputs.input_values).logits
|
28 |
-
predicted_ids = torch.argmax(logits, dim=-1)
|
29 |
transcription = asr_processor.decode(predicted_ids[0])
|
30 |
|
31 |
# Translate the text
|
@@ -40,5 +40,5 @@ def translate_speech(speech):
|
|
40 |
|
41 |
|
42 |
# Define the Gradio interface
|
43 |
-
iface = gr.Interface(fn=translate_speech, inputs=gr.inputs.Audio(source="microphone"), outputs="audio")
|
44 |
iface.launch()
|
|
|
1 |
import gradio as gr
|
2 |
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForTextToWaveform
|
3 |
+
import torch
|
4 |
|
5 |
# Load your pretrained models
|
6 |
asr_model = Wav2Vec2ForCTC.from_pretrained("Baghdad99/saad-speech-recognition-hausa-audio-to-text")
|
|
|
25 |
# Transcribe the speech to text
|
26 |
inputs = asr_processor(audio_signal, return_tensors="pt", padding=True)
|
27 |
logits = asr_model(inputs.input_values).logits
|
28 |
+
predicted_ids = torch.argmax(logits, dim=-1)
|
29 |
transcription = asr_processor.decode(predicted_ids[0])
|
30 |
|
31 |
# Translate the text
|
|
|
40 |
|
41 |
|
42 |
# Define the Gradio interface
|
43 |
+
iface = gr.Interface(fn=translate_speech, inputs=gr.inputs.Audio(source="microphone", type="numpy"), outputs="audio")
|
44 |
iface.launch()
|