Spaces:

Baghdad99
/

ha-en

Sleeping

Baghdad99 committed on Dec 6, 2023

Commit

382ed84

•

1 Parent(s): 3369603

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import gradio as gr
 from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForTextToWaveform
-import torch  # Add the import statement for torch
 # Load your pretrained models
 asr_model = Wav2Vec2ForCTC.from_pretrained("Baghdad99/saad-speech-recognition-hausa-audio-to-text")
@@ -25,7 +25,7 @@ def translate_speech(speech):
     # Transcribe the speech to text
     inputs = asr_processor(audio_signal, return_tensors="pt", padding=True)
     logits = asr_model(inputs.input_values).logits
-    predicted_ids = torch.argmax(logits, dim=-1)  # Add torch module to access argmax function
     transcription = asr_processor.decode(predicted_ids[0])
     # Translate the text
@@ -40,5 +40,5 @@ def translate_speech(speech):
 # Define the Gradio interface
-iface = gr.Interface(fn=translate_speech, inputs=gr.inputs.Audio(source="microphone"), outputs="audio")
 iface.launch()

 import gradio as gr
 from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForTextToWaveform
+import torch
 # Load your pretrained models
 asr_model = Wav2Vec2ForCTC.from_pretrained("Baghdad99/saad-speech-recognition-hausa-audio-to-text")
     # Transcribe the speech to text
     inputs = asr_processor(audio_signal, return_tensors="pt", padding=True)
     logits = asr_model(inputs.input_values).logits
+    predicted_ids = torch.argmax(logits, dim=-1)
     transcription = asr_processor.decode(predicted_ids[0])
     # Translate the text
 # Define the Gradio interface
+iface = gr.Interface(fn=translate_speech, inputs=gr.inputs.Audio(source="microphone", type="numpy"), outputs="audio")
 iface.launch()