Spaces:
Runtime error
Runtime error
Commit
·
dcb549e
1
Parent(s):
341a129
Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
-
import numpy as np
|
4 |
import whisper
|
5 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
6 |
from gtts import gTTS
|
@@ -13,12 +13,9 @@ tokenizer = AutoTokenizer.from_pretrained("alirezamsh/small100")
|
|
13 |
model = AutoModelForSeq2SeqLM.from_pretrained("alirezamsh/small100")
|
14 |
|
15 |
def translate_speech(audio, target_lang):
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
audio = [audio]
|
20 |
-
audio = np.array(audio).astype("float32") # Convert audio to float32
|
21 |
-
audio = whisper.pad_or_trim(audio, whisper_model.audio_config.sample_rate)
|
22 |
mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
|
23 |
_, probs = whisper_model.detect_language(mel)
|
24 |
options = whisper.DecodingOptions(fp16=False)
|
@@ -39,8 +36,6 @@ def translate_speech(audio, target_lang):
|
|
39 |
return audio_path
|
40 |
|
41 |
|
42 |
-
|
43 |
-
|
44 |
def translate_speech_interface(audio, target_lang):
|
45 |
translated_audio = translate_speech(audio, target_lang)
|
46 |
translated_audio_bytes = open(translated_audio, "rb").read()
|
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
+
import numpy as np
|
4 |
import whisper
|
5 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
6 |
from gtts import gTTS
|
|
|
13 |
model = AutoModelForSeq2SeqLM.from_pretrained("alirezamsh/small100")
|
14 |
|
15 |
def translate_speech(audio, target_lang):
|
16 |
+
audio = audio[0].astype("float32") # Extract audio from tuple and convert to float32
|
17 |
+
sample_rate = whisper.sample_rate # Get sample rate from whisper_model
|
18 |
+
audio = whisper.pad_or_trim(audio, sample_rate)
|
|
|
|
|
|
|
19 |
mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
|
20 |
_, probs = whisper_model.detect_language(mel)
|
21 |
options = whisper.DecodingOptions(fp16=False)
|
|
|
36 |
return audio_path
|
37 |
|
38 |
|
|
|
|
|
39 |
def translate_speech_interface(audio, target_lang):
|
40 |
translated_audio = translate_speech(audio, target_lang)
|
41 |
translated_audio_bytes = open(translated_audio, "rb").read()
|