Update app.py
Browse files
app.py
CHANGED
@@ -12,8 +12,8 @@ tts_tokenizer = AutoTokenizer.from_pretrained("Baghdad99/english_voice_tts")
|
|
12 |
tts_model = AutoModelForTextToWaveform.from_pretrained("Baghdad99/english_voice_tts")
|
13 |
|
14 |
# Define the translation and synthesis functions
|
15 |
-
def translate(audio_signal):
|
16 |
-
inputs = asr_processor(audio_signal, return_tensors="pt", padding=True)
|
17 |
logits = asr_model(inputs.input_values).logits
|
18 |
predicted_ids = torch.argmax(logits, dim=-1)
|
19 |
transcription = asr_processor.decode(predicted_ids[0])
|
@@ -26,8 +26,8 @@ def synthesise(translated_text):
|
|
26 |
audio = tts_model.generate(inputs['input_ids'])
|
27 |
return audio
|
28 |
|
29 |
-
def translate_speech(audio):
|
30 |
-
translated_text = translate(audio)
|
31 |
synthesised_speech = synthesise(translated_text)
|
32 |
synthesised_speech = (synthesised_speech.numpy() * max_range).astype(np.int16)
|
33 |
return 16000, synthesised_speech
|
|
|
12 |
tts_model = AutoModelForTextToWaveform.from_pretrained("Baghdad99/english_voice_tts")
|
13 |
|
14 |
# Define the translation and synthesis functions
|
15 |
+
def translate(audio_signal, sampling_rate):
|
16 |
+
inputs = asr_processor(audio_signal, return_tensors="pt", padding=True, sampling_rate=sampling_rate)
|
17 |
logits = asr_model(inputs.input_values).logits
|
18 |
predicted_ids = torch.argmax(logits, dim=-1)
|
19 |
transcription = asr_processor.decode(predicted_ids[0])
|
|
|
26 |
audio = tts_model.generate(inputs['input_ids'])
|
27 |
return audio
|
28 |
|
29 |
+
def translate_speech(audio, sampling_rate):
|
30 |
+
translated_text = translate(audio, sampling_rate)
|
31 |
synthesised_speech = synthesise(translated_text)
|
32 |
synthesised_speech = (synthesised_speech.numpy() * max_range).astype(np.int16)
|
33 |
return 16000, synthesised_speech
|