# Text-to-audio utilities: synthesize speech, encode it as WAV bytes, and play it back.
import logging
import torch
import librosa
import soundfile as sf
from io import BytesIO
from .config import pipe_tts
from streamlit_TTS import auto_play, text_to_audio
# Sample rate passed to soundfile when encoding the synthesized audio to WAV
# and to librosa when re-loading those bytes to measure their duration.
SAMPLING_RATE = 16000
class T2A:
    """Text-to-audio helper built around the project's ``pipe_tts`` callable.

    The constructor runs ``pipe_tts`` on the input text once and stores the
    result; ``get_audio`` then encodes that result as WAV bytes.
    ``autoplay`` is independent of the stored result and plays arbitrary text
    through ``streamlit_TTS``.
    """

    def __init__(self, input_text: str):
        """Run ``pipe_tts`` on *input_text* and keep its output.

        NOTE(review): the output is later indexed as ``output["audio"][0]``;
        confirm that this matches what ``pipe_tts`` returns.
        """
        self.output_model = pipe_tts(input_text)

    def __get_duration(self, raw: bytes):
        """Return the duration of the encoded audio in *raw*.

        The bytes are decoded with ``librosa.load`` at ``SAMPLING_RATE`` and
        measured with ``librosa.get_duration``.
        """
        # Context manager so the in-memory buffer is released deterministically.
        with BytesIO(raw) as chunk:
            audio, sample_rate = librosa.load(chunk, sr=SAMPLING_RATE)
        return librosa.get_duration(y=audio, sr=sample_rate)

    def autoplay(self, text: str, language: str = "en") -> None:
        """Synthesize *text* with ``text_to_audio`` and play it via ``auto_play``.

        If *text* is not a string, a spoken notice describing the wrong type
        is played instead of failing.

        Raises:
            ValueError: if *text* is ``None``. ``ValueError`` subclasses
                ``Exception``, so existing ``except Exception`` handlers
                still catch it.
        """
        if text is None:
            raise ValueError("Text is None")
        if not isinstance(text, str):
            # Speak the problem aloud rather than raising for a wrong type.
            text = f"Text you provide is {type(text)} accepted only string type"
        auto_play(text_to_audio(text, language=language))

    def get_audio(self) -> "tuple[bytes, int, float] | None":
        """Encode the stored pipeline output as WAV and report its duration.

        Returns:
            ``(wav_bytes, SAMPLING_RATE, duration)`` on success, or ``None``
            if any step fails. Failures are logged with a traceback rather
            than raised, so callers must check for ``None`` before unpacking.

        NOTE(review): the WAV is written at the fixed ``SAMPLING_RATE``;
        confirm the pipeline actually emits audio at that rate.
        """
        try:
            synth = self.output_model["audio"][0]
            # Lazy %-formatting: the array is only stringified when DEBUG is on.
            logging.debug("synth : %s", synth)
            with BytesIO() as buffer:
                sf.write(buffer, synth, SAMPLING_RATE, format='wav')
                output = buffer.getvalue()  # bytes
            logging.debug("type : %s", type(output))
            duration = self.__get_duration(output)
            logging.debug("duration : %s", duration)
            return output, SAMPLING_RATE, duration
        except Exception:
            # Best-effort contract preserved: log (with traceback) and signal
            # failure via None instead of propagating.
            logging.exception("Failed to build audio from the TTS output")
            return None