|
import logging |
|
|
|
import torch |
|
import librosa |
|
import soundfile as sf |
|
|
|
from io import BytesIO |
|
from .config import pipe_tts |
|
|
|
from streamlit_TTS import auto_play, text_to_audio |
|
|
|
# Sample rate, in Hz, used both when encoding the synthesized waveform to WAV
# and when re-loading it to measure its duration.
# NOTE(review): assumes the TTS pipeline emits audio at this rate -- confirm
# against the model configuration.
SAMPLING_RATE = 16000
|
|
|
class T2A:
    """Text-to-audio helper built on the project's TTS pipeline.

    The constructor feeds the text to ``pipe_tts`` and keeps the result for
    :meth:`get_audio`. :meth:`autoplay` instead speaks the text through the
    ``streamlit_TTS`` helpers.
    """

    def __init__(self, input_text: str = None):
        """Store *input_text* and run it through ``pipe_tts``.

        Args:
            input_text: Text to synthesize.

        NOTE(review): ``pipe_tts`` is called even when *input_text* is
        ``None`` or not a string -- confirm the pipeline tolerates that.
        """
        self.text = input_text
        self.output_model = pipe_tts(input_text)

    def __get_duration(self, raw: bytes):
        """Return the duration (seconds, per librosa) of encoded audio bytes.

        Args:
            raw: Encoded audio data (``get_audio`` passes WAV bytes).

        Returns:
            The value of ``librosa.get_duration`` for the audio decoded and
            resampled to ``SAMPLING_RATE``.
        """
        audio, sample_rate = librosa.load(BytesIO(raw), sr=SAMPLING_RATE)
        return librosa.get_duration(y=audio, sr=sample_rate)

    def autoplay(self, lang: str = "en") -> None:
        """Convert the stored text to speech and auto-play it.

        When the stored text is not a string, a spoken notice naming the
        offending type is played instead of the text itself.

        Args:
            lang: Language code forwarded to ``text_to_audio``.

        Raises:
            ValueError: If the stored text is ``None``.
        """
        if self.text is None:
            raise ValueError("Text is None")

        if isinstance(self.text, str):
            message = self.text
        else:
            message = f"Text you provide is {type(self.text)} accepted only string type"

        # Both cases must use the caller-supplied ``lang``; the notice branch
        # previously referenced an undefined name and raised NameError.
        auto_play(text_to_audio(message, language=lang))

    def get_audio(self):
        """Encode the synthesized waveform as WAV and measure its duration.

        Returns:
            A ``(wav_bytes, SAMPLING_RATE, duration)`` tuple on success, or
            ``None`` when any step fails (the failure is logged with its
            traceback rather than raised).
        """
        try:
            # Assumes the pipeline result is a mapping whose "audio" entry
            # holds the waveform as its first element -- TODO confirm.
            synth = self.output_model["audio"][0]
            logging.debug("synth : %s", synth)

            with BytesIO() as buffer:
                sf.write(buffer, synth, SAMPLING_RATE, format="wav")
                # Copy the bytes out before the buffer is closed.
                output = buffer.getvalue()
            logging.debug("type : %s", type(output))

            duration = self.__get_duration(output)
            logging.debug("duration : %s", duration)
        except Exception:
            # Best-effort contract: report the problem and signal failure
            # with None instead of propagating.
            logging.exception("Failed to build audio from the TTS output")
            return None

        return output, SAMPLING_RATE, duration