import logging
from io import BytesIO
from typing import Optional, Tuple

import librosa
import soundfile as sf
import torch
from streamlit_TTS import auto_play, text_to_audio

from .config import pipe_tts

logger = logging.getLogger(__name__)

# Sample rate (Hz) used both when writing the WAV and when measuring it.
# NOTE(review): this is hard-coded rather than read from the pipeline output;
# confirm that the model behind ``pipe_tts`` really emits 16 kHz audio.
SAMPLING_RATE = 16000


class T2A:
    """Text-to-audio helper built around ``pipe_tts``.

    The constructor calls ``pipe_tts`` on the input text and stores the
    result; ``get_audio`` turns that result into WAV bytes. ``autoplay`` is
    independent of the stored result and speaks arbitrary text through
    ``streamlit_TTS``.
    """

    def __init__(self, input_text: str):
        """Run ``pipe_tts`` on *input_text* and keep its output.

        Args:
            input_text: Text handed to ``pipe_tts``.
        """
        # presumably a mapping with an "audio" entry (that is how get_audio
        # reads it) -- verify against .config
        self.output_model = pipe_tts(input_text)

    def __get_duration(self, raw: bytes) -> float:
        """Return the duration of encoded audio, as reported by librosa.

        Args:
            raw: Encoded audio bytes (``get_audio`` passes WAV data).

        Returns:
            The value of ``librosa.get_duration`` for the decoded signal.
        """
        # Use a context manager so the in-memory buffer is always released.
        with BytesIO(raw) as chunk:
            audio, sample_rate = librosa.load(chunk, sr=SAMPLING_RATE)
        return librosa.get_duration(y=audio, sr=sample_rate)

    def autoplay(self, text: str, language: str = "en") -> None:
        """Convert *text* to audio and play it via ``streamlit_TTS``.

        If *text* is not a string, a message describing the wrong type is
        spoken instead.

        Args:
            text: Text to speak.
            language: Language code forwarded to ``text_to_audio``.

        Raises:
            ValueError: If *text* is ``None``.
        """
        if text is None:
            # ValueError is still an Exception, so existing handlers keep working.
            raise ValueError("Text is None")
        if not isinstance(text, str):
            text = f"Text you provide is {type(text)} accepted only string type"
        auto_play(text_to_audio(text, language=language))

    def get_audio(self) -> Optional[Tuple[bytes, int, float]]:
        """Encode the stored pipeline output as WAV bytes.

        Returns:
            ``(wav_bytes, SAMPLING_RATE, duration)`` on success, or ``None``
            if anything fails while encoding or measuring the audio. The
            failure is logged with its traceback; callers must check for
            ``None`` before unpacking.
        """
        try:
            synth = self.output_model["audio"][0]
            with BytesIO() as buffer:
                sf.write(buffer, synth, SAMPLING_RATE, format="wav")
                output = buffer.getvalue()  # bytes
            duration = self.__get_duration(output)
        except Exception:
            # Best-effort by design: report the full traceback and signal
            # failure with None instead of propagating.
            logger.exception("Failed to build WAV audio from pipeline output")
            return None

        logger.debug("synth : %s", synth)
        logger.debug("type : %s", type(output))
        logger.debug("duration : %s", duration)
        return output, SAMPLING_RATE, duration