import logging

import torch
import librosa
import soundfile as sf

from io import BytesIO
from .config import pipe_tts

from streamlit_TTS import auto_play, text_to_audio

# Sample rate handed to soundfile when encoding the synthesized WAV and to
# librosa when loading that WAV back to measure its duration.
SAMPLING_RATE = 16_000

class T2A:
    """Text-to-audio helper built around the project's ``pipe_tts`` pipeline.

    The pipeline is invoked once, at construction time, and its raw result is
    kept on ``self.output_model``. ``get_audio`` later encodes that result as
    WAV bytes. ``autoplay`` does not use the pipeline result; it speaks
    arbitrary text through ``streamlit_TTS``.
    """

    def __init__(self, input_text: str):
        """Run ``pipe_tts`` on *input_text* and store whatever it returns."""
        self.output_model = pipe_tts(input_text)

    def __get_duration(self, raw: bytes) -> float:
        """Return the duration (seconds, per librosa) of the encoded audio.

        Args:
            raw: Encoded audio bytes readable by ``librosa.load``.
        """
        chunk = BytesIO(raw)
        # Loading at SAMPLING_RATE keeps the measurement on the same rate the
        # bytes were written with in ``get_audio``.
        audio, sample_rate = librosa.load(chunk, sr=SAMPLING_RATE)
        return librosa.get_duration(y=audio, sr=sample_rate)

    def autoplay(self, text: str, language: str = "en") -> None:
        """Convert *text* to speech and play it automatically.

        A non-string *text* is not rejected: a spoken message describing the
        type problem is played in its place.

        Args:
            text: The text to speak.
            language: Language code forwarded to ``text_to_audio``.

        Raises:
            ValueError: If *text* is ``None``.
        """
        if text is None:
            # ValueError subclasses Exception, so callers that caught the
            # previous bare Exception keep working.
            raise ValueError("Text is None")

        if not isinstance(text, str):
            text = f"Text you provide is {type(text)} accepted only string type"

        auto_play(text_to_audio(text, language=language))

    def get_audio(self):
        """Encode the synthesized audio as in-memory WAV bytes.

        Returns:
            A ``(wav_bytes, SAMPLING_RATE, duration)`` tuple on success, or
            ``None`` when any step fails (the failure is logged together with
            its traceback rather than propagated).
        """
        try:
            # NOTE(review): assumes the pipeline result is indexable as
            # ["audio"][0] and that this entry is the waveform to write. The
            # rate comes from SAMPLING_RATE, not from the pipeline output;
            # confirm the model really produces audio at that rate.
            synth = self.output_model["audio"][0]
            logging.debug("synth : %s", synth)

            with BytesIO() as buffer:
                sf.write(buffer, synth, SAMPLING_RATE, format='wav')
                output = buffer.getvalue()  # bytes

            logging.debug("type : %s", type(output))

            duration = self.__get_duration(output)
            logging.debug("duration : %s", duration)

            return output, SAMPLING_RATE, duration
        except Exception:
            # Best-effort contract kept from the original: never raise to the
            # caller, but record the full traceback so failures are diagnosable.
            logging.exception("Failed to build audio from the TTS output")
            return None