Spaces:

CineAI
/

Chelsea

Sleeping

File size: 1,001 Bytes

9b0d264
 
4bb9300
 
9b0d264
0fb503b
9b0d264
4bb9300
 
 
 
 
0fb503b
4bb9300
6298db6
 
 
 
 
 
5e83c71
9b0d264
0fb503b
06fe464
0fb503b
06fe464
4bb9300
0fb503b
4bb9300
 
6298db6
 
b3fbe5e
6298db6
 
06fe464
6298db6
9b0d264
b3fbe5e

import logging

import torch
import soundfile as sf

from io import BytesIO
from .config import pipe_tts

# Sample rate passed to ``sf.write`` when encoding the WAV and returned to
# callers by ``T2A.get_audio``.
# NOTE(review): assumes ``pipe_tts`` produces audio at this rate — confirm
# against the sampling rate the pipeline itself reports.
SAMPLING_RATE = 16000

class T2A:
    """Text-to-audio helper.

    Runs ``pipe_tts`` on a text at construction time and exposes the result
    as in-memory WAV bytes through :meth:`get_audio`.
    """

    def __init__(self, input_text: str) -> None:
        """Run the TTS pipeline and keep its raw output.

        Args:
            input_text: Text handed to ``pipe_tts``.
        """
        self.output_model = pipe_tts(input_text)

    def __get_duration(self, raw: bytes) -> float:
        """Return the playback length, in seconds, of an in-memory WAV file.

        The length is read from the WAV header (frame count / frame rate),
        so no decoding or resampling of the samples is needed.

        Args:
            raw: Complete contents of a PCM WAV file.

        Returns:
            Duration in seconds.

        Raises:
            wave.Error: If ``raw`` is not a WAV file the stdlib can parse.
        """
        # Local import keeps this fix self-contained; the previous version
        # referenced ``io`` and ``librosa``, neither of which is imported by
        # this module, and therefore raised NameError on every call.
        import wave

        with wave.open(BytesIO(raw), "rb") as wav_file:
            frame_count = wav_file.getnframes()
            frame_rate = wav_file.getframerate()
        return frame_count / frame_rate

    def get_audio(self):
        """Encode the synthesised audio as WAV and report its duration.

        Returns:
            A ``(wav_bytes, SAMPLING_RATE, duration_seconds)`` tuple on
            success, or ``None`` if any step fails (the error is logged with
            its traceback). Callers must handle the ``None`` case.
        """
        try:
            # NOTE(review): takes the first entry of the pipeline's "audio"
            # output — presumably the first waveform of a batch; confirm
            # against the shape pipe_tts actually returns.
            synth = self.output_model["audio"][0]
            logging.debug("synth : %s", synth)

            with BytesIO() as buffer:
                # PCM_16 is soundfile's default WAV subtype; it is pinned
                # here because __get_duration parses the header with the
                # stdlib ``wave`` module, which only reads PCM WAV.
                sf.write(
                    buffer,
                    synth,
                    SAMPLING_RATE,
                    format='wav',
                    subtype='PCM_16',
                )
                # Copy the bytes out before the buffer is closed.
                output = buffer.getvalue()
            logging.debug("type : %s", type(output))

            duration = self.__get_duration(output)
            logging.debug("duration : %s", duration)

            return output, SAMPLING_RATE, duration
        except Exception:
            # Best-effort contract preserved: log (with traceback) and
            # signal failure with None instead of propagating.
            logging.exception("Failed to produce audio from TTS output")
            return None