CineAI's picture
Update audio_processing/T2A.py
3a802c4 verified
raw
history blame
1.01 kB
import logging
import torch
import librosa
import soundfile as sf
from io import BytesIO
from .config import pipe_tts
# Sample rate in Hz: passed to soundfile when encoding the WAV and to librosa
# as the target rate when the encoded bytes are reloaded to measure duration.
SAMPLING_RATE = 16000
class T2A:
    """Text-to-audio helper built on the project's ``pipe_tts`` pipeline.

    The pipeline is invoked once, when the instance is created; its raw
    result is kept on ``self.output_model`` and later encoded to WAV bytes
    by :meth:`get_audio`.
    """

    def __init__(self, input_text: str) -> None:
        """Run ``pipe_tts`` on *input_text* and store whatever it returns."""
        self.output_model = pipe_tts(input_text)

    def __get_duration(self, raw: bytes) -> float:
        """Return the duration, in seconds, of the encoded audio in *raw*.

        The bytes are decoded through librosa with ``SAMPLING_RATE`` as the
        target rate, and the duration is computed from the decoded signal.
        """
        # librosa.load accepts a file-like object, so wrap the bytes in memory.
        audio, sample_rate = librosa.load(BytesIO(raw), sr=SAMPLING_RATE)
        return librosa.get_duration(y=audio, sr=sample_rate)

    def get_audio(self):
        """Encode the synthesized waveform as WAV and measure its length.

        Returns:
            A tuple ``(wav_bytes, SAMPLING_RATE, duration_seconds)`` on
            success, or ``None`` if any step fails. Failures are logged with
            a traceback and never propagated, so callers must check for
            ``None`` before unpacking.
        """
        try:
            synth = self.output_model["audio"][0]
            # Debug detail goes to the logger rather than stdout: printing the
            # full waveform on every call floods the console.
            logging.debug("synth : %s", synth)

            # NOTE(review): the WAV header is always tagged with SAMPLING_RATE,
            # whatever rate the pipeline actually produced -- confirm that
            # pipe_tts emits audio at this rate.
            with BytesIO() as buffer:
                sf.write(buffer, synth, SAMPLING_RATE, format="wav")
                output = buffer.getvalue()  # bytes
            logging.debug("type : %s", type(output))

            duration = self.__get_duration(output)
            logging.debug("duration : %s", duration)
            return output, SAMPLING_RATE, duration
        except Exception:
            # Best-effort contract preserved: report the failure (with
            # traceback) and hand back None instead of raising.
            logging.exception("T2A.get_audio failed")
            return None