|
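"""Audio tests for text-to-speech: Coqui XTTS synthesis and streaming (src.tts_coqui) and the src.tts pipeline."""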
import os

import pytest

from tests.utils import wrap_test_forked
from src.tts_sentence_parsing import init_sentence_state
from tests.test_sentence_parsing import bot_list
|
|
@pytest.mark.audio
@wrap_test_forked
def test_sentence_to_wave():
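    """Synthesize a full sentence with Coqui XTTS and verify the result is a readable WAV file."""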
    os.environ['CUDA_HOME'] = '/usr/local/cuda-11.7'
    from src.tts_coqui import sentence_to_wave, get_xtt, get_latent, get_role_to_wave_map

    chatbot_role = "Female AI Assistant"
    sentence = "I am an AI assistant. I can help you with any tasks."

    tts_speed = 1.0
    model, supported_languages = get_xtt()
    # Build the speaker latent from the reference WAV associated with the chosen role.
    latent = get_latent(get_role_to_wave_map()[chatbot_role], model=model)
    generated_speech = sentence_to_wave(sentence,
                                        supported_languages,
                                        tts_speed,
                                        latent=latent,
                                        model=model,
                                        return_as_byte=False,
                                        return_nonbyte_as_file=True,
                                        return_gradio=False)
    print(generated_speech, flush=True)

    # wave.open validates the RIFF/WAVE header and raises wave.Error if the
    # result is not a valid WAV file; also check the audio is non-empty.
    import wave
    with wave.open(generated_speech, mode='rb') as f:
        assert f.getnframes() > 0
|
|
@pytest.mark.audio
@wrap_test_forked
def test_generate_speech():
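    """Smoke test: stream single characters through generate_speech; no output is asserted."""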
    os.environ['CUDA_HOME'] = '/usr/local/cuda-11.7'
    from src.tts_coqui import generate_speech, get_xtt, get_latent, get_role_to_wave_map

    chatbot_role = "Female AI Assistant"
    model, supported_languages = get_xtt()
    latent = get_latent(get_role_to_wave_map()[chatbot_role], model=model)

    response = 'I am an AI assistant. What do you want from me? I am very busy.'
    # Feed the response one character at a time, exercising generate_speech
    # on minimal incremental inputs.
    for char in response:
        generate_speech(char, model=model, supported_languages=supported_languages, latent=latent)
|
|
@pytest.mark.audio
@wrap_test_forked
def test_full_generate_speech():
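    """Stream a response word by word and check sentence splitting plus per-sentence audio generation."""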
    os.environ['CUDA_HOME'] = '/usr/local/cuda-11.7'
    from src.tts_coqui import generate_speech, get_xtt, get_latent, get_role_to_wave_map
    bot = 'I am an AI assistant. What do you want from me? I am very busy.'

    def response_gen():
        # Yield the bot response word by word, like a streaming LLM.
        for word1 in bot.split(' '):
            yield word1

    chatbot_role = "Female AI Assistant"
    model, supported_languages = get_xtt()
    latent = get_latent(get_role_to_wave_map()[chatbot_role], model=model)

    response = ""
    sentence_state = init_sentence_state()

    sentences = []
    audios = []
    sentences_expected = ['I am an AI assistant.', 'What do you want from me?', 'I am very busy.']
    for word in response_gen():
        response += word + ' '
        # Pass the accumulated response; generate_speech emits a sentence and
        # its audio whenever the sentence parser finds a complete sentence.
        audio, sentence, sentence_state = \
            generate_speech(response,
                            model=model,
                            supported_languages=supported_languages,
                            latent=latent,
                            sentence_state=sentence_state,
                            return_as_byte=False,
                            return_nonbyte_as_file=True,
                            return_gradio=False,
                            is_final=False, verbose=True)
        if sentence is not None:
            print(sentence)
            sentences.append(sentence)
        if audio is not None:
            audios.append(audio)
    # Final call flushes any remaining text as the last sentence.
    audio, sentence, sentence_state = \
        generate_speech(response,
                        model=model,
                        supported_languages=supported_languages,
                        latent=latent,
                        sentence_state=sentence_state,
                        return_as_byte=False,
                        return_nonbyte_as_file=True,
                        return_gradio=False,
                        is_final=True, verbose=True)
    if sentence is not None:
        print(sentence)
        sentences.append(sentence)
    if audio is not None:
        audios.append(audio)
    assert sentences == sentences_expected
    assert len(sentences) == len(audios)
    print(audios)
|
|
@pytest.mark.audio
@wrap_test_forked
@pytest.mark.parametrize("bot, sentences_expected", bot_list)
def test_predict_from_text(bot, sentences_expected):
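    """Generate speech from full text via src.tts.predict_from_text; expect one non-empty audio chunk per expected sentence."""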
    speeches = []
    from src.tts import get_tts_model, get_speakers, predict_from_text
    processor, model, vocoder = get_tts_model()
    speaker = get_speakers()[0]
    tts_speed = 1.0

    for audio in predict_from_text(bot, speaker, tts_speed,
                                   processor=processor, model=model, vocoder=vocoder,
                                   return_as_byte=False,
                                   verbose=True):
        # Keep only chunks that contain actual audio samples.
        if audio[1].shape[0] > 0:
            speeches.append(audio)
    assert len(speeches) == len(sentences_expected)