|
import time |
|
import os |
|
import json |
|
import tempfile |
|
|
|
import google.cloud.texttospeech as tts |
|
import simpleaudio as sa |
|
|
|
|
|
def get_credentials():
    """Write the credentials JSON text from the environment to a temp file.

    The text is read from the ``GOOGLE`` environment variable. When that
    variable is unset or empty, ``GOOGLE_APPLICATION_CREDENTIALS_JSON`` (the
    name the error message reports) is used instead, so setting either one
    works.

    Returns:
        str: Path of a newly created ``.json`` temporary file containing the
        credentials text. The file is created with ``delete=False`` and is
        not removed by this function.

    Raises:
        ValueError: If neither environment variable holds a non-empty value.
    """
    # ``GOOGLE`` is consulted first so environments that already rely on it
    # behave exactly as before; the longer name is only a fallback.
    creds_json_str = os.getenv("GOOGLE") or os.getenv(
        "GOOGLE_APPLICATION_CREDENTIALS_JSON"
    )

    # An empty value is treated as missing: writing an empty credentials file
    # would only postpone the failure to a less obvious place.
    if not creds_json_str:
        raise ValueError(
            "GOOGLE_APPLICATION_CREDENTIALS_JSON (or GOOGLE) not found in environment"
        )

    # delete=False keeps the file on disk after the ``with`` block closes it,
    # so the returned path stays usable by the caller.
    with tempfile.NamedTemporaryFile(mode="w+", delete=False, suffix=".json") as temp:
        temp.write(creds_json_str)
        temp_filename = temp.name

    return temp_filename
|
|
|
# Runs at import time: materialize the credentials file and publish its path
# through the GOOGLE_APPLICATION_CREDENTIALS environment variable (presumably
# so the Text-to-Speech client created later can locate it -- confirm).
os.environ.update(GOOGLE_APPLICATION_CREDENTIALS=get_credentials())
|
|
|
class TextToSpeech:
    """Synthesize text with Google Cloud Text-to-Speech and play it locally.

    The client requests LINEAR16 audio and plays the result through
    ``simpleaudio``, blocking until playback has finished.
    """

    # Playback parameters used when the response cannot be parsed as a WAV
    # container (these are the values playback was previously hard-coded to).
    _FALLBACK_CHANNELS = 1
    _FALLBACK_SAMPLE_WIDTH = 2
    _FALLBACK_SAMPLE_RATE = 24000

    def __init__(
        self,
        language_code: str = "id-ID",
        voice_name: str = "id-ID-Standard-A",
        speaking_rate: float = 1.25,
    ):
        """Create the voice selection, audio configuration and API client.

        Args:
            language_code: Language code passed to ``VoiceSelectionParams``.
            voice_name: Voice name passed to ``VoiceSelectionParams``.
            speaking_rate: Speaking rate passed to ``AudioConfig``.
        """
        self.voice_params = tts.VoiceSelectionParams(
            language_code=language_code, name=voice_name
        )
        self.audio_config = tts.AudioConfig(
            audio_encoding=tts.AudioEncoding.LINEAR16,
            speaking_rate=speaking_rate,
        )
        self.client = tts.TextToSpeechClient()

    @classmethod
    def _decode_wav(cls, audio_content: bytes):
        """Split a WAV payload into raw PCM frames and playback parameters.

        Args:
            audio_content: Bytes returned by the synthesis call.

        Returns:
            tuple: ``(frames, num_channels, bytes_per_sample, sample_rate)``.
            When ``audio_content`` is not a parseable WAV container, or holds
            no frames, the bytes are returned unchanged together with the
            fallback parameters (mono, 2 bytes per sample, 24000 Hz).
        """
        # Local imports keep this block self-contained.
        import io
        import wave

        fallback = (
            audio_content,
            cls._FALLBACK_CHANNELS,
            cls._FALLBACK_SAMPLE_WIDTH,
            cls._FALLBACK_SAMPLE_RATE,
        )
        try:
            with wave.open(io.BytesIO(audio_content), "rb") as wav:
                frames = wav.readframes(wav.getnframes())
                params = (wav.getnchannels(), wav.getsampwidth(), wav.getframerate())
        except (wave.Error, EOFError):
            # Headerless or truncated data: treat it as raw PCM, as before.
            return fallback

        if not frames:
            return fallback
        return (frames, *params)

    def text_to_speech(self, text: str) -> None:
        """Synthesize ``text``, print the synthesis time, and play the audio.

        Blocks until playback is done.

        Args:
            text: Plain text to synthesize.
        """
        start = time.time()
        text_input = tts.SynthesisInput(text=text)
        response = self.client.synthesize_speech(
            input=text_input,
            voice=self.voice_params,
            audio_config=self.audio_config,
        )
        end = time.time()
        print(f"Time taken to synthesize speech: {end-start:.2f}s")

        # Per the Cloud TTS API reference, LINEAR16 audio content includes a
        # WAV header. Playing the payload verbatim would render the header as
        # samples, so strip it and take channels/width/rate from the header
        # instead of assuming them.
        frames, channels, sample_width, sample_rate = self._decode_wav(
            response.audio_content
        )
        play_obj = sa.play_buffer(
            frames,
            num_channels=channels,
            bytes_per_sample=sample_width,
            sample_rate=sample_rate,
        )
        play_obj.wait_done()