import base64
import os
from typing import Literal

import requests
from gtts import gTTS


def text_to_speech(
    text: str, language: Literal["de", "en"] = "de", save_path: str = "output.mp3"
) -> None:
    """Synthesize *text* with gTTS and write the audio to *save_path*.

    Args:
        text: The text to speak.
        language: Language code passed to gTTS as ``lang``.
        save_path: Destination file for the generated audio.
    """
    tts = gTTS(text=text, lang=language, slow=False)
    tts.save(save_path)


# Voice name sent to the Polly endpoint for each supported language code.
LANG_TO_VOICE_MAPPING = {
    "de": "Vicki",
    "en": "Joanna",
}

# Both variables are required: importing this module raises KeyError when
# either one is unset.
POLLY_URL = os.environ["POLLY_URL"]
POLLY_KEY = os.environ["POLLY_KEY"]


def text_to_speech_polly(
    text: str,
    language: Literal["de", "en"] = "de",
    save_path: str = "output.mp3",
    *,
    timeout: float = 30.0,
) -> None:
    """Request speech for *text* from the ``POLLY_URL`` endpoint and save it.

    The request is a JSON POST carrying the text, the voice chosen for
    *language* (falling back to ``"Joanna"`` for unmapped codes), the engine
    preference, and ``POLLY_KEY``. The response body is base64-decoded and the
    resulting bytes are written to *save_path*.

    On an HTTP error status the error and the response text are printed and
    the function returns without writing any file.

    Args:
        text: The text to speak.
        language: Language code used to look up the voice.
        save_path: Destination file for the decoded audio bytes.
        timeout: Seconds to wait for the endpoint before giving up. Without
            a timeout ``requests`` would block indefinitely on a stalled
            connection.

    Raises:
        requests.exceptions.RequestException: For connection-level failures,
            including ``requests.exceptions.Timeout`` once *timeout* elapses.
    """
    payload = {
        "text": text,
        "voice": LANG_TO_VOICE_MAPPING.get(language, "Joanna"),
        # NOTE(review): the key spelling is part of the wire format the
        # service presumably expects - confirm before "fixing" it.
        "prefered_engine": "neural",
        "code": POLLY_KEY,
    }
    response = requests.post(POLLY_URL, json=payload, timeout=timeout)

    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError as error:
        # Best-effort: report the failure and skip writing the file.
        print(error)
        print(response.text)
        return

    audio_bytes = base64.b64decode(response.content)
    with open(save_path, "wb") as f:
        f.write(audio_bytes)