Spaces:
Sleeping
Sleeping
feat: add tts
Browse files- .env_template +3 -1
- tts/tts.py +186 -0
.env_template
CHANGED
@@ -1,2 +1,4 @@
|
|
1 |
TYPHOON_CHAT_API = *
|
2 |
-
OPENTHAIGPT_CHAT_API = *
|
|
|
|
|
|
1 |
TYPHOON_CHAT_API = *
|
2 |
+
OPENTHAIGPT_CHAT_API = *
|
3 |
+
BOTNOI_API_TOKEN = *
|
4 |
+
VAJA9_API_KEY = *
|
tts/tts.py
ADDED
@@ -0,0 +1,186 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import FastAPI, HTTPException
|
2 |
+
from pythainlp.tokenize import sent_tokenize
|
3 |
+
from pydantic import BaseModel , Field
|
4 |
+
import requests
|
5 |
+
from fastapi.responses import FileResponse
|
6 |
+
import uuid
|
7 |
+
from dotenv import load_dotenv
|
8 |
+
import os
|
9 |
+
|
10 |
+
# Load environment variables
|
11 |
+
load_dotenv()
|
12 |
+
|
13 |
+
app = FastAPI()
|
14 |
+
|
15 |
+
# Pydantic model for input validation
|
16 |
+
class VoiceRequest(BaseModel):
    """Request body accepted by the Botnoi voice-generation endpoint.

    Only ``text`` is required; every other field falls back to the default
    declared below and is forwarded to the Botnoi API call.
    """

    # Text to be synthesized.
    text: str

    # Sent as "audio_id" in the Botnoi payload.
    audio_id: str = "EUOJF"

    # Sent as "speaker" in the Botnoi payload (a string identifier).
    speaker: str = "52"

    # Sent to Botnoi as a string (the payload builder applies str()).
    volume: int = 100

    # Sent to Botnoi as a string; the default is deliberately the int 1 so
    # that it serializes as "1".
    speed: float = 1

    # Sent as "type_media" in the Botnoi payload.
    type_media: str = "mp3"

    # Sent as "language" in the Botnoi payload.
    language: str = "th"

    # Bearer token for the Botnoi API. The default is read from the
    # BOTNOI_API_TOKEN environment variable once, when this class is defined,
    # and is None if that variable is unset.
    token: str = os.getenv("BOTNOI_API_TOKEN")
|
25 |
+
|
26 |
+
# Function to split text for text delay
|
27 |
+
def auto_generate_text_delay_with_pythainlp(text):
    """Build the ``text_delay`` string by re-joining sentence-split text.

    The input is split with PyThaiNLP's ``sent_tokenize`` (``thaisum``
    engine); the resulting pieces are joined with single spaces and the
    result is stripped of leading/trailing whitespace.
    """
    return " ".join(sent_tokenize(text, engine="thaisum")).strip()
|
31 |
+
|
32 |
+
# Function to call Botnoi's API to generate voice
|
33 |
+
def generate_voice(audio_id, text, text_delay, speaker, volume, speed, type_media, language, token):
    """Ask the Botnoi voice API to synthesize ``text`` and return its result.

    Args:
        audio_id, text, text_delay, speaker, type_media, language: forwarded
            unchanged in the JSON payload.
        volume, speed: forwarded as strings (``str()`` is applied).
        token: bearer token placed in the ``Authorization`` header.

    Returns:
        The value of the ``"data"`` field of the JSON response (the URL of
        the generated audio).

    Raises:
        HTTPException: 504 on timeout, 502 on a connection error or a body
            that is not valid JSON, the upstream status code on a non-200
            response, and 500 when the JSON has no ``"data"`` field.
    """
    url = "https://api-genvoice.botnoi.ai/voice/v1/generate_voice?provider=botnoivoice"
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }
    payload = {
        "audio_id": audio_id,
        "text": text,
        "text_delay": text_delay,
        "speaker": speaker,
        "volume": str(volume),
        "speed": str(speed),
        "type_media": type_media,
        "language": language,
    }

    # Without a timeout a stalled upstream would block this worker forever;
    # network failures are mapped the same way as in the VAJA9 helpers.
    try:
        response = requests.post(url, headers=headers, json=payload, timeout=60)
    except requests.exceptions.Timeout as exc:
        raise HTTPException(
            status_code=504,
            detail="Gateway Timeout - The server took too long to respond",
        ) from exc
    except requests.exceptions.RequestException as exc:
        raise HTTPException(
            status_code=502,
            detail=f"Bad Gateway - Connection error: {str(exc)}",
        ) from exc

    if response.status_code != 200:
        raise HTTPException(status_code=response.status_code, detail="Voice generation failed")

    # Every JSON decode error raised by requests is a ValueError subclass.
    try:
        data = response.json()
    except ValueError as exc:
        raise HTTPException(
            status_code=502,
            detail="Bad Gateway - Voice service returned invalid JSON",
        ) from exc

    # Guard against a JSON body that is not an object (e.g. a list or null).
    if not isinstance(data, dict):
        raise HTTPException(status_code=500, detail="Unknown error")
    if "data" in data:
        return data["data"]  # URL of the generated audio
    raise HTTPException(status_code=500, detail=data.get("message", "Unknown error"))
|
60 |
+
|
61 |
+
# Function to download MP3 from a URL
|
62 |
+
def download_mp3(url, output_path):
    """Stream the audio at ``url`` into the file ``output_path``.

    Args:
        url: location of the generated audio.
        output_path: path of the file to create (opened in ``"wb"`` mode).

    Raises:
        HTTPException: 504 on timeout, 502 on a connection error, or the
            upstream status code when the response is neither 200 nor 206.
    """
    headers = {
        "Accept-Encoding": "identity;q=1, *;q=0",
        "Range": "bytes=0-",
        "Referer": "https://voice.botnoi.ai/",
    }

    try:
        # The context manager releases the streamed connection even when the
        # download is aborted part-way through.
        with requests.get(url, headers=headers, stream=True, timeout=60) as response:
            # A request carrying a Range header may legitimately be answered
            # with 206 Partial Content; "bytes=0-" still covers the whole file.
            if response.status_code not in (200, 206):
                raise HTTPException(status_code=response.status_code, detail="Failed to download MP3")
            with open(output_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:  # Filter out keep-alive chunks
                        file.write(chunk)
    except requests.exceptions.Timeout as exc:
        raise HTTPException(
            status_code=504,
            detail="Gateway Timeout - The server took too long to respond",
        ) from exc
    except requests.exceptions.RequestException as exc:
        raise HTTPException(
            status_code=502,
            detail=f"Bad Gateway - Connection error: {str(exc)}",
        ) from exc
|
77 |
+
|
78 |
+
# FastAPI endpoint to generate and download voice
|
79 |
+
@app.post("/generate_voice_botnoi/")
def generate_voice_endpoint(request: VoiceRequest):
    """Generate speech with Botnoi and return it as an MP3 download.

    The text is synthesized remotely, the resulting audio is downloaded to a
    uniquely named local file, and that file is sent back to the client. The
    file is deleted once the response has been sent, or immediately if the
    download fails.

    Raises:
        HTTPException: propagated from ``generate_voice`` / ``download_mp3``.
    """
    # Imported locally so this handler does not depend on the module's
    # top-level import list.
    from fastapi import BackgroundTasks

    text_delay = auto_generate_text_delay_with_pythainlp(request.text)
    audio_url = generate_voice(
        audio_id=request.audio_id,
        text=request.text,
        text_delay=text_delay,
        speaker=request.speaker,
        volume=request.volume,
        speed=request.speed,
        type_media=request.type_media,
        language=request.language,
        token=request.token,
    )

    # Generate unique filename for the MP3
    output_file = f"{uuid.uuid4()}.mp3"
    try:
        download_mp3(audio_url, output_file)
    except Exception:
        # Do not leave a partially written file behind.
        if os.path.exists(output_file):
            os.remove(output_file)
        raise

    # Remove the file after the response body has been sent; otherwise every
    # request would leak one file in the working directory.
    cleanup = BackgroundTasks()
    cleanup.add_task(os.remove, output_file)
    return FileResponse(
        output_file,
        media_type="audio/mpeg",
        filename="output.mp3",
        background=cleanup,
    )
|
99 |
+
|
100 |
+
# -----------------------------------------------------------VAJA9-----------------------------------------------------------
|
101 |
+
# VAJA9 Voice Generation
|
102 |
+
class Vaja9Request(BaseModel):
    """Request body accepted by the VAJA9 voice-generation endpoint.

    Only ``text`` is required. The three integer options are forwarded
    unchanged to the VAJA9 synthesis API.
    """

    # Text to be synthesized (sent upstream as "input_text").
    text: str

    # Sent as "speaker" in the VAJA9 payload.
    speaker: int = 1

    # Sent as "phrase_break" in the VAJA9 payload.
    phrase_break: int = 0

    # Sent as "audiovisual" in the VAJA9 payload.
    audiovisual: int = 0
|
107 |
+
|
108 |
+
def split_text_into_chunks(text: str, chunk_size: int = 20) -> list:
    """Split ``text`` into chunks of at most ``chunk_size`` whitespace-separated words.

    Words are obtained with ``str.split()``, so runs of whitespace collapse
    and each chunk is re-joined with single spaces.

    Args:
        text: the text to split.
        chunk_size: maximum number of words per chunk; must be positive.

    Returns:
        A list of chunk strings, empty when ``text`` contains no words.

    Raises:
        ValueError: if ``chunk_size`` is not a positive integer.
    """
    # A negative step would make range() yield nothing and silently drop the
    # whole text; zero would fail with an unrelated-looking range() error.
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    words = text.split()
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]
|
111 |
+
|
112 |
+
def generate_vaja9_voice(text: str, speaker: int, phrase_break: int, audiovisual: int):
    """Ask the VAJA9 API to synthesize ``text`` and return the WAV URL.

    Args:
        text: text to synthesize (sent as ``input_text``).
        speaker, phrase_break, audiovisual: forwarded unchanged in the payload.

    Returns:
        The ``wav_url`` value from the JSON response.

    Raises:
        HTTPException: 504 on any timeout (connect or read), 502 on a
            connection error, an upstream 502, or a response without a usable
            ``wav_url``; otherwise the upstream status code on a non-200
            response.
    """
    url = 'https://api.aiforthai.in.th/vaja9/synth_audiovisual'
    headers = {
        'Apikey': os.getenv("VAJA9_API_KEY"),
        'Content-Type': 'application/json'
    }
    data = {
        'input_text': text,
        'speaker': speaker,
        'phrase_break': phrase_break,
        'audiovisual': audiovisual
    }

    try:
        response = requests.post(url, json=data, headers=headers, timeout=60)
    except requests.exceptions.Timeout as exc:
        # Timeout covers both ConnectTimeout and ReadTimeout, so a slow
        # connect is reported as 504 as well rather than as a 502.
        raise HTTPException(
            status_code=504,
            detail="Gateway Timeout - The server took too long to respond",
        ) from exc
    except requests.exceptions.RequestException as e:
        raise HTTPException(
            status_code=502,
            detail=f"Bad Gateway - Connection error: {str(e)}",
        ) from e

    if response.status_code == 502:
        raise HTTPException(status_code=502, detail="Bad Gateway - The server received an invalid response from the upstream server")
    if response.status_code != 200:
        raise HTTPException(status_code=response.status_code, detail="Voice generation failed")

    # ValueError: body is not JSON; KeyError/TypeError: JSON lacks 'wav_url'
    # or is not an object.
    try:
        return response.json()['wav_url']
    except (ValueError, KeyError, TypeError) as exc:
        raise HTTPException(
            status_code=502,
            detail="Bad Gateway - Voice service returned an unexpected response",
        ) from exc
|
137 |
+
|
138 |
+
def download_vaja9_wav(url: str, output_path: str):
    """Download the WAV at ``url`` and write it to ``output_path``.

    The request carries the ``Apikey`` header read from the
    ``VAJA9_API_KEY`` environment variable.

    Raises:
        HTTPException: 504 on any timeout (connect or read), 502 on a
            connection error or an upstream 502, otherwise the upstream
            status code on a non-200 response.
    """
    headers = {'Apikey': os.getenv("VAJA9_API_KEY")}

    try:
        response = requests.get(url, headers=headers, timeout=60)
    except requests.exceptions.Timeout as exc:
        # Timeout covers both ConnectTimeout and ReadTimeout, so a slow
        # connect is reported as 504 as well rather than as a 502.
        raise HTTPException(
            status_code=504,
            detail="Gateway Timeout - The server took too long to respond",
        ) from exc
    except requests.exceptions.RequestException as e:
        raise HTTPException(
            status_code=502,
            detail=f"Bad Gateway - Connection error: {str(e)}",
        ) from e

    if response.status_code == 502:
        raise HTTPException(status_code=502, detail="Bad Gateway - The server received an invalid response from the upstream server")
    if response.status_code != 200:
        raise HTTPException(status_code=response.status_code, detail="Failed to download WAV")

    with open(output_path, 'wb') as file:
        file.write(response.content)
|
153 |
+
|
154 |
+
@app.post("/generate_voice_vaja9/")
def generate_voice_vaja9_endpoint(request: Vaja9Request):
    """Generate speech with VAJA9 and return it as a single WAV download.

    The text is split into chunks of up to 20 whitespace-separated words,
    each chunk is synthesized and downloaded, and the resulting WAV files
    are concatenated in order into one file. Intermediate files are removed
    before returning; the returned file is removed after the response has
    been sent.

    Raises:
        HTTPException: 400 when the text contains no words; errors from the
            VAJA9 helpers are propagated; any other failure becomes a 502.
    """
    # Imported locally so this handler does not depend on the module's
    # top-level import list.
    from fastapi import BackgroundTasks

    chunk_files = []
    result_file = None
    succeeded = False
    try:
        # Split text into chunks of 20 words if needed
        text_chunks = split_text_into_chunks(request.text)
        if not text_chunks:
            # Previously surfaced as a 502 caused by an IndexError.
            raise HTTPException(status_code=400, detail="Text must not be empty")

        # Process each chunk
        for chunk in text_chunks:
            audio_url = generate_vaja9_voice(
                text=chunk,
                speaker=request.speaker,
                phrase_break=request.phrase_break,
                audiovisual=request.audiovisual
            )

            # Register the unique filename before downloading so a failed
            # download is still cleaned up.
            chunk_file = f"{uuid.uuid4()}.wav"
            chunk_files.append(chunk_file)
            download_vaja9_wav(audio_url, chunk_file)

        if len(chunk_files) == 1:
            # Single chunk: serve it directly and keep it out of the
            # intermediate-file cleanup below.
            result_file = chunk_files.pop()
        else:
            result_file = f"{uuid.uuid4()}.wav"
            _concatenate_wav_files(chunk_files, result_file)

        # Delete the served file once the response body has been sent.
        cleanup = BackgroundTasks()
        cleanup.add_task(_remove_files, [result_file])
        response = FileResponse(
            result_file,
            media_type="audio/wav",
            filename="output.wav",
            background=cleanup,
        )
        succeeded = True
        return response

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Bad Gateway - Unexpected error: {str(e)}")
    finally:
        leftovers = list(chunk_files)
        if not succeeded and result_file is not None:
            leftovers.append(result_file)
        _remove_files(leftovers)


def _concatenate_wav_files(input_paths, output_path):
    """Append the frames of each WAV in ``input_paths``, in order, to ``output_path``."""
    import wave

    with wave.open(output_path, "wb") as combined:
        expected_format = None
        for path in input_paths:
            with wave.open(path, "rb") as part:
                part_format = (part.getnchannels(), part.getsampwidth(), part.getframerate())
                if expected_format is None:
                    expected_format = part_format
                    combined.setnchannels(part_format[0])
                    combined.setsampwidth(part_format[1])
                    combined.setframerate(part_format[2])
                elif part_format != expected_format:
                    # Mixing formats would produce corrupt audio.
                    raise ValueError("Audio chunks have mismatched WAV formats")
                combined.writeframes(part.readframes(part.getnframes()))


def _remove_files(paths):
    """Delete each path in ``paths``, ignoring files that cannot be removed."""
    for path in paths:
        try:
            os.remove(path)
        except OSError:
            # Best-effort cleanup: the file may never have been created.
            pass
|