Spaces:
Sleeping
Sleeping
fix tts
Browse files
- .env_template +1 -0
- .gitignore +3 -1
- poetry.lock +51 -1
- pyproject.toml +1 -0
- tts/tts.py +52 -35
.env_template
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
TYPHOON_CHAT_API = *
|
|
|
2 |
DEBUG_MODE = false
|
3 |
OPENTHAIGPT_CHAT_API = *
|
4 |
BOTNOI_API_TOKEN = *
|
|
|
1 |
TYPHOON_CHAT_API = *
|
2 |
+
GROQ_CHAT_KEY = *
|
3 |
DEBUG_MODE = false
|
4 |
OPENTHAIGPT_CHAT_API = *
|
5 |
BOTNOI_API_TOKEN = *
|
.gitignore
CHANGED
@@ -8,4 +8,6 @@ __pycache__
|
|
8 |
|
9 |
# Files
|
10 |
speedtest.py
|
11 |
-
*.csv
|
|
|
|
|
|
8 |
|
9 |
# Files
|
10 |
speedtest.py
|
11 |
+
*.csv
|
12 |
+
*.wav
|
13 |
+
*.mp3
|
poetry.lock
CHANGED
@@ -1994,6 +1994,56 @@ files = [
|
|
1994 |
[package.extras]
|
1995 |
windows-terminal = ["colorama (>=0.4.6)"]
|
1996 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1997 |
[[package]]
|
1998 |
name = "python-dateutil"
|
1999 |
version = "2.9.0.post0"
|
@@ -2820,4 +2870,4 @@ propcache = ">=0.2.0"
|
|
2820 |
[metadata]
|
2821 |
lock-version = "2.0"
|
2822 |
python-versions = "^3.10"
|
2823 |
-
content-hash = "
|
|
|
1994 |
[package.extras]
|
1995 |
windows-terminal = ["colorama (>=0.4.6)"]
|
1996 |
|
1997 |
+
[[package]]
|
1998 |
+
name = "pythainlp"
|
1999 |
+
version = "5.0.4"
|
2000 |
+
description = "Thai Natural Language Processing library"
|
2001 |
+
optional = false
|
2002 |
+
python-versions = ">=3.7"
|
2003 |
+
files = [
|
2004 |
+
{file = "pythainlp-5.0.4-py3-none-any.whl", hash = "sha256:5f036d558f673215c142c6e32fd38b111d674d94d64f1b03b409fdfed0fa8dcd"},
|
2005 |
+
{file = "pythainlp-5.0.4.tar.gz", hash = "sha256:2cd8e088d722617c6065225fffbaf2522bc20b8a3eff5bd2bcb251c40eccdce0"},
|
2006 |
+
]
|
2007 |
+
|
2008 |
+
[package.dependencies]
|
2009 |
+
requests = ">=2.22.0"
|
2010 |
+
tzdata = {version = "*", markers = "sys_platform == \"win32\""}
|
2011 |
+
|
2012 |
+
[package.extras]
|
2013 |
+
abbreviation = ["khamyo (>=0.2.0)"]
|
2014 |
+
attacut = ["attacut (>=1.0.6)"]
|
2015 |
+
benchmarks = ["PyYAML (>=5.3.1)", "numpy (>=1.22)", "pandas (>=0.24)"]
|
2016 |
+
coreference-resolution = ["fastcoref (>=2.1.5)", "spacy (>=3.0)"]
|
2017 |
+
dependency-parsing = ["spacy-thai (>=0.7.1)", "transformers (>=4.22.1)", "ufal.chu-liu-edmonds (>=1.0.2)"]
|
2018 |
+
el = ["multiel (>=0.5)"]
|
2019 |
+
esupar = ["esupar (>=1.3.8)", "numpy", "transformers (>=4.22.1)"]
|
2020 |
+
full = ["PyYAML (>=5.3.1)", "attacut (>=1.0.4)", "bpemb (>=0.3.2)", "emoji (>=0.5.1)", "epitran (>=1.1)", "fairseq (>=0.10.0)", "fastai (<2.0)", "fastcoref (>=2.1.5)", "gensim (>=4.0.0)", "khamyo (>=0.2.0)", "nlpo3 (>=1.2.2)", "nltk (>=3.3)", "numpy (>=1.22)", "onnxruntime (>=1.10.0)", "oskut (>=1.3)", "pandas (>=0.24)", "panphon (>=0.20.0)", "phunspell (>=0.1.6)", "pyicu (>=2.3)", "sacremoses (>=0.0.41)", "sefr-cut (>=1.1)", "sentence-transformers (>=2.2.2)", "sentencepiece (>=0.1.91)", "spacy (>=3.0)", "spacy-thai (>=0.7.1)", "spylls (>=0.1.5)", "ssg (>=0.0.8)", "symspellpy (>=6.7.6)", "thai-nner", "torch (>=1.0.0)", "transformers (>=4.22.1)", "ufal.chu-liu-edmonds (>=1.0.2)", "wtpsplit (>=1.0.1)", "wunsen (>=0.0.3)"]
|
2021 |
+
generate = ["fastai (<2.0)"]
|
2022 |
+
icu = ["pyicu (>=2.3)"]
|
2023 |
+
ipa = ["epitran (>=1.1)"]
|
2024 |
+
ml = ["numpy (>=1.22)", "torch (>=1.0.0)"]
|
2025 |
+
mt5 = ["sentencepiece (>=0.1.91)", "transformers (>=4.6.0)"]
|
2026 |
+
nlpo3 = ["nlpo3 (>=1.2.2)"]
|
2027 |
+
onnx = ["numpy (>=1.22)", "onnxruntime (>=1.10.0)", "sentencepiece (>=0.1.91)"]
|
2028 |
+
oskut = ["oskut (>=1.3)"]
|
2029 |
+
sefr-cut = ["sefr-cut (>=1.1)"]
|
2030 |
+
spacy-thai = ["spacy-thai (>=0.7.1)"]
|
2031 |
+
spell = ["phunspell (>=0.1.6)", "spylls (>=0.1.5)", "symspellpy (>=6.7.6)"]
|
2032 |
+
ssg = ["ssg (>=0.0.8)"]
|
2033 |
+
textaugment = ["bpemb", "gensim (>=4.0.0)"]
|
2034 |
+
thai-nner = ["thai-nner"]
|
2035 |
+
thai2fit = ["emoji (>=0.5.1)", "gensim (>=4.0.0)", "numpy (>=1.22)"]
|
2036 |
+
thai2rom = ["numpy (>=1.22)", "torch (>=1.0.0)"]
|
2037 |
+
transformers-ud = ["transformers (>=4.22.1)", "ufal.chu-liu-edmonds (>=1.0.2)"]
|
2038 |
+
translate = ["fairseq (>=0.10.0)", "sacremoses (>=0.0.41)", "sentencepiece (>=0.1.91)", "torch (>=1.0.0)", "transformers (>=4.6.0)"]
|
2039 |
+
wangchanberta = ["sentencepiece (>=0.1.91)", "transformers (>=4.6.0)"]
|
2040 |
+
wangchanglm = ["pandas (>=0.24)", "sentencepiece (>=0.1.91)", "transformers (>=4.6.0)"]
|
2041 |
+
word-approximation = ["panphon (>=0.20.0)"]
|
2042 |
+
wordnet = ["nltk (>=3.3)"]
|
2043 |
+
wsd = ["sentence-transformers (>=2.2.2)"]
|
2044 |
+
wtp = ["transformers (>=4.6.0)", "wtpsplit (>=1.0.1)"]
|
2045 |
+
wunsen = ["wunsen (>=0.0.1)"]
|
2046 |
+
|
2047 |
[[package]]
|
2048 |
name = "python-dateutil"
|
2049 |
version = "2.9.0.post0"
|
|
|
2870 |
[metadata]
|
2871 |
lock-version = "2.0"
|
2872 |
python-versions = "^3.10"
|
2873 |
+
content-hash = "e0d24b26c313871a6ec8a9d37d495f8a11ca13efa6e2e82cfa872f77935955e9"
|
pyproject.toml
CHANGED
@@ -16,6 +16,7 @@ fastapi = "^0.115.5"
|
|
16 |
uvicorn = "^0.32.0"
|
17 |
gradio = "^5.8.0"
|
18 |
langchain-groq = "^0.2.1"
|
|
|
19 |
|
20 |
|
21 |
[build-system]
|
|
|
16 |
uvicorn = "^0.32.0"
|
17 |
gradio = "^5.8.0"
|
18 |
langchain-groq = "^0.2.1"
|
19 |
+
pythainlp = "^5.0.4"
|
20 |
|
21 |
|
22 |
[build-system]
|
tts/tts.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
from fastapi import FastAPI, HTTPException
|
2 |
from pythainlp.tokenize import sent_tokenize
|
3 |
from pydantic import BaseModel , Field
|
@@ -7,6 +8,8 @@ import uuid
|
|
7 |
from dotenv import load_dotenv
|
8 |
import os
|
9 |
|
|
|
|
|
10 |
# Load environment variables
|
11 |
load_dotenv()
|
12 |
|
@@ -62,8 +65,8 @@ def generate_voice(audio_id, text, text_delay, speaker, volume, speed, type_medi
|
|
62 |
def download_mp3(url, output_path):
|
63 |
headers = {
|
64 |
"Accept-Encoding": "identity;q=1, *;q=0",
|
65 |
-
"Range": "bytes=0-",
|
66 |
"Referer": "https://voice.botnoi.ai/",
|
|
|
67 |
}
|
68 |
|
69 |
response = requests.get(url, headers=headers, stream=True)
|
@@ -73,12 +76,15 @@ def download_mp3(url, output_path):
|
|
73 |
if chunk: # Filter out keep-alive chunks
|
74 |
file.write(chunk)
|
75 |
else:
|
76 |
-
raise HTTPException(status_code=response.status_code, detail="Failed to download MP3")
|
77 |
|
78 |
# FastAPI endpoint to generate and download voice
|
79 |
@app.post("/generate_voice_botnoi/")
|
80 |
def generate_voice_endpoint(request: VoiceRequest):
|
81 |
-
|
|
|
|
|
|
|
82 |
audio_url = generate_voice(
|
83 |
audio_id=request.audio_id,
|
84 |
text=request.text,
|
@@ -92,7 +98,10 @@ def generate_voice_endpoint(request: VoiceRequest):
|
|
92 |
)
|
93 |
|
94 |
# Generate unique filename for the MP3
|
95 |
-
|
|
|
|
|
|
|
96 |
download_mp3(audio_url, output_file)
|
97 |
|
98 |
return FileResponse(output_file, media_type="audio/mpeg", filename="output.mp3")
|
@@ -105,10 +114,6 @@ class Vaja9Request(BaseModel):
|
|
105 |
phrase_break: int = 0
|
106 |
audiovisual: int = 0
|
107 |
|
108 |
-
def split_text_into_chunks(text: str, chunk_size: int = 20) -> list:
|
109 |
-
words = text.split()
|
110 |
-
return [' '.join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]
|
111 |
-
|
112 |
def generate_vaja9_voice(text: str, speaker: int, phrase_break: int, audiovisual: int):
|
113 |
url = 'https://api.aiforthai.in.th/vaja9/synth_audiovisual'
|
114 |
headers = {
|
@@ -136,7 +141,10 @@ def generate_vaja9_voice(text: str, speaker: int, phrase_break: int, audiovisual
|
|
136 |
raise HTTPException(status_code=502, detail=f"Bad Gateway - Connection error: {str(e)}")
|
137 |
|
138 |
def download_vaja9_wav(url: str, output_path: str):
|
139 |
-
|
|
|
|
|
|
|
140 |
try:
|
141 |
response = requests.get(url, headers=headers, timeout=60) # Increased timeout to 60 seconds
|
142 |
if response.status_code == 200:
|
@@ -153,34 +161,43 @@ def download_vaja9_wav(url: str, output_path: str):
|
|
153 |
|
154 |
@app.post("/generate_voice_vaja9/")
|
155 |
def generate_voice_vaja9_endpoint(request: Vaja9Request):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
156 |
try:
|
157 |
-
|
158 |
-
|
159 |
-
|
|
|
|
|
|
|
160 |
|
161 |
-
#
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
phrase_break=request.phrase_break,
|
167 |
-
audiovisual=request.audiovisual
|
168 |
-
)
|
169 |
-
|
170 |
-
# Generate unique filename for each chunk
|
171 |
-
output_file = f"{uuid.uuid4()}.wav"
|
172 |
-
download_vaja9_wav(audio_url, output_file)
|
173 |
-
output_files.append(output_file)
|
174 |
|
175 |
-
|
176 |
-
if len(output_files) == 1:
|
177 |
-
return FileResponse(output_files[0], media_type="audio/wav", filename="output.wav")
|
178 |
-
|
179 |
-
# TODO: If multiple chunks, they should be combined into a single audio file
|
180 |
-
# For now, return the first chunk
|
181 |
-
return FileResponse(output_files[0], media_type="audio/wav", filename="output.wav")
|
182 |
|
183 |
-
except HTTPException:
|
184 |
-
raise
|
185 |
except Exception as e:
|
186 |
-
raise HTTPException(status_code=502, detail=f"Bad Gateway - Unexpected error: {str(e)}")
|
|
|
|
|
|
|
|
|
|
1 |
+
import time
|
2 |
from fastapi import FastAPI, HTTPException
|
3 |
from pythainlp.tokenize import sent_tokenize
|
4 |
from pydantic import BaseModel , Field
|
|
|
8 |
from dotenv import load_dotenv
|
9 |
import os
|
10 |
|
11 |
+
import uvicorn
|
12 |
+
|
13 |
# Load environment variables
|
14 |
load_dotenv()
|
15 |
|
|
|
65 |
def download_mp3(url, output_path):
|
66 |
headers = {
|
67 |
"Accept-Encoding": "identity;q=1, *;q=0",
|
|
|
68 |
"Referer": "https://voice.botnoi.ai/",
|
69 |
+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
|
70 |
}
|
71 |
|
72 |
response = requests.get(url, headers=headers, stream=True)
|
|
|
76 |
if chunk: # Filter out keep-alive chunks
|
77 |
file.write(chunk)
|
78 |
else:
|
79 |
+
raise HTTPException(status_code=response.status_code, detail=f"{response.status_code} Failed to download MP3")
|
80 |
|
81 |
# FastAPI endpoint to generate and download voice
|
82 |
@app.post("/generate_voice_botnoi/")
|
83 |
def generate_voice_endpoint(request: VoiceRequest):
|
84 |
+
|
85 |
+
text_delay = request.text
|
86 |
+
# text_delay = auto_generate_text_delay_with_pythainlp(request.text)
|
87 |
+
print("Text delay:", text_delay)
|
88 |
audio_url = generate_voice(
|
89 |
audio_id=request.audio_id,
|
90 |
text=request.text,
|
|
|
98 |
)
|
99 |
|
100 |
# Generate unique filename for the MP3
|
101 |
+
print("botnoi url: ", audio_url)
|
102 |
+
output_dir = "generated_voice/botnoi"
|
103 |
+
os.makedirs(output_dir, exist_ok=True)
|
104 |
+
output_file = os.path.join(output_dir, f"{int(time.time())}.mp3")
|
105 |
download_mp3(audio_url, output_file)
|
106 |
|
107 |
return FileResponse(output_file, media_type="audio/mpeg", filename="output.mp3")
|
|
|
114 |
phrase_break: int = 0
|
115 |
audiovisual: int = 0
|
116 |
|
|
|
|
|
|
|
|
|
117 |
def generate_vaja9_voice(text: str, speaker: int, phrase_break: int, audiovisual: int):
|
118 |
url = 'https://api.aiforthai.in.th/vaja9/synth_audiovisual'
|
119 |
headers = {
|
|
|
141 |
raise HTTPException(status_code=502, detail=f"Bad Gateway - Connection error: {str(e)}")
|
142 |
|
143 |
def download_vaja9_wav(url: str, output_path: str):
|
144 |
+
api_key = os.getenv("VAJA9_API_KEY")
|
145 |
+
if not api_key:
|
146 |
+
raise HTTPException(status_code=500, detail="VAJA9_API_KEY environment variable not set")
|
147 |
+
headers = {'Apikey': api_key}
|
148 |
try:
|
149 |
response = requests.get(url, headers=headers, timeout=60) # Increased timeout to 60 seconds
|
150 |
if response.status_code == 200:
|
|
|
161 |
|
162 |
@app.post("/generate_voice_vaja9/")
def generate_voice_vaja9_endpoint(request: Vaja9Request):
    """
    Generate a voice file using the Vaja9 endpoint.

    Request fields:
    - text (str): Text to synthesize (at most 300 characters).
    - speaker (int): Voice type.
        0 : adult male voice
        1 : adult female voice
        2 : boy voice
        3 : girl voice
    - phrase_break (int): Phrase-break (pause) mode.
        0 : insert pauses automatically
        1 : no pauses
    - audiovisual (int): Model type.
        0 : speech synthesis model
        1 : speech and visual synthesis model

    Returns:
        FileResponse serving the downloaded WAV as "output.wav".

    Raises:
        HTTPException: propagated unchanged when raised by the synthesis or
            download helpers; any other exception is reported as a 502.
    """
    try:
        audio_url = generate_vaja9_voice(
            text=request.text,
            speaker=request.speaker,
            phrase_break=request.phrase_break,
            audiovisual=request.audiovisual,
        )

        output_dir = "generated_voice/vaja9"
        os.makedirs(output_dir, exist_ok=True)
        # A whole-second timestamp alone is not unique: two requests arriving
        # within the same second would write to the same path and one caller
        # could be served the other's audio. Keep the timestamp for sorting
        # and add a random UUID so every request gets its own file.
        output_file = os.path.join(
            output_dir, f"{int(time.time())}_{uuid.uuid4().hex}.wav"
        )
        download_vaja9_wav(audio_url, output_file)

        return FileResponse(output_file, media_type="audio/wav", filename="output.wav")

    except HTTPException:
        # Helpers already chose the status code and detail; keep the original
        # traceback by re-raising without rebinding.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=502, detail=f"Bad Gateway - Unexpected error: {str(e)}"
        ) from e
|
200 |
+
|
201 |
+
|
202 |
+
if __name__ == "__main__":
    # Start the server only when this file is executed directly.
    # The application is passed as the import string "tts:app" rather than
    # as an object; uvicorn needs an import string to support reload.
    server_options = {"host": "0.0.0.0", "port": 8001, "reload": True}
    uvicorn.run("tts:app", **server_options)
|