microhum committed on
Commit
4bd456c
·
1 Parent(s): 0551729
Files changed (5) hide show
  1. .env_template +1 -0
  2. .gitignore +3 -1
  3. poetry.lock +51 -1
  4. pyproject.toml +1 -0
  5. tts/tts.py +52 -35
.env_template CHANGED
@@ -1,4 +1,5 @@
1
  TYPHOON_CHAT_API = *
 
2
  DEBUG_MODE = false
3
  OPENTHAIGPT_CHAT_API = *
4
  BOTNOI_API_TOKEN = *
 
1
  TYPHOON_CHAT_API = *
2
+ GROQ_CHAT_KEY = *
3
  DEBUG_MODE = false
4
  OPENTHAIGPT_CHAT_API = *
5
  BOTNOI_API_TOKEN = *
.gitignore CHANGED
@@ -8,4 +8,6 @@ __pycache__
8
 
9
  # Files
10
  speedtest.py
11
- *.csv
 
 
 
8
 
9
  # Files
10
  speedtest.py
11
+ *.csv
12
+ *.wav
13
+ *.mp3
poetry.lock CHANGED
@@ -1994,6 +1994,56 @@ files = [
1994
  [package.extras]
1995
  windows-terminal = ["colorama (>=0.4.6)"]
1996
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1997
  [[package]]
1998
  name = "python-dateutil"
1999
  version = "2.9.0.post0"
@@ -2820,4 +2870,4 @@ propcache = ">=0.2.0"
2820
  [metadata]
2821
  lock-version = "2.0"
2822
  python-versions = "^3.10"
2823
- content-hash = "fc3fccebdc1dad30f562185560b085f474738badd36f1a674b28d11c1fe7ed23"
 
1994
  [package.extras]
1995
  windows-terminal = ["colorama (>=0.4.6)"]
1996
 
1997
+ [[package]]
1998
+ name = "pythainlp"
1999
+ version = "5.0.4"
2000
+ description = "Thai Natural Language Processing library"
2001
+ optional = false
2002
+ python-versions = ">=3.7"
2003
+ files = [
2004
+ {file = "pythainlp-5.0.4-py3-none-any.whl", hash = "sha256:5f036d558f673215c142c6e32fd38b111d674d94d64f1b03b409fdfed0fa8dcd"},
2005
+ {file = "pythainlp-5.0.4.tar.gz", hash = "sha256:2cd8e088d722617c6065225fffbaf2522bc20b8a3eff5bd2bcb251c40eccdce0"},
2006
+ ]
2007
+
2008
+ [package.dependencies]
2009
+ requests = ">=2.22.0"
2010
+ tzdata = {version = "*", markers = "sys_platform == \"win32\""}
2011
+
2012
+ [package.extras]
2013
+ abbreviation = ["khamyo (>=0.2.0)"]
2014
+ attacut = ["attacut (>=1.0.6)"]
2015
+ benchmarks = ["PyYAML (>=5.3.1)", "numpy (>=1.22)", "pandas (>=0.24)"]
2016
+ coreference-resolution = ["fastcoref (>=2.1.5)", "spacy (>=3.0)"]
2017
+ dependency-parsing = ["spacy-thai (>=0.7.1)", "transformers (>=4.22.1)", "ufal.chu-liu-edmonds (>=1.0.2)"]
2018
+ el = ["multiel (>=0.5)"]
2019
+ esupar = ["esupar (>=1.3.8)", "numpy", "transformers (>=4.22.1)"]
2020
+ full = ["PyYAML (>=5.3.1)", "attacut (>=1.0.4)", "bpemb (>=0.3.2)", "emoji (>=0.5.1)", "epitran (>=1.1)", "fairseq (>=0.10.0)", "fastai (<2.0)", "fastcoref (>=2.1.5)", "gensim (>=4.0.0)", "khamyo (>=0.2.0)", "nlpo3 (>=1.2.2)", "nltk (>=3.3)", "numpy (>=1.22)", "onnxruntime (>=1.10.0)", "oskut (>=1.3)", "pandas (>=0.24)", "panphon (>=0.20.0)", "phunspell (>=0.1.6)", "pyicu (>=2.3)", "sacremoses (>=0.0.41)", "sefr-cut (>=1.1)", "sentence-transformers (>=2.2.2)", "sentencepiece (>=0.1.91)", "spacy (>=3.0)", "spacy-thai (>=0.7.1)", "spylls (>=0.1.5)", "ssg (>=0.0.8)", "symspellpy (>=6.7.6)", "thai-nner", "torch (>=1.0.0)", "transformers (>=4.22.1)", "ufal.chu-liu-edmonds (>=1.0.2)", "wtpsplit (>=1.0.1)", "wunsen (>=0.0.3)"]
2021
+ generate = ["fastai (<2.0)"]
2022
+ icu = ["pyicu (>=2.3)"]
2023
+ ipa = ["epitran (>=1.1)"]
2024
+ ml = ["numpy (>=1.22)", "torch (>=1.0.0)"]
2025
+ mt5 = ["sentencepiece (>=0.1.91)", "transformers (>=4.6.0)"]
2026
+ nlpo3 = ["nlpo3 (>=1.2.2)"]
2027
+ onnx = ["numpy (>=1.22)", "onnxruntime (>=1.10.0)", "sentencepiece (>=0.1.91)"]
2028
+ oskut = ["oskut (>=1.3)"]
2029
+ sefr-cut = ["sefr-cut (>=1.1)"]
2030
+ spacy-thai = ["spacy-thai (>=0.7.1)"]
2031
+ spell = ["phunspell (>=0.1.6)", "spylls (>=0.1.5)", "symspellpy (>=6.7.6)"]
2032
+ ssg = ["ssg (>=0.0.8)"]
2033
+ textaugment = ["bpemb", "gensim (>=4.0.0)"]
2034
+ thai-nner = ["thai-nner"]
2035
+ thai2fit = ["emoji (>=0.5.1)", "gensim (>=4.0.0)", "numpy (>=1.22)"]
2036
+ thai2rom = ["numpy (>=1.22)", "torch (>=1.0.0)"]
2037
+ transformers-ud = ["transformers (>=4.22.1)", "ufal.chu-liu-edmonds (>=1.0.2)"]
2038
+ translate = ["fairseq (>=0.10.0)", "sacremoses (>=0.0.41)", "sentencepiece (>=0.1.91)", "torch (>=1.0.0)", "transformers (>=4.6.0)"]
2039
+ wangchanberta = ["sentencepiece (>=0.1.91)", "transformers (>=4.6.0)"]
2040
+ wangchanglm = ["pandas (>=0.24)", "sentencepiece (>=0.1.91)", "transformers (>=4.6.0)"]
2041
+ word-approximation = ["panphon (>=0.20.0)"]
2042
+ wordnet = ["nltk (>=3.3)"]
2043
+ wsd = ["sentence-transformers (>=2.2.2)"]
2044
+ wtp = ["transformers (>=4.6.0)", "wtpsplit (>=1.0.1)"]
2045
+ wunsen = ["wunsen (>=0.0.1)"]
2046
+
2047
  [[package]]
2048
  name = "python-dateutil"
2049
  version = "2.9.0.post0"
 
2870
  [metadata]
2871
  lock-version = "2.0"
2872
  python-versions = "^3.10"
2873
+ content-hash = "e0d24b26c313871a6ec8a9d37d495f8a11ca13efa6e2e82cfa872f77935955e9"
pyproject.toml CHANGED
@@ -16,6 +16,7 @@ fastapi = "^0.115.5"
16
  uvicorn = "^0.32.0"
17
  gradio = "^5.8.0"
18
  langchain-groq = "^0.2.1"
 
19
 
20
 
21
  [build-system]
 
16
  uvicorn = "^0.32.0"
17
  gradio = "^5.8.0"
18
  langchain-groq = "^0.2.1"
19
+ pythainlp = "^5.0.4"
20
 
21
 
22
  [build-system]
tts/tts.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from fastapi import FastAPI, HTTPException
2
  from pythainlp.tokenize import sent_tokenize
3
  from pydantic import BaseModel , Field
@@ -7,6 +8,8 @@ import uuid
7
  from dotenv import load_dotenv
8
  import os
9
 
 
 
10
  # Load environment variables
11
  load_dotenv()
12
 
@@ -62,8 +65,8 @@ def generate_voice(audio_id, text, text_delay, speaker, volume, speed, type_medi
62
  def download_mp3(url, output_path):
63
  headers = {
64
  "Accept-Encoding": "identity;q=1, *;q=0",
65
- "Range": "bytes=0-",
66
  "Referer": "https://voice.botnoi.ai/",
 
67
  }
68
 
69
  response = requests.get(url, headers=headers, stream=True)
@@ -73,12 +76,15 @@ def download_mp3(url, output_path):
73
  if chunk: # Filter out keep-alive chunks
74
  file.write(chunk)
75
  else:
76
- raise HTTPException(status_code=response.status_code, detail="Failed to download MP3")
77
 
78
  # FastAPI endpoint to generate and download voice
79
  @app.post("/generate_voice_botnoi/")
80
  def generate_voice_endpoint(request: VoiceRequest):
81
- text_delay = auto_generate_text_delay_with_pythainlp(request.text)
 
 
 
82
  audio_url = generate_voice(
83
  audio_id=request.audio_id,
84
  text=request.text,
@@ -92,7 +98,10 @@ def generate_voice_endpoint(request: VoiceRequest):
92
  )
93
 
94
  # Generate unique filename for the MP3
95
- output_file = f"{uuid.uuid4()}.mp3"
 
 
 
96
  download_mp3(audio_url, output_file)
97
 
98
  return FileResponse(output_file, media_type="audio/mpeg", filename="output.mp3")
@@ -105,10 +114,6 @@ class Vaja9Request(BaseModel):
105
  phrase_break: int = 0
106
  audiovisual: int = 0
107
 
108
- def split_text_into_chunks(text: str, chunk_size: int = 20) -> list:
109
- words = text.split()
110
- return [' '.join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]
111
-
112
  def generate_vaja9_voice(text: str, speaker: int, phrase_break: int, audiovisual: int):
113
  url = 'https://api.aiforthai.in.th/vaja9/synth_audiovisual'
114
  headers = {
@@ -136,7 +141,10 @@ def generate_vaja9_voice(text: str, speaker: int, phrase_break: int, audiovisual
136
  raise HTTPException(status_code=502, detail=f"Bad Gateway - Connection error: {str(e)}")
137
 
138
  def download_vaja9_wav(url: str, output_path: str):
139
- headers = {'Apikey': os.getenv("VAJA9_API_KEY")}
 
 
 
140
  try:
141
  response = requests.get(url, headers=headers, timeout=60) # Increased timeout to 60 seconds
142
  if response.status_code == 200:
@@ -153,34 +161,43 @@ def download_vaja9_wav(url: str, output_path: str):
153
 
154
  @app.post("/generate_voice_vaja9/")
155
  def generate_voice_vaja9_endpoint(request: Vaja9Request):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
  try:
157
- # Split text into chunks of 20 words if needed
158
- text_chunks = split_text_into_chunks(request.text)
159
- output_files = []
 
 
 
160
 
161
- # Process each chunk
162
- for chunk in text_chunks:
163
- audio_url = generate_vaja9_voice(
164
- text=chunk,
165
- speaker=request.speaker,
166
- phrase_break=request.phrase_break,
167
- audiovisual=request.audiovisual
168
- )
169
-
170
- # Generate unique filename for each chunk
171
- output_file = f"{uuid.uuid4()}.wav"
172
- download_vaja9_wav(audio_url, output_file)
173
- output_files.append(output_file)
174
 
175
- # If only one chunk, return it directly
176
- if len(output_files) == 1:
177
- return FileResponse(output_files[0], media_type="audio/wav", filename="output.wav")
178
-
179
- # TODO: If multiple chunks, they should be combined into a single audio file
180
- # For now, return the first chunk
181
- return FileResponse(output_files[0], media_type="audio/wav", filename="output.wav")
182
 
183
- except HTTPException:
184
- raise
185
  except Exception as e:
186
- raise HTTPException(status_code=502, detail=f"Bad Gateway - Unexpected error: {str(e)}")
 
 
 
 
 
1
+ import time
2
  from fastapi import FastAPI, HTTPException
3
  from pythainlp.tokenize import sent_tokenize
4
  from pydantic import BaseModel , Field
 
8
  from dotenv import load_dotenv
9
  import os
10
 
11
+ import uvicorn
12
+
13
  # Load environment variables
14
  load_dotenv()
15
 
 
65
  def download_mp3(url, output_path):
66
  headers = {
67
  "Accept-Encoding": "identity;q=1, *;q=0",
 
68
  "Referer": "https://voice.botnoi.ai/",
69
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
70
  }
71
 
72
  response = requests.get(url, headers=headers, stream=True)
 
76
  if chunk: # Filter out keep-alive chunks
77
  file.write(chunk)
78
  else:
79
+ raise HTTPException(status_code=response.status_code, detail=f"{response.status_code} Failed to download MP3")
80
 
81
  # FastAPI endpoint to generate and download voice
82
  @app.post("/generate_voice_botnoi/")
83
  def generate_voice_endpoint(request: VoiceRequest):
84
+
85
+ text_delay = request.text
86
+ # text_delay = auto_generate_text_delay_with_pythainlp(request.text)
87
+ print("Text delay:", text_delay)
88
  audio_url = generate_voice(
89
  audio_id=request.audio_id,
90
  text=request.text,
 
98
  )
99
 
100
  # Generate unique filename for the MP3
101
+ print("botnoi url: ", audio_url)
102
+ output_dir = "generated_voice/botnoi"
103
+ os.makedirs(output_dir, exist_ok=True)
104
+ output_file = os.path.join(output_dir, f"{int(time.time())}.mp3")
105
  download_mp3(audio_url, output_file)
106
 
107
  return FileResponse(output_file, media_type="audio/mpeg", filename="output.mp3")
 
114
  phrase_break: int = 0
115
  audiovisual: int = 0
116
 
 
 
 
 
117
  def generate_vaja9_voice(text: str, speaker: int, phrase_break: int, audiovisual: int):
118
  url = 'https://api.aiforthai.in.th/vaja9/synth_audiovisual'
119
  headers = {
 
141
  raise HTTPException(status_code=502, detail=f"Bad Gateway - Connection error: {str(e)}")
142
 
143
  def download_vaja9_wav(url: str, output_path: str):
144
+ api_key = os.getenv("VAJA9_API_KEY")
145
+ if not api_key:
146
+ raise HTTPException(status_code=500, detail="VAJA9_API_KEY environment variable not set")
147
+ headers = {'Apikey': api_key}
148
  try:
149
  response = requests.get(url, headers=headers, timeout=60) # Increased timeout to 60 seconds
150
  if response.status_code == 200:
 
161
 
162
  @app.post("/generate_voice_vaja9/")
163
  def generate_voice_vaja9_endpoint(request: Vaja9Request):
164
+ """
165
+ Generate a voice file using the Vaja9 endpoint.
166
+ - text (str): ข้อความที่ต้องการสังเคราะห์เสียง (สูงสุดไม่เกิน 300 ตัวอักษร)
167
+ - speaker (int): ประเภทของเสียงที่ต้องการ
168
+ 0 : เสียงผู้ชาย
169
+ 1 : เสียงผู้หญิง
170
+ 2 : เสียงเด็กผู้ชาย
171
+ 3 : เสียงเด็กผู้หญิง
172
+ - phrase_break (int): ประเภทของการหยุดเว้นวรรค
173
+ 0 : หยุดเว้นวรรคแบบอัตโนมัติ
174
+ 1 : ไม่หยุดเว้นวรรค
175
+ - audiovisual (int): ประเภทของโมเดล
176
+ 0 : โมเดลสังเคราะห์เสียง
177
+ 1 : โมเดลสังเคราะห์เสียง และภาพ
178
+ """
179
+
180
  try:
181
+ audio_url = generate_vaja9_voice(
182
+ text=request.text,
183
+ speaker=request.speaker,
184
+ phrase_break=request.phrase_break,
185
+ audiovisual=request.audiovisual
186
+ )
187
 
188
+ # Generate unique filename for the WAV file
189
+ output_dir = "generated_voice/vaja9"
190
+ os.makedirs(output_dir, exist_ok=True)
191
+ output_file = os.path.join(output_dir, f"{int(time.time())}.wav")
192
+ download_vaja9_wav(audio_url, output_file)
 
 
 
 
 
 
 
 
193
 
194
+ return FileResponse(output_file, media_type="audio/wav", filename="output.wav")
 
 
 
 
 
 
195
 
196
+ except HTTPException as e:
197
+ raise e
198
  except Exception as e:
199
+ raise HTTPException(status_code=502, detail=f"Bad Gateway - Unexpected error: {str(e)}")
200
+
201
+
202
+ if __name__ == "__main__":
203
+ uvicorn.run("tts:app", host="0.0.0.0", port=8001, reload=True)