TaiYouWeb committed on
Commit
5ca847f
1 Parent(s): 6ba53fa

Upload 5 files

Browse files
Files changed (5) hide show
  1. app.py +95 -0
  2. config.py +3 -0
  3. download.py +17 -0
  4. requirements.txt +12 -0
  5. run.py +11 -0
app.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import os
3
+ import tempfile
4
+ from typing import List, Optional
5
+
6
+ import TTS.api
7
+ import torch
8
+ from pydub import AudioSegment
9
+
10
+ from fastapi import FastAPI, File, Form, UploadFile, HTTPException
11
+ from fastapi.responses import StreamingResponse, Response
12
+
13
+ import config
14
+
15
# Run inference on the GPU when CUDA is available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load every model listed in config.models once, at import time, keyed by its
# config identifier and moved to the selected device. A comprehension keeps
# the loop variables out of the module namespace and avoids shadowing the
# builtin ``id``.
models = {
    model_id: TTS.api.TTS(model_name).to(device)
    for model_id, model_name in config.models.items()
}
21
+
22
class SynthesizeResponse(Response):
    """Response subclass whose declared media type is ``audio/wav``."""

    media_type = "audio/wav"
24
+
25
# The ASGI application object; routes below register themselves on it.
app = FastAPI()
26
+
27
@app.post('/tts', response_class=SynthesizeResponse)
async def synthesize(
    text: str = Form('Hello,World!'),
    speaker_wavs: List[UploadFile] = File(None),
    speaker_idx: str = Form('Ana Florence'),
    language: str = Form('ja'),
    temperature: float = Form(0.65),
    length_penalty: float = Form(1.0),
    repetition_penalty: float = Form(2.0),
    top_k: int = Form(50),
    top_p: float = Form(0.8),
    speed: float = Form(1.0),
    enable_text_splitting: bool = Form(True)
) -> StreamingResponse:
    """Synthesize ``text`` with the ``'multi'`` model and return WAV audio.

    When one or more ``speaker_wavs`` are uploaded, each is decoded with
    pydub, re-encoded as WAV into a temporary file, and the list of temp
    file paths is passed to the model as ``speaker_wav``. Otherwise the
    named ``speaker_idx`` is passed as ``speaker``.

    Args:
        text: Text to synthesize.
        speaker_wavs: Optional uploaded reference audio files.
        speaker_idx: Speaker name used when no reference audio is uploaded.
        language: Language code forwarded to the model.
        temperature, length_penalty, repetition_penalty, top_k, top_p,
        speed, enable_text_splitting: Generation options forwarded
            unchanged to ``tts_to_file``.

    Returns:
        A ``StreamingResponse`` with media type ``audio/wav``.

    Raises:
        HTTPException: 400 if an uploaded file cannot be decoded or
            re-encoded as WAV.
    """
    temp_files = []
    try:
        for speaker_wav in speaker_wavs or []:
            speaker_wav_bytes = await speaker_wav.read()
            # Convert the uploaded audio file to WAV format using pydub.
            try:
                audio = AudioSegment.from_file(io.BytesIO(speaker_wav_bytes))
                wav_buffer = io.BytesIO()
                audio.export(wav_buffer, format="wav")
            except Exception as e:
                raise HTTPException(
                    status_code=400,
                    detail=f"Error processing audio file: {e}",
                ) from e

            # delete=False because the model reads the file by path after it
            # is closed; the ``finally`` clause below removes it.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav_file:
                # Register the path before writing so a failed write cannot
                # leak the file.
                temp_files.append(temp_wav_file.name)
                temp_wav_file.write(wav_buffer.getvalue())

        # The two synthesis modes differ only in how the voice is specified.
        if temp_files:
            voice_kwargs = {'speaker_wav': temp_files}
        else:
            voice_kwargs = {'speaker': speaker_idx}

        output_buffer = io.BytesIO()
        models['multi'].tts_to_file(
            text=text,
            language=language,
            file_path=output_buffer,
            temperature=temperature,
            length_penalty=length_penalty,
            repetition_penalty=repetition_penalty,
            top_k=top_k,
            top_p=top_p,
            speed=speed,
            enable_text_splitting=enable_text_splitting,
            **voice_kwargs,
        )
        output_buffer.seek(0)  # Rewind so the response streams from the start.
        return StreamingResponse(output_buffer, media_type="audio/wav")
    finally:
        # Always remove the temporary reference files, on success or failure.
        for temp_file in temp_files:
            try:
                os.remove(temp_file)
            except FileNotFoundError:
                pass  # Already gone; nothing left to clean up.
config.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
# Maps a short model identifier to the TTS model name it stands for.
models = {"multi": "tts_models/multilingual/multi-dataset/xtts_v2"}
download.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import TTS.api
2
+ import TTS.utils.manage as manage
3
+ import config
4
+
5
# Define a function that automatically accepts the terms of service.
def ask_tos_patch(self, output_path):
    """Print a notice and report the terms of service as accepted.

    Both arguments are ignored; the function always returns ``True``.
    """
    notice = "Automatically accepting the terms of service."
    print(notice)
    return True
9
+
10
# Replace the original ask_tos method with the function defined above.
manage.ModelManager.ask_tos = ask_tos_patch

# Initialise a TTS instance and download the models.
tts = TTS.api.TTS()

# Only the model names are needed, so iterate the values directly; this also
# avoids shadowing the builtin ``id`` with an unused loop variable.
for model_name in config.models.values():
    tts.download_model_by_name(model_name)
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ --extra-index-url https://download.pytorch.org/whl/cpu
2
+ torch
3
+ torchaudio
4
+ coqui-tts
5
+ hangul-romanize
6
+ coqui-tts[ja]
7
+ coqui-tts[zh]
8
+ uvicorn
9
+ fastapi
10
+ python-multipart
11
+ python-dotenv
12
+ pydub
run.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import uvicorn
2
+ import os
3
+
4
+ from app import app
5
+ from dotenv import load_dotenv
6
+
7
# Load variables via python-dotenv before the PORT lookup below.
load_dotenv()

# Port to listen on: the PORT environment variable, or 3151 when unset.
port = int(os.environ.get('PORT', 3151))

if __name__ == '__main__':
    # Start the server only when this file is run as a script.
    uvicorn.run(app, port=port, host='0.0.0.0')