Spaces:
Running
Running
added back limitation
Browse files
app.py
CHANGED
@@ -39,10 +39,8 @@ tts_voice_list = asyncio.get_event_loop().run_until_complete(edge_tts.list_voice
|
|
39 |
tts_voices = [f"{v['ShortName']}-{v['Gender']}" for v in tts_voice_list]
|
40 |
|
41 |
# RVC models
|
42 |
-
model_root = snapshot_download(
|
43 |
-
|
44 |
-
models = [d for d in os.listdir(
|
45 |
-
model_root) if os.path.isdir(f"{model_root}/{d}")]
|
46 |
models.sort()
|
47 |
|
48 |
|
@@ -61,14 +59,12 @@ def model_data(model_name):
|
|
61 |
version = cpt.get("version", "v1")
|
62 |
if version == "v1":
|
63 |
if if_f0 == 1:
|
64 |
-
net_g = SynthesizerTrnMs256NSFsid(
|
65 |
-
*cpt["config"], is_half=config.is_half)
|
66 |
else:
|
67 |
net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
|
68 |
elif version == "v2":
|
69 |
if if_f0 == 1:
|
70 |
-
net_g = SynthesizerTrnMs768NSFsid(
|
71 |
-
*cpt["config"], is_half=config.is_half)
|
72 |
else:
|
73 |
net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
|
74 |
else:
|
@@ -133,9 +129,15 @@ def tts(
|
|
133 |
print(tts_text)
|
134 |
print(f"tts_voice: {tts_voice}, speed: {speed}")
|
135 |
print(f"Model name: {model_name}")
|
136 |
-
print(
|
137 |
-
f"F0: {f0_method}, Key: {f0_up_key}, Index: {index_rate}, Protect: {protect}")
|
138 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
t0 = time.time()
|
140 |
if speed >= 0:
|
141 |
speed_str = f"+{speed}%"
|
@@ -151,6 +153,13 @@ def tts(
|
|
151 |
audio, sr = librosa.load(edge_output_filename, sr=16000, mono=True)
|
152 |
duration = len(audio) / sr
|
153 |
print(f"Audio duration: {duration}s")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
154 |
f0_up_key = int(f0_up_key)
|
155 |
|
156 |
tgt_sr, net_g, vc, version, index_file, if_f0 = model_data(model_name)
|
@@ -265,8 +274,7 @@ with app:
|
|
265 |
step=10,
|
266 |
interactive=True,
|
267 |
)
|
268 |
-
tts_text = gr.Textbox(label="Input Text",
|
269 |
-
value="こんにちは、私の名前は初音ミクです!")
|
270 |
with gr.Column():
|
271 |
but0 = gr.Button("Convert", variant="primary")
|
272 |
info_text = gr.Textbox(label="Output info")
|
|
|
39 |
tts_voices = [f"{v['ShortName']}-{v['Gender']}" for v in tts_voice_list]
|
40 |
|
41 |
# RVC models
|
42 |
+
model_root = snapshot_download(repo_id="NoCrypt/miku_RVC", token=os.environ["TOKEN"])
|
43 |
+
models = [d for d in os.listdir(model_root) if os.path.isdir(f"{model_root}/{d}")]
|
|
|
|
|
44 |
models.sort()
|
45 |
|
46 |
|
|
|
59 |
version = cpt.get("version", "v1")
|
60 |
if version == "v1":
|
61 |
if if_f0 == 1:
|
62 |
+
net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
|
|
|
63 |
else:
|
64 |
net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
|
65 |
elif version == "v2":
|
66 |
if if_f0 == 1:
|
67 |
+
net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=config.is_half)
|
|
|
68 |
else:
|
69 |
net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
|
70 |
else:
|
|
|
129 |
print(tts_text)
|
130 |
print(f"tts_voice: {tts_voice}, speed: {speed}")
|
131 |
print(f"Model name: {model_name}")
|
132 |
+
print(f"F0: {f0_method}, Key: {f0_up_key}, Index: {index_rate}, Protect: {protect}")
|
|
|
133 |
try:
|
134 |
+
if limitation and len(tts_text) > 1000:
|
135 |
+
print("Error: Text too long")
|
136 |
+
return (
|
137 |
+
f"Text characters should be at most 1000 in this huggingface space, but got {len(tts_text)} characters.",
|
138 |
+
None,
|
139 |
+
None,
|
140 |
+
)
|
141 |
t0 = time.time()
|
142 |
if speed >= 0:
|
143 |
speed_str = f"+{speed}%"
|
|
|
153 |
audio, sr = librosa.load(edge_output_filename, sr=16000, mono=True)
|
154 |
duration = len(audio) / sr
|
155 |
print(f"Audio duration: {duration}s")
|
156 |
+
if limitation and duration >= 200:
|
157 |
+
print("Error: Audio too long")
|
158 |
+
return (
|
159 |
+
f"Audio should be less than 200 seconds in this huggingface space, but got {duration}s.",
|
160 |
+
edge_output_filename,
|
161 |
+
None,
|
162 |
+
)
|
163 |
f0_up_key = int(f0_up_key)
|
164 |
|
165 |
tgt_sr, net_g, vc, version, index_file, if_f0 = model_data(model_name)
|
|
|
274 |
step=10,
|
275 |
interactive=True,
|
276 |
)
|
277 |
+
tts_text = gr.Textbox(label="Input Text", value="こんにちは、私の名前は初音ミクです!")
|
|
|
278 |
with gr.Column():
|
279 |
but0 = gr.Button("Convert", variant="primary")
|
280 |
info_text = gr.Textbox(label="Output info")
|