kevinwang676 commited on
Commit
5937d96
0 Parent(s):

Duplicate from kevinwang676/web-singer

Browse files
Files changed (7) hide show
  1. .gitattributes +36 -0
  2. Dockerfile +103 -0
  3. README.md +11 -0
  4. app.py +414 -0
  5. config.json +5 -0
  6. requirements.txt +15 -0
  7. syz.zip +3 -0
.gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ stefanie/added_IVF704_Flat_nprobe_1.index filter=lfs diff=lfs merge=lfs -text
36
+ stefanie1 filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2023 Agung Wijaya
2
+ # Installing Gradio via Dockerfile
3
+
4
+ # pull docker
5
+ FROM python:3.8.16-slim-bullseye
6
+
7
+ # install virtualenv
8
+ RUN apt update \
9
+ && apt install -y aria2 wget curl tree unzip ffmpeg build-essential \
10
+ && rm -rf /var/lib/apt/lists/*
11
+
12
+ # clean up
13
+ RUN apt-get clean; \
14
+ rm -rf /etc/machine-id /var/lib/dbus/machine-id /var/lib/apt/lists/* /tmp/* /var/tmp/*; \
15
+ find /var/log -name "*.log" -type f -delete
16
+
17
+ # set tmp
18
+ RUN mkdir -p /content/tmp
19
+ RUN chmod -R 777 /content/tmp
20
+ RUN rm -rf /tmp
21
+ RUN ln -s /content/tmp /tmp
22
+
23
+ # make dir
24
+ RUN mkdir -p /content
25
+ RUN chmod -R 777 /content
26
+
27
+ # try fix mplconfigdir
28
+ RUN mkdir -p /content/mplconfig
29
+ RUN chmod -R 777 /content/mplconfig
30
+
31
+ # try fix
32
+ # RuntimeError: cannot cache function '__shear_dense': no locator available for file '/usr/local/lib/python3.8/site-packages/librosa/util/utils.py'
33
+ RUN mkdir -p /content/numbacache
34
+ RUN chmod -R 777 /content/numbacache
35
+
36
+ # try fix
37
+ # PermissionError: [Errno 13] Permission denied: '/.cache' (demucs)
38
+ RUN mkdir -p /content/demucscache
39
+ RUN chmod -R 777 /content/demucscache
40
+ RUN ln -s /content/demucscache /.cache
41
+
42
+ # set workdir
43
+ WORKDIR /content
44
+
45
+ # set environment
46
+ # PYTORCH_NO_CUDA_MEMORY_CACHING is can help users with even smaller RAM such as 2GB (Demucs)
47
+ ENV PYTORCH_NO_CUDA_MEMORY_CACHING=1 \
48
+ MPLCONFIGDIR=/content/mplconfig \
49
+ NUMBA_CACHE_DIR=/content/numbacache
50
+
51
+ # upgrade pip
52
+ RUN python -m pip install --no-cache-dir --upgrade pip
53
+
54
+ # install library
55
+ RUN pip install --no-cache-dir --upgrade gradio
56
+ RUN pip install --no-cache-dir --upgrade setuptools wheel
57
+ RUN pip install --no-cache-dir faiss-gpu fairseq gradio ffmpeg ffmpeg-python praat-parselmouth pyworld numpy==1.23.5 numba==0.56.4 librosa==0.9.2
58
+
59
+ # copying requirements.txt
60
+ COPY requirements.txt /content/requirements.txt
61
+
62
+ # install requirements
63
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
64
+
65
+ # copying files
66
+ COPY . .
67
+
68
+ # download hubert_base
69
+ RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d /content -o hubert_base.pt
70
+
71
+ # download library infer_pack
72
+ RUN mkdir -p infer_pack
73
+ RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://raw.githubusercontent.com/fumiama/Retrieval-based-Voice-Conversion-WebUI/main/infer_pack/attentions.py -d /content/infer_pack -o attentions.py
74
+ RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://raw.githubusercontent.com/fumiama/Retrieval-based-Voice-Conversion-WebUI/main/infer_pack/commons.py -d /content/infer_pack -o commons.py
75
+ RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://raw.githubusercontent.com/fumiama/Retrieval-based-Voice-Conversion-WebUI/main/infer_pack/models.py -d /content/infer_pack -o models.py
76
+ RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://raw.githubusercontent.com/fumiama/Retrieval-based-Voice-Conversion-WebUI/main/infer_pack/models_onnx.py -d /content/infer_pack -o models_onnx.py
77
+ RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://raw.githubusercontent.com/fumiama/Retrieval-based-Voice-Conversion-WebUI/main/infer_pack/models_onnx_moess.py -d /content/infer_pack -o models_onnx_moess.py
78
+ RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://raw.githubusercontent.com/fumiama/Retrieval-based-Voice-Conversion-WebUI/main/infer_pack/modules.py -d /content/infer_pack -o modules.py
79
+ RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://raw.githubusercontent.com/fumiama/Retrieval-based-Voice-Conversion-WebUI/main/infer_pack/transforms.py -d /content/infer_pack -o transforms.py
80
+
81
+ # download library infer_pipeline.py
82
+ RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/spaces/DJQmUKV/rvc-inference/raw/main/vc_infer_pipeline.py -d /content -o vc_infer_pipeline.py
83
+
84
+ # download library config.py and util.py
85
+ RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/spaces/DJQmUKV/rvc-inference/raw/main/config.py -d /content -o config.py
86
+ RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/spaces/DJQmUKV/rvc-inference/raw/main/util.py -d /content -o util.py
87
+
88
+ # extract models
89
+ RUN mkdir -p model
90
+ RUN unzip -x syz.zip
91
+ RUN mv -v syz model
92
+ RUN rm syz.zip
93
+
94
+ # check /tmp
95
+ RUN ls -l /tmp
96
+
97
+ # expose port gradio
98
+ EXPOSE 7860
99
+
100
+ # run app
101
+ CMD ["python", "app.py"]
102
+
103
+ # Enjoy run Gradio!
README.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Web UI
3
+ emoji: 🌍
4
+ colorFrom: pink
5
+ colorTo: pink
6
+ sdk: docker
7
+ pinned: false
8
+ duplicated_from: kevinwang676/web-singer
9
+ ---
10
+
11
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,414 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Agung Wijaya - WebUI 2023 - Gradio
2
+ # file app.py
3
+
4
+ # Import
5
+ import os
6
+ import psutil
7
+ import shutil
8
+ import numpy as np
9
+ import gradio as gr
10
+ import subprocess
11
+ from pathlib import Path
12
+ import ffmpeg
13
+ import json
14
+ import re
15
+ import time
16
+ import random
17
+ import torch
18
+ import librosa
19
+ import util
20
+
21
+ from config import device
22
+ from infer_pack.models import (
23
+ SynthesizerTrnMs256NSFsid,
24
+ SynthesizerTrnMs256NSFsid_nono
25
+ )
26
+ from vc_infer_pipeline import VC
27
+ from typing import Union
28
+ from os import path, getenv
29
+ from datetime import datetime
30
+ from scipy.io.wavfile import write
31
+
32
# Reference: https://huggingface.co/spaces/zomehwh/rvc-models/blob/main/app.py#L21 # noqa
# Flag is True only when the SYSTEM environment variable equals 'spaces'.
in_hf_space = getenv('SYSTEM') == 'spaces'

# Set High Quality (.wav) or not (.mp3)
high_quality = True

# Read config.json; the context manager closes the file handle as soon as
# parsing is done instead of leaving it to the garbage collector.
with open("config.json") as config_file:
    config_json = json.load(config_file)

# Load hubert model and switch it to evaluation mode
hubert_model = util.load_hubert_model(device, 'hubert_base.pt')
hubert_model.eval()
44
+
45
# Load models
# Every name listed under "models" in config.json is expected to be a
# directory below ./model containing its own config.json and checkpoint.
loaded_models = []
for model_name in config_json.get('models'):
    print(f'Loading model: {model_name}')

    # Load model info (closed deterministically via the context manager)
    with open(path.join('model', model_name, 'config.json'), 'r') as info_file:
        model_info = json.load(info_file)

    # Load RVC checkpoint
    # NOTE(review): torch.load unpickles the file, so only checkpoints from a
    # trusted source should ever be placed in the model directory.
    cpt = torch.load(
        path.join('model', model_name, model_info['model']),
        map_location='cpu'
    )

    # The last config entry is used as the target sample rate.
    tgt_sr = cpt['config'][-1]

    cpt['config'][-3] = cpt['weight']['emb_g.weight'].shape[0]  # n_spk

    # Checkpoints without an 'f0' key are treated as f0 == 1.
    if_f0 = cpt.get('f0', 1)
    net_g: Union[SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono]
    if if_f0 == 1:
        net_g = SynthesizerTrnMs256NSFsid(
            *cpt['config'],
            is_half=util.is_half(device)
        )
    else:
        net_g = SynthesizerTrnMs256NSFsid_nono(*cpt['config'])

    # enc_q is dropped before the weights are loaded
    # (presumably unused at inference time -- TODO confirm).
    del net_g.enc_q

    # According to original code, this thing seems necessary.
    print(net_g.load_state_dict(cpt['weight'], strict=False))

    net_g.eval().to(device)
    net_g = net_g.half() if util.is_half(device) else net_g.float()

    vc = VC(tgt_sr, device, util.is_half(device))

    loaded_models.append(dict(
        name=model_name,
        metadata=model_info,
        vc=vc,
        net_g=net_g,
        if_f0=if_f0,
        target_sr=tgt_sr
    ))
print(f'Models loaded: {len(loaded_models)}')
94
+
95
+ # Command line test
96
def command_line_test():
    """Run ``df -h /home/user/app`` and return its stdout in a Gradio HTML component."""
    argv = "df -h /home/user/app".split()
    completed = subprocess.run(argv, stdout=subprocess.PIPE)
    return gr.HTML(value=completed.stdout.decode())
101
+
102
+ # Check junk files && delete
103
def check_junk():
    """Delete stale working files by shelling out to ``find``.

    Entries under ./ytaudio and ./output are removed once they are older
    than 10 minutes; Gradio temp files and /tmp/*.wav after 5 minutes.
    """
    cleanup_commands = (
        "find ./ytaudio/* -mmin +10 -delete",
        "find ./output/* -mmin +10 -delete",
        "find /tmp/gradio/* -mmin +5 -delete",
        "find /tmp/*.wav -mmin +5 -delete",
    )
    for shell_command in cleanup_commands:
        os.system(shell_command)
    print("Junk files has been deleted!")
110
+
111
+ # Function Information
112
def information():
    """Return a Gradio HTML component showing the disk usage of /content.

    The output of ``du -s /content -h`` is captured and embedded in the
    markup. (``os.system`` only returns the exit status, which made the
    page display "Disk usage: 0" instead of the actual figure.)

    Returns:
        gr.HTML: a paragraph reading "Disk usage: <du output>", or
        "Disk usage: unavailable" when ``du`` cannot be run or prints nothing.
    """
    try:
        completed = subprocess.run(
            ["du", "-s", "/content", "-h"],
            stdout=subprocess.PIPE,
        )
        stats = completed.stdout.decode().strip()
    except OSError:
        # du is missing or not executable; report that instead of crashing the UI.
        stats = ""

    disk_usage = "Disk usage: " + (stats or "unavailable")
    info = "<p>" + disk_usage + "<br/></p>"
    return gr.HTML(value=info)
117
+
118
+ # Function YouTube Downloader Audio
119
def youtube_downloader(
    video_identifier,
    start_time,
    end_time,
    output_filename="track.wav",
    num_attempts=5,
    url_base="",
    quiet=False,
    force=True,
):
    """Download a section of a video's audio as WAV using ``yt-dlp``.

    Args:
        video_identifier: URL (or identifier appended to ``url_base``) of the video.
        start_time: start of the section, forwarded to ``--download-sections``.
        end_time: end of the section, forwarded to ``--download-sections``.
        output_filename: path the audio is written to.
        num_attempts: maximum number of yt-dlp invocations; values below 1
            are treated as a single attempt.
        url_base: prefix prepended to ``video_identifier``.
        quiet: when true, pass ``--quiet --no-warnings`` to yt-dlp.
        force: when false, an already existing output file is returned as-is;
            when true it is deleted and downloaded again.

    Returns:
        pathlib.Path of the downloaded file, or None when every attempt
        failed, yt-dlp is not installed, or no file was produced.
    """
    output_path = Path(output_filename)
    if output_path.exists():
        if not force:
            return output_path
        output_path.unlink()

    target = f"{url_base}{video_identifier}"
    if not target.strip():
        # Nothing to download; yt-dlp would only fail on every attempt.
        return None

    # The command is an argument list executed WITHOUT a shell: the URL comes
    # straight from a text box, so interpolating it into a shell string would
    # let a user run arbitrary commands.
    command = ["yt-dlp"]
    if quiet:
        command += ["--quiet", "--no-warnings"]
    command += [
        "-x",
        "--audio-format", "wav",
        "-f", "bestaudio",
        "-o", str(output_filename),
        "--download-sections", f"*{start_time}-{end_time}",
        "--",  # end of options: the target can never be parsed as a flag
        target,
    ]

    for _ in range(max(1, num_attempts)):
        try:
            subprocess.check_output(command, stderr=subprocess.STDOUT)
        except (subprocess.CalledProcessError, OSError):
            # Non-zero exit status or yt-dlp not runnable: try again.
            continue
        break
    else:
        return None

    return output_path if output_path.exists() else None
156
+
157
+ # Function Audio Separated
158
def audio_separated(audio_input, progress=gr.Progress()):
    """Split the input audio into vocals and accompaniment with demucs.

    Args:
        audio_input: two-item sequence whose items are passed, in order, as
            the second and third arguments of ``scipy.io.wavfile.write``
            (sample rate, then sample data), or None when nothing was supplied.
        progress: Gradio progress tracker.

    Returns:
        tuple: ``(vocals_path, no_vocals_path, message)``. Both paths are None
        when no audio was supplied or when demucs fails.
    """
    # start progress
    progress(progress=0, desc="Starting...")
    time.sleep(1)

    # check file input
    if audio_input is None:
        # show progress
        for _ in progress.tqdm(range(100), desc="Please wait..."):
            time.sleep(0.1)

        return (None, None, 'Please input audio.')

    # create filename
    filename = str(random.randint(10000, 99999)) + datetime.now().strftime("%d%m%Y%H%M%S")
    extension = ".wav" if high_quality else ".mp3"
    input_file = filename + extension

    # progress
    progress(progress=0.10, desc="Please wait...")

    # make dir output
    os.makedirs("output", exist_ok=True)

    # progress
    progress(progress=0.20, desc="Please wait...")

    # write
    # NOTE(review): scipy's writer emits WAV data whatever the extension is,
    # so the ".mp3" input is really a WAV file -- confirm this is intended.
    write(input_file, audio_input[0], audio_input[1])

    # progress
    progress(progress=0.50, desc="Please wait...")

    # demucs process (argument list, no shell involved)
    command_demucs = ["python3", "-m", "demucs", "--two-stems=vocals"]
    if not high_quality:
        command_demucs += ["--mp3", "--mp3-bitrate", "128"]
    command_demucs += ["-d", "cpu", input_file, "-o", "output"]

    try:
        separation = subprocess.run(command_demucs)
    finally:
        # remove file audio, even when demucs could not be started
        try:
            os.remove(input_file)
        except FileNotFoundError:
            pass

    if separation.returncode != 0:
        # Without this check the caller would receive paths to files that
        # demucs never produced.
        return (None, None, 'Audio separation failed.')

    # progress
    progress(progress=0.80, desc="Please wait...")

    # progress
    for _ in progress.tqdm(range(80, 100), desc="Please wait..."):
        time.sleep(0.1)

    result_dir = "./output/htdemucs/" + filename
    return (
        result_dir + "/vocals" + extension,
        result_dir + "/no_vocals" + extension,
        "Successfully...",
    )
222
+
223
+ # Function Voice Changer
224
def voice_changer(audio_input, model_index, pitch_adjust, f0_method, feat_ratio, progress=gr.Progress()):
    """Convert the input audio with one of the loaded voice models.

    Args:
        audio_input: ``(sample_rate, samples)`` tuple, or None when no audio
            was supplied.
        model_index: index into ``loaded_models``, or None when no model
            was selected.
        pitch_adjust: pitch value; cast to ``int`` before use.
        f0_method: forwarded unchanged to the conversion pipeline.
        feat_ratio: forwarded unchanged to the conversion pipeline.
        progress: Gradio progress tracker.

    Returns:
        tuple: ``((target_sr, output_audio), message)`` on success, or
        ``(None, message)`` when the input is rejected.
    """
    # start progress
    progress(progress=0, desc="Starting...")
    time.sleep(1)

    # check file input
    if audio_input is None:
        # progress
        for _ in progress.tqdm(range(100), desc="Please wait..."):
            time.sleep(0.1)

        return (None, 'Please input audio.')

    # check model input
    if model_index is None:
        # progress
        for _ in progress.tqdm(range(100), desc="Please wait..."):
            time.sleep(0.1)

        return (None, 'Please select a model.')

    model = loaded_models[model_index]

    # Reference: so-vits
    (audio_samp, audio_npy) = audio_input

    # progress
    progress(progress=0.10, desc="Please wait...")

    # https://huggingface.co/spaces/zomehwh/rvc-models/blob/main/app.py#L49
    if (audio_npy.shape[0] / audio_samp) > 60 and in_hf_space:

        # progress
        for _ in progress.tqdm(range(10, 100), desc="Please wait..."):
            time.sleep(0.1)

        return (None, 'Input audio is longer than 60 secs.')

    # Bloody hell: https://stackoverflow.com/questions/26921836/
    if audio_npy.dtype != np.float32:
        if np.issubdtype(audio_npy.dtype, np.integer):
            # Integer PCM: scale by the dtype's maximum value.
            audio_npy = (
                audio_npy / np.iinfo(audio_npy.dtype).max
            ).astype(np.float32)
        else:
            # np.iinfo raises ValueError for float dtypes (e.g. float64),
            # so other float input is only cast, not rescaled.
            audio_npy = audio_npy.astype(np.float32)

    # progress
    progress(progress=0.30, desc="Please wait...")

    # multi-channel input is mixed down to mono
    if len(audio_npy.shape) > 1:
        audio_npy = librosa.to_mono(audio_npy.transpose(1, 0))

    # progress
    progress(progress=0.40, desc="Please wait...")

    # the pipeline is fed 16 kHz audio
    if audio_samp != 16000:
        audio_npy = librosa.resample(
            audio_npy,
            orig_sr=audio_samp,
            target_sr=16000
        )

    # progress
    progress(progress=0.50, desc="Please wait...")

    pitch_int = int(pitch_adjust)

    # Passed to the pipeline and printed below as npy / f0 / infer timings.
    times = [0, 0, 0]
    output_audio = model['vc'].pipeline(
        hubert_model,
        model['net_g'],
        model['metadata'].get('speaker_id', 0),
        audio_npy,
        times,
        pitch_int,
        f0_method,
        path.join('model', model['name'], model['metadata']['feat_index']),
        path.join('model', model['name'], model['metadata']['feat_npy']),
        feat_ratio,
        model['if_f0']
    )

    # progress
    progress(progress=0.80, desc="Please wait...")

    print(f'npy: {times[0]}s, f0: {times[1]}s, infer: {times[2]}s')

    # progress
    for _ in progress.tqdm(range(80, 100), desc="Please wait..."):
        time.sleep(0.1)

    return ((model['target_sr'], output_audio), 'Successfully...')
314
+
315
+ # Function Text to Voice
316
def text_to_voice(text_input, model_index, progress=gr.Progress()):
    """Placeholder for text-to-voice; it validates input and reports that the feature is unavailable.

    Args:
        text_input: text to synthesise, or None when nothing was entered.
        model_index: selected model index, or None when no model was chosen.
        progress: Gradio progress tracker. The body always called
            ``progress`` but the name was never defined, so every call
            raised NameError; it is now a defaulted parameter like in the
            other handlers.

    Returns:
        tuple: ``(None, message)``; no audio is produced yet.
    """
    # start progress
    progress(progress=0, desc="Starting...")
    time.sleep(1)

    # check text input
    if text_input is None:
        # progress
        for _ in progress.tqdm(range(2, 100), desc="Please wait..."):
            time.sleep(0.1)

        return (None, 'Please write text.')

    # check model input
    if model_index is None:
        # progress
        for _ in progress.tqdm(range(2, 100), desc="Please wait..."):
            time.sleep(0.1)

        return (None, 'Please select a model.')

    # progress
    for _ in progress.tqdm(range(2, 100), desc="Please wait..."):
        time.sleep(0.1)

    return None, "Sorry, you can't use it yet because this program is being developed!"
342
+
343
# Themes
theme = gr.themes.Base()

# CSS: hides the page footer
css = "footer {visibility: hidden}"

# Blocks
# NOTE(review): the nesting below is reconstructed from the statement order;
# confirm the layout against the running app.
with gr.Blocks(theme=theme, css=css) as App:

    # Header
    gr.HTML(
        "<center>"
        "<h1>Web UI Tools - Agung Wijaya</h1>"
        "</center>"
    )

    # Information
    with gr.Accordion("Just information!"):
        information()

    # Tab YouTube Downloader
    with gr.Tab("YouTube Video to Audio"):
        with gr.Row():
            with gr.Column():
                ydl_url_input = gr.Textbox(label="Enter URL YouTube")
                start = gr.Number(value=0, label="Start Time (seconds)")
                end = gr.Number(value=15, label="End Time (seconds)")
                ydl_url_submit = gr.Button("Convert Now", variant="primary")
            with gr.Column():
                ydl_audio_output = gr.Audio(label="Audio from YouTube")

        with gr.Row():
            with gr.Column():
                # The downloaded audio doubles as the separation input.
                as_audio_input = ydl_audio_output
                as_audio_submit = gr.Button("Separated Now", variant="primary")
            with gr.Column():
                as_audio_vocals = gr.Audio(label="Vocal only")
                as_audio_no_vocals = gr.Audio(label="Music only")
                as_audio_message = gr.Textbox(label="Message", visible=False)

        ydl_url_submit.click(
            fn=youtube_downloader,
            inputs=[ydl_url_input, start, end],
            outputs=[ydl_audio_output],
        )
        as_audio_submit.click(
            fn=audio_separated,
            inputs=[as_audio_input],
            outputs=[as_audio_vocals, as_audio_no_vocals, as_audio_message],
            show_progress=True,
            queue=True,
        )

    # Tab Voice Changer
    with gr.Tab("Voice to AI Models"):
        with gr.Row():
            with gr.Column():
                # The separated vocals double as the voice-changer input.
                vc_audio_input = as_audio_vocals
                model_choices = []
                for m in loaded_models:
                    model_choices.append('%s' % (m['metadata'].get('name')))
                vc_model_index = gr.Dropdown(
                    model_choices,
                    label='Models',
                    type='index',
                )
                vc_pitch_adjust = gr.Slider(label='Pitch', minimum=-24, maximum=24, step=1, value=0)
                vc_f0_method = gr.Radio(label='F0 methods', choices=['pm', 'harvest'], value='pm', interactive=True)
                vc_feat_ratio = gr.Slider(label='Feature ratio', minimum=0, maximum=1, step=0.1, value=0.6)
                vc_audio_submit = gr.Button("Convert Now", variant="primary")
            with gr.Column():
                vc_audio_output = gr.Audio(label="Result audio", type="numpy")
                vc_audio_message = gr.Textbox(label="Message")
        vc_audio_submit.click(
            fn=voice_changer,
            inputs=[vc_audio_input, vc_model_index, vc_pitch_adjust, vc_f0_method, vc_feat_ratio],
            outputs=[vc_audio_output, vc_audio_message],
            show_progress=True,
            queue=True,
        )

# Check Junk
check_junk()

# Launch
queued_app = App.queue(concurrency_count=1, max_size=20)
queued_app.launch(server_name="0.0.0.0", server_port=7860)
413
+
414
+ # Enjoy
config.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "models": [
3
+ "yanzi"
4
+ ]
5
+ }
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ psutil
2
+ demucs
3
+ scipy
4
+ yt-dlp
5
+ ffmpeg
6
+ torch
7
+ torchaudio
8
+ fairseq==0.12.2
9
+ scipy==1.9.3
10
+ pyworld>=0.3.2
11
+ faiss-cpu==1.7.2 ; python_version < "3.11"
12
+ faiss-cpu==1.7.3 ; python_version > "3.10"
13
+ praat-parselmouth>=0.4.3
14
+ librosa==0.9.2
15
+ edge_tts
syz.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db3789fc814af195c129094cd480d4b98542abe63393d147ab5484213c315e69
3
+ size 110561233