kevinwang676 committed on
Commit
df51520
0 Parent(s):

Duplicate from kevinwang676/web-singer-new

Files changed (9)
  1. .gitattributes +36 -0
  2. Dockerfile +109 -0
  3. README.md +11 -0
  4. app.py +422 -0
  5. config.json +7 -0
  6. requirements.txt +16 -0
  7. stefanie_sun.zip +3 -0
  8. vae.zip +3 -0
  9. yanzi_sun.zip +3 -0
.gitattributes ADDED
@@ -0,0 +1,36 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ stefanie/added_IVF704_Flat_nprobe_1.index filter=lfs diff=lfs merge=lfs -text
+ stefanie1 filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,109 @@
+ # Copyright (c) 2023 Agung Wijaya
+ # Installing Gradio via Dockerfile
+
+ # pull base image
+ FROM python:3.8.16-slim-bullseye
+
+ # install system packages
+ RUN apt update \
+     && apt install -y aria2 wget curl tree unzip ffmpeg build-essential \
+     && rm -rf /var/lib/apt/lists/*
+
+ # clean up
+ RUN apt-get clean; \
+     rm -rf /etc/machine-id /var/lib/dbus/machine-id /var/lib/apt/lists/* /tmp/* /var/tmp/*; \
+     find /var/log -name "*.log" -type f -delete
+
+ # set tmp
+ RUN mkdir -p /content/tmp
+ RUN chmod -R 777 /content/tmp
+ RUN rm -rf /tmp
+ RUN ln -s /content/tmp /tmp
+
+ # make dir
+ RUN mkdir -p /content
+ RUN chmod -R 777 /content
+
+ # try fix mplconfigdir
+ RUN mkdir -p /content/mplconfig
+ RUN chmod -R 777 /content/mplconfig
+
+ # try fix
+ # RuntimeError: cannot cache function '__shear_dense': no locator available for file '/usr/local/lib/python3.8/site-packages/librosa/util/utils.py'
+ RUN mkdir -p /content/numbacache
+ RUN chmod -R 777 /content/numbacache
+
+ # try fix
+ # PermissionError: [Errno 13] Permission denied: '/.cache' (demucs)
+ RUN mkdir -p /content/demucscache
+ RUN chmod -R 777 /content/demucscache
+ RUN ln -s /content/demucscache /.cache
+
+ # set workdir
+ WORKDIR /content
+
+ # set environment
+ # PYTORCH_NO_CUDA_MEMORY_CACHING can help users with as little as 2 GB of RAM (Demucs)
+ ENV PYTORCH_NO_CUDA_MEMORY_CACHING=1 \
+     MPLCONFIGDIR=/content/mplconfig \
+     NUMBA_CACHE_DIR=/content/numbacache
+
+ # upgrade pip
+ RUN python -m pip install --no-cache-dir --upgrade pip
+
+ # install libraries
+ RUN pip install --no-cache-dir --upgrade gradio
+ RUN pip install --no-cache-dir --upgrade setuptools wheel
+ RUN pip install --no-cache-dir faiss-gpu fairseq gradio ffmpeg ffmpeg-python praat-parselmouth pyworld numpy==1.23.5 numba==0.56.4 librosa==0.9.2
+
+ # copy requirements.txt
+ COPY requirements.txt /content/requirements.txt
+
+ # install requirements
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+ # copy files
+ COPY . .
+
+ # download hubert_base
+ RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d /content -o hubert_base.pt
+
+ # download infer_pack library
+ RUN mkdir -p infer_pack
+ RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://raw.githubusercontent.com/fumiama/Retrieval-based-Voice-Conversion-WebUI/main/infer_pack/attentions.py -d /content/infer_pack -o attentions.py
+ RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://raw.githubusercontent.com/fumiama/Retrieval-based-Voice-Conversion-WebUI/main/infer_pack/commons.py -d /content/infer_pack -o commons.py
+ RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://raw.githubusercontent.com/fumiama/Retrieval-based-Voice-Conversion-WebUI/main/infer_pack/models.py -d /content/infer_pack -o models.py
+ RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://raw.githubusercontent.com/fumiama/Retrieval-based-Voice-Conversion-WebUI/main/infer_pack/models_onnx.py -d /content/infer_pack -o models_onnx.py
+ RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://raw.githubusercontent.com/fumiama/Retrieval-based-Voice-Conversion-WebUI/main/infer_pack/models_onnx_moess.py -d /content/infer_pack -o models_onnx_moess.py
+ RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://raw.githubusercontent.com/fumiama/Retrieval-based-Voice-Conversion-WebUI/main/infer_pack/modules.py -d /content/infer_pack -o modules.py
+ RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://raw.githubusercontent.com/fumiama/Retrieval-based-Voice-Conversion-WebUI/main/infer_pack/transforms.py -d /content/infer_pack -o transforms.py
+
+ # download vc_infer_pipeline.py
+ RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/spaces/DJQmUKV/rvc-inference/raw/main/vc_infer_pipeline.py -d /content -o vc_infer_pipeline.py
+
+ # download config.py and util.py
+ RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/spaces/DJQmUKV/rvc-inference/raw/main/config.py -d /content -o config.py
+ RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/spaces/DJQmUKV/rvc-inference/raw/main/util.py -d /content -o util.py
+
+ # extract models
+ RUN mkdir -p model
+ RUN unzip -x yanzi_sun.zip
+ RUN mv -v yanzi_sun model
+ RUN rm yanzi_sun.zip
+ RUN unzip -x vae.zip
+ RUN mv -v vae model
+ RUN rm vae.zip
+ RUN unzip -x stefanie_sun.zip
+ RUN mv -v stefanie_sun model
+ RUN rm stefanie_sun.zip
+
+ # check /tmp
+ RUN ls -l /tmp
+
+ # expose port gradio
+ EXPOSE 7860
+
+ # run app
+ CMD ["python", "app.py"]
+
+ # Enjoy running Gradio!
README.md ADDED
@@ -0,0 +1,11 @@
+ ---
+ title: Web UI
+ emoji: 🌍
+ colorFrom: pink
+ colorTo: pink
+ sdk: docker
+ pinned: false
+ duplicated_from: kevinwang676/web-singer-new
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,422 @@
+ # Agung Wijaya - WebUI 2023 - Gradio
+ # file app.py
+
+ # Import
+ import os
+ import psutil
+ import shutil
+ import numpy as np
+ import gradio as gr
+ import subprocess
+ from pathlib import Path
+ import ffmpeg
+ import json
+ import re
+ import time
+ import random
+ import torch
+ import librosa
+ import util
+
+ from config import device
+ from infer_pack.models import (
+     SynthesizerTrnMs256NSFsid,
+     SynthesizerTrnMs256NSFsid_nono
+ )
+ from vc_infer_pipeline import VC
+ from typing import Union
+ from os import path, getenv
+ from datetime import datetime
+ from scipy.io.wavfile import write
+ from pydub import AudioSegment
+
+ title_markdown = ("""
+ <h1 align="center"><img src="https://upload.wikimedia.org/wikipedia/zh/d/d0/Kite_album.jpg", alt="syz" border="0" style="margin: 0 auto; height: 300px;" /> </h1>
+ """)
+ title_markdown2 = ("""
+ <h1 align="center"><img src="https://e.snmc.io/i/600/w/28577c025a5c82728a1b957ee373bc43/9622540/%E8%AE%B8%E5%B5%A9-xu-song-%E8%87%AA%E5%AE%9A%E7%BE%A9-cover-art.jpg", alt="vae" border="0" style="margin: 0 auto; height: 300px;" /> </h1>
+ """)
+ # Reference: https://huggingface.co/spaces/zomehwh/rvc-models/blob/main/app.py#L21 # noqa
+ in_hf_space = getenv('SYSTEM') == 'spaces'
+
+ # Set High Quality (.wav) or not (.mp3)
+ high_quality = True
+
+ # Read config.json
+ config_json = json.loads(open("config.json").read())
+
+ # Load hubert model
+ hubert_model = util.load_hubert_model(device, 'hubert_base.pt')
+ hubert_model.eval()
+
+ # Load models
+ loaded_models = []
+ for model_name in config_json.get('models'):
+     print(f'Loading model: {model_name}')
+
+     # Load model info
+     model_info = json.load(
+         open(path.join('model', model_name, 'config.json'), 'r')
+     )
+
+     # Load RVC checkpoint
+     cpt = torch.load(
+         path.join('model', model_name, model_info['model']),
+         map_location='cpu'
+     )
+
+     tgt_sr = cpt['config'][-1]
+
+     cpt['config'][-3] = cpt['weight']['emb_g.weight'].shape[0] # n_spk
+
+     if_f0 = cpt.get('f0', 1)
+     net_g: Union[SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono]
+     if if_f0 == 1:
+         net_g = SynthesizerTrnMs256NSFsid(
+             *cpt['config'],
+             is_half=util.is_half(device)
+         )
+     else:
+         net_g = SynthesizerTrnMs256NSFsid_nono(*cpt['config'])
+
+     del net_g.enc_q
+
+     # According to original code, this thing seems necessary.
+     print(net_g.load_state_dict(cpt['weight'], strict=False))
+
+     net_g.eval().to(device)
+     net_g = net_g.half() if util.is_half(device) else net_g.float()
+
+     vc = VC(tgt_sr, device, util.is_half(device))
+
+     loaded_models.append(dict(
+         name=model_name,
+         metadata=model_info,
+         vc=vc,
+         net_g=net_g,
+         if_f0=if_f0,
+         target_sr=tgt_sr
+     ))
+ print(f'Models loaded: {len(loaded_models)}')
+
+ # Command line test
+ def command_line_test():
+     command = "df -h /home/user/app"
+     process = subprocess.run(command.split(), stdout=subprocess.PIPE)
+     result = process.stdout.decode()
+     return gr.HTML(value=result)
+
+ def mix(audio1, audio2):
+     sound1 = AudioSegment.from_file(audio1)
+     sound2 = AudioSegment.from_file(audio2)
+     length = len(sound1)
+     mixed = sound1[:length].overlay(sound2)
+
+     mixed.export("song.wav", format="wav")
+
+     return "song.wav"
+
+
+ # Function YouTube Downloader Audio
+ def youtube_downloader(
+     video_identifier,
+     start_time,
+     end_time,
+     output_filename="track.wav",
+     num_attempts=5,
+     url_base="",
+     quiet=False,
+     force=True,
+ ):
+     output_path = Path(output_filename)
+     if output_path.exists():
+         if not force:
+             return output_path
+         else:
+             output_path.unlink()
+
+     quiet = "--quiet --no-warnings" if quiet else ""
+     command = f"""
+         yt-dlp {quiet} -x --audio-format wav -f bestaudio -o "{output_filename}" --download-sections "*{start_time}-{end_time}" "{url_base}{video_identifier}" # noqa: E501
+     """.strip()
+
+     attempts = 0
+     while True:
+         try:
+             _ = subprocess.check_output(command, shell=True, stderr=subprocess.STDOUT)
+         except subprocess.CalledProcessError:
+             attempts += 1
+             if attempts == num_attempts:
+                 return None
+         else:
+             break
+
+     if output_path.exists():
+         return output_path
+     else:
+         return None
+
+ # Function Audio Separated
+ def audio_separated(audio_input, progress=gr.Progress()):
+     # start progress
+     progress(progress=0, desc="Starting...")
+     time.sleep(0.1)
+
+     # check file input
+     if audio_input is None:
+         # show progress
+         for i in progress.tqdm(range(100), desc="Please wait..."):
+             time.sleep(0.01)
+
+         return (None, None, 'Please input audio.')
+
+     # create filename
+     filename = str(random.randint(10000,99999))+datetime.now().strftime("%d%m%Y%H%M%S")
+
+     # progress
+     progress(progress=0.10, desc="Please wait...")
+
+     # make dir output
+     os.makedirs("output", exist_ok=True)
+
+     # progress
+     progress(progress=0.20, desc="Please wait...")
+
+     # write
+     if high_quality:
+         write(filename+".wav", audio_input[0], audio_input[1])
+     else:
+         write(filename+".mp3", audio_input[0], audio_input[1])
+
+     # progress
+     progress(progress=0.50, desc="Please wait...")
+
+     # demucs process
+     if high_quality:
+         command_demucs = "python3 -m demucs --two-stems=vocals -d cpu "+filename+".wav -o output"
+     else:
+         command_demucs = "python3 -m demucs --two-stems=vocals --mp3 --mp3-bitrate 128 -d cpu "+filename+".mp3 -o output"
+
+     os.system(command_demucs)
+
+     # progress
+     progress(progress=0.70, desc="Please wait...")
+
+     # remove file audio
+     if high_quality:
+         command_delete = "rm -v ./"+filename+".wav"
+     else:
+         command_delete = "rm -v ./"+filename+".mp3"
+
+     os.system(command_delete)
+
+     # progress
+     progress(progress=0.80, desc="Please wait...")
+
+     # progress
+     for i in progress.tqdm(range(80,100), desc="Please wait..."):
+         time.sleep(0.1)
+
+     if high_quality:
+         return "./output/htdemucs/"+filename+"/vocals.wav","./output/htdemucs/"+filename+"/no_vocals.wav","Successfully..."
+     else:
+         return "./output/htdemucs/"+filename+"/vocals.mp3","./output/htdemucs/"+filename+"/no_vocals.mp3","Successfully..."
+
+ # Function Voice Changer
+ def voice_changer(audio_input, model_index, pitch_adjust, f0_method, feat_ratio, progress=gr.Progress()):
+     # start progress
+     progress(progress=0, desc="Starting...")
+     time.sleep(1)
+
+     # check file input
+     if audio_input is None:
+         # progress
+         for i in progress.tqdm(range(100), desc="Please wait..."):
+             time.sleep(0.1)
+
+         return (None, 'Please input audio.')
+
+     # check model input
+     if model_index is None:
+         # progress
+         for i in progress.tqdm(range(100), desc="Please wait..."):
+             time.sleep(0.1)
+
+         return (None, 'Please select a model.')
+
+     model = loaded_models[model_index]
+
+     # Reference: so-vits
+     (audio_samp, audio_npy) = audio_input
+
+     # progress
+     progress(progress=0.10, desc="Please wait...")
+
+     # https://huggingface.co/spaces/zomehwh/rvc-models/blob/main/app.py#L49
+     if (audio_npy.shape[0] / audio_samp) > 60 and in_hf_space:
+
+         # progress
+         for i in progress.tqdm(range(10,100), desc="Please wait..."):
+             time.sleep(0.1)
+
+         return (None, 'Input audio is longer than 60 secs.')
+
+     # Bloody hell: https://stackoverflow.com/questions/26921836/
+     if audio_npy.dtype != np.float32: # :thonk:
+         audio_npy = (
+             audio_npy / np.iinfo(audio_npy.dtype).max
+         ).astype(np.float32)
+
+     # progress
+     progress(progress=0.30, desc="Please wait...")
+
+     if len(audio_npy.shape) > 1:
+         audio_npy = librosa.to_mono(audio_npy.transpose(1, 0))
+
+     # progress
+     progress(progress=0.40, desc="Please wait...")
+
+     if audio_samp != 16000:
+         audio_npy = librosa.resample(
+             audio_npy,
+             orig_sr=audio_samp,
+             target_sr=16000
+         )
+
+     # progress
+     progress(progress=0.50, desc="Please wait...")
+
+     pitch_int = int(pitch_adjust)
+
+     times = [0, 0, 0]
+     output_audio = model['vc'].pipeline(
+         hubert_model,
+         model['net_g'],
+         model['metadata'].get('speaker_id', 0),
+         audio_npy,
+         times,
+         pitch_int,
+         f0_method,
+         path.join('model', model['name'], model['metadata']['feat_index']),
+         path.join('model', model['name'], model['metadata']['feat_npy']),
+         feat_ratio,
+         model['if_f0']
+     )
+
+     # progress
+     progress(progress=0.80, desc="Please wait...")
+
+     print(f'npy: {times[0]}s, f0: {times[1]}s, infer: {times[2]}s')
+
+     # progress
+     for i in progress.tqdm(range(80,100), desc="Please wait..."):
+         time.sleep(0.1)
+
+     return ((model['target_sr'], output_audio), 'Successfully...')
+
+ # Function Text to Voice
+ def text_to_voice(text_input, model_index, progress=gr.Progress()):
+     # start progress
+     progress(progress=0, desc="Starting...")
+     time.sleep(1)
+
+     # check text input
+     if text_input is None:
+         # progress
+         for i in progress.tqdm(range(2,100), desc="Please wait..."):
+             time.sleep(0.1)
+
+         return (None, 'Please write text.')
+
+     # check model input
+     if model_index is None:
+         # progress
+         for i in progress.tqdm(range(2,100), desc="Please wait..."):
+             time.sleep(0.1)
+
+         return (None, 'Please select a model.')
+
+     # progress
+     for i in progress.tqdm(range(2,100), desc="Please wait..."):
+         time.sleep(0.1)
+
+     return None, "Sorry, you can't use it yet because this program is being developed!"
+
+ # Themes
+ theme = gr.themes.Base()
+
+ # CSS
+ css = "footer {visibility: hidden}"
+
+ # Blocks
+ with gr.Blocks(theme=theme, css=css) as App:
+
+     # Header
+     gr.HTML("<center>"
+             "<h1>🥳🎶🎡 - AI歌手,RVC歌声转换</h1>"
+             "</center>")
+     gr.Markdown("### <center>🦄 - 能够自动提取视频中的声音,并去除背景音;Powered by [RVC-Project](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)</center>")
+     gr.Markdown("### <center>更多精彩应用,敬请关注[滔滔AI](http://www.talktalkai.com);滔滔AI,为爱滔滔!💕</center>")
+     # Information
+     with gr.Accordion("🎙️您的AI歌手Stefanie:"):
+         gr.Markdown(title_markdown)
+     with gr.Accordion("🎤您的AI歌手Vae:"):
+         gr.Markdown(title_markdown2)
+
+     # Tab YouTube Downloader
+     with gr.Tab("🤗 - B站视频提取声音"):
+         with gr.Row():
+             with gr.Column():
+                 ydl_url_input = gr.Textbox(label="B站视频网址(请填写相应的BV号)", value = "https://www.bilibili.com/video/BV...")
+                 start = gr.Number(value=0, label="起始时间 (秒)")
+                 end = gr.Number(value=15, label="结束时间 (秒)")
+                 ydl_url_submit = gr.Button("提取声音文件吧", variant="primary")
+                 as_audio_submit = gr.Button("去除背景音吧", variant="primary")
+             with gr.Column():
+                 ydl_audio_output = gr.Audio(label="Audio from Bilibili")
+                 as_audio_input = ydl_audio_output
+                 as_audio_vocals = gr.Audio(label="Vocal only")
+                 as_audio_no_vocals = gr.Audio(label="Music only", type="filepath")
+                 as_audio_message = gr.Textbox(label="Message", visible=False)
+
+         ydl_url_submit.click(fn=youtube_downloader, inputs=[ydl_url_input, start, end], outputs=[ydl_audio_output])
+         as_audio_submit.click(fn=audio_separated, inputs=[as_audio_input], outputs=[as_audio_vocals, as_audio_no_vocals, as_audio_message], show_progress=True, queue=True)
+
+     # Tab Voice Changer
+     with gr.Tab("🎶 - 歌声转换"):
+         with gr.Row():
+             with gr.Column():
+                 vc_audio_input = as_audio_vocals
+                 vc_model_index = gr.Dropdown(
+                     [
+                         '%s' % (
+                             m['metadata'].get('name')
+                         )
+                         for m in loaded_models
+                     ],
+                     label='Models',
+                     type='index'
+                 )
+                 vc_pitch_adjust = gr.Slider(label='Pitch', minimum=-24, maximum=24, step=1, value=0)
+                 vc_f0_method = gr.Radio(label='F0 methods', choices=['pm', 'harvest'], value='pm', interactive=True)
+                 vc_feat_ratio = gr.Slider(label='Feature ratio', minimum=0, maximum=1, step=0.1, value=0.6)
+                 vc_audio_submit = gr.Button("进行歌声转换吧!", variant="primary")
+                 full_song = gr.Button("加入歌曲伴奏吧!", variant="primary")
+             with gr.Column():
+                 vc_audio_output = gr.Audio(label="Result audio", type="filepath")
+                 vc_audio_message = gr.Textbox(label="Message")
+                 new_song = gr.Audio(label="Full song", type="filepath")
+         vc_audio_submit.click(fn=voice_changer, inputs=[vc_audio_input, vc_model_index, vc_pitch_adjust, vc_f0_method, vc_feat_ratio], outputs=[vc_audio_output, vc_audio_message], show_progress=True, queue=True)
+         full_song.click(fn=mix, inputs=[vc_audio_output, as_audio_no_vocals], outputs=[new_song])
+     gr.Markdown("### <center>注意❗:请不要生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及个人娱乐使用。</center>")
+     gr.HTML('''
+     <div class="footer">
+         <p>🌊🏞️🎶 - 江水东流急,滔滔无尽声。 明·顾璘
+         </p>
+     </div>
+     ''')
+
+ # Launch
+ App.queue(concurrency_count=1, max_size=20).launch(server_name="0.0.0.0", server_port=7860)
+
+ # Enjoy
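
Note on the handlers above: audio_separated and voice_changer receive the default gr.Audio value, a (sample_rate, numpy_array) tuple, while mix works on file paths because its inputs are declared with type="filepath". The snippet below is a minimal, self-contained sketch (not part of the commit) of the same preprocessing voice_changer applies before calling the RVC pipeline; the function name prepare_for_rvc is made up for illustration.

import numpy as np
import librosa

def prepare_for_rvc(sample_rate: int, samples: np.ndarray) -> np.ndarray:
    # Integer PCM (e.g. int16 from gr.Audio) is scaled to float32 in [-1, 1].
    if samples.dtype != np.float32:
        samples = (samples / np.iinfo(samples.dtype).max).astype(np.float32)
    # Stereo input is collapsed to mono; librosa.to_mono expects (channels, samples).
    if samples.ndim > 1:
        samples = librosa.to_mono(samples.transpose(1, 0))
    # HuBERT features are extracted at 16 kHz, so everything is resampled there.
    if sample_rate != 16000:
        samples = librosa.resample(samples, orig_sr=sample_rate, target_sr=16000)
    return samples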
config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "models": [
+     "yanzi_sun",
+     "vae",
+     "stefanie_sun"
+   ]
+ }
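
For each name listed above, app.py expects a matching folder under model/ (unpacked from the zips by the Dockerfile) containing its own config.json, from which it reads the checkpoint filename (model), the faiss index (feat_index), the feature matrix (feat_npy), and optionally name and speaker_id. Those per-model files live inside the LFS zips and are not visible in this commit, so the sketch below uses placeholder filenames purely to show the expected shape.

import json

# Hypothetical model/stefanie_sun/config.json -- the keys mirror what app.py reads;
# the filenames are placeholders, not the actual contents of stefanie_sun.zip.
example_model_config = {
    "name": "stefanie_sun",                            # label shown in the Models dropdown
    "model": "stefanie_sun.pth",                       # RVC checkpoint passed to torch.load
    "feat_index": "added_IVF704_Flat_nprobe_1.index",  # retrieval index given to vc.pipeline
    "feat_npy": "total_fea.npy",                       # feature matrix given to vc.pipeline
    "speaker_id": 0                                    # defaults to 0 when omitted
}

print(json.dumps(example_model_config, indent=2))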
requirements.txt ADDED
@@ -0,0 +1,16 @@
+ psutil
+ pydub
+ demucs
+ scipy
+ yt-dlp
+ ffmpeg
+ torch
+ torchaudio
+ fairseq==0.12.2
+ scipy==1.9.3
+ pyworld>=0.3.2
+ faiss-cpu==1.7.2 ; python_version < "3.11"
+ faiss-cpu==1.7.3 ; python_version > "3.10"
+ praat-parselmouth>=0.4.3
+ librosa==0.9.2
+ edge_tts
stefanie_sun.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c29401458ad1c81de89a37a7ea85bb3b539981ab70fc5201f56fca20417d8fa0
+ size 110050690
vae.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a4973899deb5cdc8a8a84bbb270cb760b69be3d2c3bb188554adfa22d92c70fe
+ size 92628751
yanzi_sun.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1c3cd290654a3483818e51d7a768b6b9a79124a89a7b1d8b5a4773e6c8ee816b
+ size 107140442