web-singer-2

Runtime error

App Files Files Community

kevinwang676 committed on May 14, 2023

Commit

5937d96

•

0 Parent(s):

Duplicate from kevinwang676/web-singer

Browse files

Files changed (7) hide show

.gitattributes +36 -0
Dockerfile +103 -0
README.md +11 -0
app.py +414 -0
config.json +5 -0
requirements.txt +15 -0
syz.zip +3 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,36 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+stefanie/added_IVF704_Flat_nprobe_1.index filter=lfs diff=lfs merge=lfs -text
+stefanie1 filter=lfs diff=lfs merge=lfs -text

Dockerfile ADDED Viewed

	@@ -0,0 +1,103 @@

+# Copyright (c) 2023 Agung Wijaya
+# Installing Gradio via Dockerfile
+# pull docker
+FROM python:3.8.16-slim-bullseye
+# install system packages and clean up in the SAME layer
+# (files removed in a later RUN still occupy space in the earlier layer)
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        aria2 \
+        build-essential \
+        curl \
+        ffmpeg \
+        tree \
+        unzip \
+        wget \
+    && apt-get clean \
+    && rm -rf /etc/machine-id /var/lib/dbus/machine-id /var/lib/apt/lists/* /tmp/* /var/tmp/* \
+    && find /var/log -name "*.log" -type f -delete
+# create every writable directory in a single layer
+#   /content/tmp         - backing store for /tmp (symlinked below)
+#   /content/mplconfig   - try fix mplconfigdir
+#   /content/numbacache  - try fix
+#     RuntimeError: cannot cache function '__shear_dense': no locator available for file '/usr/local/lib/python3.8/site-packages/librosa/util/utils.py'
+#   /content/demucscache - target of /.cache, try fix
+#     PermissionError: [Errno 13] Permission denied: '/.cache' (demucs)
+# NOTE(review): mode 777 is kept from the original, presumably because the container
+# does not run as root at runtime - confirm, and tighten if a fixed UID is known
+RUN mkdir -p /content/tmp /content/mplconfig /content/numbacache /content/demucscache \
+    && chmod -R 777 /content \
+    && rm -rf /tmp \
+    && ln -s /content/tmp /tmp \
+    && ln -s /content/demucscache /.cache
+# set workdir (relative paths in the instructions that follow resolve against /content)
+WORKDIR /content
+# set environment
+# PYTORCH_NO_CUDA_MEMORY_CACHING is meant to help users with very little RAM, such as 2GB (Demucs)
+# MPLCONFIGDIR and NUMBA_CACHE_DIR point at the world-writable directories created under /content
+ENV PYTORCH_NO_CUDA_MEMORY_CACHING=1 \
+    MPLCONFIGDIR=/content/mplconfig \
+    NUMBA_CACHE_DIR=/content/numbacache
+# upgrade packaging tools in one layer
+RUN python -m pip install --no-cache-dir --upgrade pip setuptools wheel
+# install library
+# - gradio is installed once and capped below 4: app.py calls queue(concurrency_count=...),
+#   which Gradio 4 no longer accepts
+# - the PyPI package named "ffmpeg" is dropped: it is not the ffmpeg binary (apt provides that)
+#   and it installs the same top-level `ffmpeg` module as ffmpeg-python
+# - faiss-gpu is dropped: requirements.txt pins faiss-cpu, which provides the same `faiss` module
+RUN pip install --no-cache-dir "gradio<4" fairseq ffmpeg-python praat-parselmouth pyworld numpy==1.23.5 numba==0.56.4 librosa==0.9.2
+# copying requirements.txt (on its own, so this layer is reused until the file changes)
+COPY requirements.txt /content/requirements.txt
+# install requirements
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+# third-party assets are fetched BEFORE the application is copied, so editing app files
+# does not invalidate (and re-download) these layers
+# NOTE(review): every URL tracks a moving `main` branch; pin a commit for reproducible builds
+# download hubert_base
+RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d /content -o hubert_base.pt
+# download library infer_pack (one layer for all modules; `set -e` aborts on the first failed download)
+RUN set -eu; \
+    base=https://raw.githubusercontent.com/fumiama/Retrieval-based-Voice-Conversion-WebUI/main/infer_pack; \
+    mkdir -p /content/infer_pack; \
+    for module in attentions commons models models_onnx models_onnx_moess modules transforms; do \
+        aria2c --console-log-level=error -c -x 16 -s 16 -k 1M "$base/$module.py" -d /content/infer_pack -o "$module.py"; \
+    done
+# download library vc_infer_pipeline.py, config.py and util.py
+RUN set -eu; \
+    base=https://huggingface.co/spaces/DJQmUKV/rvc-inference/raw/main; \
+    for module in vc_infer_pipeline config util; do \
+        aria2c --console-log-level=error -c -x 16 -s 16 -k 1M "$base/$module.py" -d /content -o "$module.py"; \
+    done
+# copying files
+COPY . .
+# extract models (single layer)
+# NOTE(review): `model` already exists when `mv` runs, so `syz` ends up at model/syz, while
+# app.py opens model/<name>/config.json for every name listed in config.json - confirm the
+# archive layout produces that path
+# removing syz.zip here does not shrink the image: the archive is already stored in the COPY layer
+RUN mkdir -p model \
+    && unzip -x syz.zip \
+    && mv -v syz model \
+    && rm syz.zip
+# check /tmp (build-time diagnostic; the listing only shows up in the build output)
+RUN ls -l /tmp
+# expose port gradio (same port app.py passes as server_port)
+EXPOSE 7860
+# run app (exec form, resolved relative to WORKDIR)
+CMD ["python", "app.py"]
+# Enjoy run Gradio!

README.md ADDED Viewed

	@@ -0,0 +1,11 @@

+---
+title: Web UI
+emoji: 🌍
+colorFrom: pink
+colorTo: pink
+sdk: docker
+pinned: false
+duplicated_from: kevinwang676/web-singer
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,414 @@

+# Agung Wijaya - WebUI 2023 - Gradio
+# file app.py
+# Import
+import os
+import psutil
+import shutil
+import numpy as np
+import gradio as gr
+import subprocess
+from pathlib import Path
+import ffmpeg
+import json
+import re
+import time
+import random
+import torch
+import librosa
+import util
+from config import device
+from infer_pack.models import (
+    SynthesizerTrnMs256NSFsid,
+    SynthesizerTrnMs256NSFsid_nono
+)
+from vc_infer_pipeline import VC
+from typing import Union
+from os import path, getenv
+from datetime import datetime
+from scipy.io.wavfile import write
+# Reference: https://huggingface.co/spaces/zomehwh/rvc-models/blob/main/app.py#L21  # noqa
+in_hf_space = getenv('SYSTEM') == 'spaces'
+# Set High Quality (.wav) or not (.mp3)
+high_quality = True
+# Read config.json
+config_json = json.loads(open("config.json").read())
+# Load hubert model
+hubert_model = util.load_hubert_model(device, 'hubert_base.pt')
+hubert_model.eval()
+# Load models
+loaded_models = []
+for model_name in config_json.get('models'):
+    print(f'Loading model: {model_name}')
+    # Load model info
+    model_info = json.load(
+        open(path.join('model', model_name, 'config.json'), 'r')
+    )
+    # Load RVC checkpoint
+    cpt = torch.load(
+        path.join('model', model_name, model_info['model']),
+        map_location='cpu'
+    )
+    tgt_sr = cpt['config'][-1]
+    cpt['config'][-3] = cpt['weight']['emb_g.weight'].shape[0]  # n_spk
+    if_f0 = cpt.get('f0', 1)
+    net_g: Union[SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono]
+    if if_f0 == 1:
+        net_g = SynthesizerTrnMs256NSFsid(
+            *cpt['config'],
+            is_half=util.is_half(device)
+        )
+    else:
+        net_g = SynthesizerTrnMs256NSFsid_nono(*cpt['config'])
+    del net_g.enc_q
+    # According to original code, this thing seems necessary.
+    print(net_g.load_state_dict(cpt['weight'], strict=False))
+    net_g.eval().to(device)
+    net_g = net_g.half() if util.is_half(device) else net_g.float()
+    vc = VC(tgt_sr, device, util.is_half(device))
+    loaded_models.append(dict(
+        name=model_name,
+        metadata=model_info,
+        vc=vc,
+        net_g=net_g,
+        if_f0=if_f0,
+        target_sr=tgt_sr
+    ))
+print(f'Models loaded: {len(loaded_models)}')
+# Command line test
+def command_line_test():
+    command = "df -h /home/user/app"
+    process = subprocess.run(command.split(), stdout=subprocess.PIPE)
+    result  = process.stdout.decode()
+    return gr.HTML(value=result)
+# Check junk files && delete
+def check_junk():
+    # Find and delete all files after 10 minutes
+    os.system("find ./ytaudio/* -mmin +10 -delete")
+    os.system("find ./output/* -mmin +10 -delete")
+    os.system("find /tmp/gradio/* -mmin +5 -delete")
+    os.system("find /tmp/*.wav -mmin +5 -delete")
+    print("Junk files has been deleted!")
+# Function Information
+def information():
+    stats = os.system("du -s /content -h")
+    disk_usage  = "Disk usage: "+str(stats)
+    info = "<p>"+disk_usage+"<br/></p>"
+    return gr.HTML(value=info)
+# Function YouTube Downloader Audio
+def youtube_downloader(
+    video_identifier,
+    start_time,
+    end_time,
+    output_filename="track.wav",
+    num_attempts=5,
+    url_base="",
+    quiet=False,
+    force=True,
+):
+    output_path = Path(output_filename)
+    if output_path.exists():
+        if not force:
+            return output_path
+        else:
+            output_path.unlink()
+    quiet = "--quiet --no-warnings" if quiet else ""
+    command = f"""
+        yt-dlp {quiet} -x --audio-format wav -f bestaudio -o "{output_filename}" --download-sections "*{start_time}-{end_time}" "{url_base}{video_identifier}"  # noqa: E501
+    """.strip()
+    attempts = 0
+    while True:
+        try:
+            _ = subprocess.check_output(command, shell=True, stderr=subprocess.STDOUT)
+        except subprocess.CalledProcessError:
+            attempts += 1
+            if attempts == num_attempts:
+                return None
+        else:
+            break
+    if output_path.exists():
+        return output_path
+    else:
+        return None
+# Function Audio Separated
+def audio_separated(audio_input, progress=gr.Progress()):
+    # start progress
+    progress(progress=0, desc="Starting...")
+    time.sleep(1)
+    # check file input
+    if audio_input is None:
+        # show progress
+        for i in progress.tqdm(range(100), desc="Please wait..."):
+            time.sleep(0.1)
+        return (None, None, 'Please input audio.')
+    # create filename
+    filename = str(random.randint(10000,99999))+datetime.now().strftime("%d%m%Y%H%M%S")
+    # progress
+    progress(progress=0.10, desc="Please wait...")
+    # make dir output
+    os.makedirs("output", exist_ok=True)
+    # progress
+    progress(progress=0.20, desc="Please wait...")
+    # write
+    if high_quality:
+        write(filename+".wav", audio_input[0], audio_input[1])
+    else:
+        write(filename+".mp3", audio_input[0], audio_input[1])
+    # progress
+    progress(progress=0.50, desc="Please wait...")
+    # demucs process
+    if high_quality:
+        command_demucs = "python3 -m demucs --two-stems=vocals -d cpu "+filename+".wav -o output"
+    else:
+        command_demucs = "python3 -m demucs --two-stems=vocals --mp3 --mp3-bitrate 128 -d cpu "+filename+".mp3 -o output"
+    os.system(command_demucs)
+    # progress
+    progress(progress=0.70, desc="Please wait...")
+    # remove file audio
+    if high_quality:
+        command_delete = "rm -v ./"+filename+".wav"
+    else:
+        command_delete = "rm -v ./"+filename+".mp3"
+    os.system(command_delete)
+    # progress
+    progress(progress=0.80, desc="Please wait...")
+    # progress
+    for i in progress.tqdm(range(80,100), desc="Please wait..."):
+        time.sleep(0.1)
+    if high_quality:
+        return "./output/htdemucs/"+filename+"/vocals.wav","./output/htdemucs/"+filename+"/no_vocals.wav","Successfully..."
+    else:
+        return "./output/htdemucs/"+filename+"/vocals.mp3","./output/htdemucs/"+filename+"/no_vocals.mp3","Successfully..."
+# Function Voice Changer
+def voice_changer(audio_input, model_index, pitch_adjust, f0_method, feat_ratio, progress=gr.Progress()):
+    # start progress
+    progress(progress=0, desc="Starting...")
+    time.sleep(1)
+    # check file input
+    if audio_input is None:
+        # progress
+        for i in progress.tqdm(range(100), desc="Please wait..."):
+            time.sleep(0.1)
+        return (None, 'Please input audio.')
+    # check model input
+    if model_index is None:
+        # progress
+        for i in progress.tqdm(range(100), desc="Please wait..."):
+            time.sleep(0.1)
+        return (None, 'Please select a model.')
+    model = loaded_models[model_index]
+    # Reference: so-vits
+    (audio_samp, audio_npy) = audio_input
+    # progress
+    progress(progress=0.10, desc="Please wait...")
+    # https://huggingface.co/spaces/zomehwh/rvc-models/blob/main/app.py#L49
+    if (audio_npy.shape[0] / audio_samp) > 60 and in_hf_space:
+        # progress
+        for i in progress.tqdm(range(10,100), desc="Please wait..."):
+            time.sleep(0.1)
+        return (None, 'Input audio is longer than 60 secs.')
+    # Bloody hell: https://stackoverflow.com/questions/26921836/
+    if audio_npy.dtype != np.float32:  # :thonk:
+        audio_npy = (
+            audio_npy / np.iinfo(audio_npy.dtype).max
+        ).astype(np.float32)
+    # progress
+    progress(progress=0.30, desc="Please wait...")
+    if len(audio_npy.shape) > 1:
+        audio_npy = librosa.to_mono(audio_npy.transpose(1, 0))
+    # progress
+    progress(progress=0.40, desc="Please wait...")
+    if audio_samp != 16000:
+        audio_npy = librosa.resample(
+            audio_npy,
+            orig_sr=audio_samp,
+            target_sr=16000
+        )
+    # progress
+    progress(progress=0.50, desc="Please wait...")
+    pitch_int = int(pitch_adjust)
+    times = [0, 0, 0]
+    output_audio = model['vc'].pipeline(
+        hubert_model,
+        model['net_g'],
+        model['metadata'].get('speaker_id', 0),
+        audio_npy,
+        times,
+        pitch_int,
+        f0_method,
+        path.join('model', model['name'], model['metadata']['feat_index']),
+        path.join('model', model['name'], model['metadata']['feat_npy']),
+        feat_ratio,
+        model['if_f0']
+    )
+    # progress
+    progress(progress=0.80, desc="Please wait...")
+    print(f'npy: {times[0]}s, f0: {times[1]}s, infer: {times[2]}s')
+    # progress
+    for i in progress.tqdm(range(80,100), desc="Please wait..."):
+        time.sleep(0.1)
+    return ((model['target_sr'], output_audio), 'Successfully...')
+# Function Text to Voice
+def text_to_voice(text_input, model_index):
+    # start progress
+    progress(progress=0, desc="Starting...")
+    time.sleep(1)
+    # check text input
+    if text_input is None:
+        # progress
+        for i in progress.tqdm(range(2,100), desc="Please wait..."):
+            time.sleep(0.1)
+        return (None, 'Please write text.')
+    # check model input
+    if model_index is None:
+        # progress
+        for i in progress.tqdm(range(2,100), desc="Please wait..."):
+            time.sleep(0.1)
+        return (None, 'Please select a model.')
+    # progress
+    for i in progress.tqdm(range(2,100), desc="Please wait..."):
+        time.sleep(0.1)
+    return None, "Sorry, you can't use it yet because this program is being developed!"
+# Themes
+theme = gr.themes.Base()
+# CSS (hides the page footer)
+css = "footer {visibility: hidden}"
+# Blocks
+# NOTE(review): text_to_voice is not wired to any component in this block
+with gr.Blocks(theme=theme, css=css) as App:
+    # Header
+    gr.HTML("<center>"
+            "<h1>Web UI Tools - Agung Wijaya</h1>"
+            "</center>")
+    # Information
+    # information() is called once, while the layout is being built
+    with gr.Accordion("Just information!"):
+        information()
+    # Tab YouTube Downloader
+    with gr.Tab("YouTube Video to Audio"):
+        # Row 1: URL and time range on the left, downloaded audio on the right
+        with gr.Row():
+            with gr.Column():
+                ydl_url_input  = gr.Textbox(label="Enter URL YouTube")
+                start = gr.Number(value=0, label="Start Time (seconds)")
+                end = gr.Number(value=15, label="End Time (seconds)")
+                ydl_url_submit = gr.Button("Convert Now", variant="primary")
+            with gr.Column():
+                ydl_audio_output = gr.Audio(label="Audio from YouTube")
+        # Row 2: separation; its input is the very same component as the download output
+        with gr.Row():
+            with gr.Column():
+                as_audio_input  = ydl_audio_output
+                as_audio_submit = gr.Button("Separated Now", variant="primary")
+            with gr.Column():
+                as_audio_vocals    = gr.Audio(label="Vocal only")
+                as_audio_no_vocals = gr.Audio(label="Music only")
+                as_audio_message   = gr.Textbox(label="Message", visible=False)
+    # Event wiring for the first tab
+    ydl_url_submit.click(fn=youtube_downloader, inputs=[ydl_url_input, start, end], outputs=[ydl_audio_output])
+    as_audio_submit.click(fn=audio_separated, inputs=[as_audio_input], outputs=[as_audio_vocals, as_audio_no_vocals, as_audio_message], show_progress=True, queue=True)
+    # Tab Voice Changer
+    with gr.Tab("Voice to AI Models"):
+        with gr.Row():
+            with gr.Column():
+                # the "Vocal only" component of the first tab doubles as the input here
+                vc_audio_input  = as_audio_vocals
+                # type='index': the handler receives the position in loaded_models, not the label
+                vc_model_index  = gr.Dropdown(
+                    [
+                        '%s' % (
+                            m['metadata'].get('name')
+                        )
+                        for m in loaded_models
+                    ],
+                    label='Models',
+                    type='index'
+                )
+                vc_pitch_adjust = gr.Slider(label='Pitch', minimum=-24, maximum=24, step=1, value=0)
+                vc_f0_method    = gr.Radio(label='F0 methods', choices=['pm', 'harvest'], value='pm', interactive=True)
+                vc_feat_ratio   = gr.Slider(label='Feature ratio', minimum=0, maximum=1, step=0.1, value=0.6)
+                vc_audio_submit = gr.Button("Convert Now", variant="primary")
+            with gr.Column():
+                vc_audio_output  = gr.Audio(label="Result audio", type="numpy")
+                vc_audio_message = gr.Textbox(label="Message")
+    # Event wiring for the second tab
+    vc_audio_submit.click(fn=voice_changer, inputs=[vc_audio_input, vc_model_index, vc_pitch_adjust, vc_f0_method, vc_feat_ratio], outputs=[vc_audio_output, vc_audio_message], show_progress=True, queue=True)
+# Check Junk (runs once at startup, before the server is launched)
+check_junk()
+# Launch
+# NOTE(review): confirm that the installed Gradio version still accepts the
+# `concurrency_count` argument of queue()
+App.queue(concurrency_count=1, max_size=20).launch(server_name="0.0.0.0", server_port=7860)
+# Enjoy

config.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+    "models": [
+        "yanzi"
+    ]
+}

requirements.txt ADDED Viewed

	@@ -0,0 +1,15 @@

+# scipy is listed once (pinned); ffmpeg-python replaces the unrelated PyPI package "ffmpeg"
+psutil
+demucs
+yt-dlp
+ffmpeg-python
+torch
+torchaudio
+fairseq==0.12.2
+scipy==1.9.3
+pyworld>=0.3.2
+faiss-cpu==1.7.2 ; python_version < "3.11"
+faiss-cpu==1.7.3 ; python_version > "3.10"
+praat-parselmouth>=0.4.3
+librosa==0.9.2
+edge_tts

syz.zip ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:db3789fc814af195c129094cd480d4b98542abe63393d147ab5484213c315e69
+size 110561233