import os
import traceback

import faiss
import numpy as np
import soundfile as sf  # used by calculate_audio_duration below
from sklearn.cluster import MiniBatchKMeans

# Set the working directory
os.chdir('/content/RVC')

# Parameters
model_name = 'My-Voice'
dataset_folder = '/content/dataset'

def calculate_audio_duration(folder_path):
    # Total duration (in seconds) of the audio files in the folder.
    # Minimal sketch filling in the original placeholder: assumes the
    # soundfile package can read every file in the dataset.
    total = 0.0
    for name in os.listdir(folder_path):
        info = sf.info(os.path.join(folder_path, name))
        total += info.frames / info.samplerate
    return total

# Datasets under 10 minutes are small enough to cache; the `cache` flag is
# presumably consumed by a later training cell (not shown here).
try:
    duration = calculate_audio_duration(dataset_folder)
    cache = duration < 600
except Exception:
    cache = False

# Block until the dataset folder contains at least one file
while len(os.listdir(dataset_folder)) < 1:
    input("Your dataset folder is empty. Add audio files, then press Enter to retry.")

os.makedirs(f'./logs/{model_name}', exist_ok=True)

# Run the preprocessing script
# (arguments: dataset dir, target sample rate, number of processes,
#  experiment dir, noparallel flag, per-slice length in seconds)
os.system(f'python infer/modules/train/preprocess.py {dataset_folder} 32000 2 ./logs/{model_name} False 3.0 > /dev/null 2>&1')

with open(f'./logs/{model_name}/preprocess.log', 'r') as f:
    if 'end preprocess' in f.read():
        print("✔ Success")
    else:
        print("Error preprocessing data... Make sure your dataset folder is correct.")

f0method = "rmvpe_gpu"

# Run the feature extraction scripts
if f0method != "rmvpe_gpu":
    # (arguments: experiment dir, number of processes, f0 extraction method)
    os.system(f'python infer/modules/train/extract/extract_f0_print.py ./logs/{model_name} 2 {f0method}')
else:
    # (arguments: number of parts, part index, GPU index, experiment dir, half precision)
    os.system(f'python infer/modules/train/extract/extract_f0_rmvpe.py 1 0 0 ./logs/{model_name} True')

# Extract HuBERT features
# (arguments: device, number of parts, part index, experiment dir, model version, half precision)
os.system(f'python infer/modules/train/extract_feature_print.py cuda:0 1 0 ./logs/{model_name} v2 True')

with open(f'./logs/{model_name}/extract_f0_feature.log', 'r') as f:
    if 'all-feature-done' in f.read():
        print("✔ Success")
    else:
        print("Error extracting features... Make sure preprocessing completed successfully.")

def train_index(exp_dir1, version19):
    exp_dir = f"logs/{exp_dir1}"
    os.makedirs(exp_dir, exist_ok=True)
    feature_dir = f"{exp_dir}/3_feature256" if version19 == "v1" else f"{exp_dir}/3_feature768"

    if not os.path.exists(feature_dir):
        yield "Please run feature extraction first!"
        return

    listdir_res = list(os.listdir(feature_dir))
    if len(listdir_res) == 0:
        yield "Please run feature extraction first!"
        return
    
    infos = []
    npys = []

    # Load every extracted feature file (one .npy per utterance)
    for name in sorted(listdir_res):
        phone = np.load(f"{feature_dir}/{name}")
        npys.append(phone)

    # Pool all features into one matrix and shuffle the rows so that
    # k-means / index training sees an unbiased sample
    big_npy = np.concatenate(npys, 0)
    big_npy_idx = np.arange(big_npy.shape[0])
    np.random.shuffle(big_npy_idx)
    big_npy = big_npy[big_npy_idx]
    
    if big_npy.shape[0] > 2e5:
        # Large feature sets are first reduced with k-means so the index
        # is trained on 10k representative centers instead of every vector.
        infos.append(f"Applying k-means: reducing {big_npy.shape[0]} vectors to 10k centers.")
        yield "\n".join(infos)

        try:
            big_npy = MiniBatchKMeans(
                n_clusters=10000,
                verbose=True,
                batch_size=256,
                compute_labels=False,
                init="random"
            ).fit(big_npy).cluster_centers_
        except Exception:
            info = traceback.format_exc()
            infos.append(info)
            yield "\n".join(infos)
    
    np.save(f"{exp_dir}/total_fea.npy", big_npy)
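    # IVF cell-count heuristic: 16 * sqrt(N), capped at N // 39 because faiss
    # wants at least ~39 training points per centroid.
    # Worked example (hypothetical): N = 100,000 -> min(5059, 2564) = 2564 cells.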
    n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
    infos.append(f"{big_npy.shape},{n_ivf}")
    yield "\n".join(infos)
    
    # IVF index with flat (uncompressed) vector storage
    index = faiss.index_factory(256 if version19 == "v1" else 768, f"IVF{n_ivf},Flat")
    infos.append("training")
    yield "\n".join(infos)
    
    index_ivf = faiss.extract_index_ivf(index)
    index_ivf.nprobe = 1  # probe a single cell per query: fastest, least exhaustive
    index.train(big_npy)
    faiss.write_index(
        index,
        f"{exp_dir}/trained_IVF{n_ivf}_Flat_nprobe_{index_ivf.nprobe}_{exp_dir1}_{version19}.index"
    )
    
    infos.append("adding")
    yield "\n".join(infos)
    
    # Populate the index in chunks so each add() call handles a bounded slice
    batch_size_add = 8192
    for i in range(0, big_npy.shape[0], batch_size_add):
        index.add(big_npy[i: i + batch_size_add])
    
    faiss.write_index(
        index,
        f"{exp_dir}/added_IVF{n_ivf}_Flat_nprobe_{index_ivf.nprobe}_{exp_dir1}_{version19}.index"
    )
    
    infos.append(f"Successfully built index: added_IVF{n_ivf}_Flat_nprobe_{index_ivf.nprobe}_{exp_dir1}_{version19}.index")
    yield "\n".join(infos)

training_log = train_index(model_name, 'v2')

# Each yielded item is the cumulative log so far; flag success once the
# index-population ("adding") step is reached.
success_flagged = False
for line in training_log:
    print(line)
    if 'adding' in line and not success_flagged:
        print("✔ Success")
        success_flagged = True
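
# --- Optional sanity check (a minimal sketch, not part of the original
# pipeline): load the freshly written "added_*.index" back with faiss and
# query it with a few of the training vectors; nearest-neighbour distances
# should be close to zero if the index was built correctly. The glob
# pattern below is an assumption based on the filenames train_index() writes.
import glob

index_paths = glob.glob(f"logs/{model_name}/added_IVF*_{model_name}_v2.index")
if index_paths:
    check_index = faiss.read_index(index_paths[0])
    queries = np.load(f"logs/{model_name}/total_fea.npy")[:5].astype(np.float32)
    distances, _ = check_index.search(queries, 1)
    print("Sanity-check nearest-neighbour distances (expect ~0):", distances.ravel())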