# MUSTAR's picture
# Upload 2 files
# 7105a54 verified
# Feature-preparation pipeline:
#   1. activate the "contentvec" conda environment,
#   2. build train/valid manifests for the audio under ./dataset,
#   3. filter the manifests with remove_silence_files.py,
#   4. install the local dump_hubert_feature.py into the fairseq checkout,
#   5. run speaker.py and then dump features for the configured split.
# Every step is checked explicitly so that a failure stops the pipeline
# instead of letting later steps run on missing or stale files.

# Print an error message to stderr and abort the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Conda install location. Defaults to the historical hard-coded path but may
# be overridden by exporting CONDA_ROOT before running the script.
CONDA_ROOT="${CONDA_ROOT:-/home/$(whoami)/miniconda3}"
# shellcheck source=/dev/null
source "${CONDA_ROOT}/etc/profile.d/conda.sh" \
  || die "cannot source ${CONDA_ROOT}/etc/profile.d/conda.sh"
conda activate contentvec || die "cannot activate conda environment 'contentvec'"

mkdir -p feature/lab || die "cannot create feature/lab"

# Generate manifest files
python3 fairseq/examples/wav2vec/wav2vec_manifest.py dataset --dest feature --valid-percent 0.1 \
  || die "manifest generation failed"

# Filter out files with silence and update manifests
python remove_silence_files.py feature/train.tsv feature/valid.tsv feature/filtered \
  || die "silence filtering failed"
cp -- feature/filtered/train.tsv feature/lab/train.tsv \
  || die "cannot copy filtered train manifest"
cp -- feature/filtered/valid.tsv feature/lab/valid.tsv \
  || die "cannot copy filtered valid manifest"

# Continue with feature extraction: replace fairseq's dump script with the
# local copy. The local file is verified first so the fairseq copy is never
# deleted without a replacement being available.
dump_script="fairseq/examples/hubert/simple_kmeans/dump_hubert_feature.py"
[ -f dump_hubert_feature.py ] \
  || die "dump_hubert_feature.py not found in current directory"
rm -rf -- "${dump_script}"
cp -- dump_hubert_feature.py "${dump_script}" \
  || die "cannot install dump_hubert_feature.py into fairseq"

# Pipeline configuration. km_path, lab_dir and n_clusters are not used by the
# commands below; they are kept because later steps may rely on them.
tsv_dir="feature/lab"
split="train"
ckpt_path="checkpoint_best_legacy_500.pt"
layer=12
nshard=1
rank=0
feat_dir="feature"
km_path="feature/${split}.km"
lab_dir="feature/lab"
n_clusters=100

python speaker.py || die "speaker.py failed"

# Extract features
python "${dump_script}" "${tsv_dir}" "${split}" "${ckpt_path}" "${layer}" "${nshard}" "${rank}" "${feat_dir}" \
  || die "feature extraction failed"