# Classifier-free guidance settings.
# NOTE(review): nesting restored from a flattened source; verify against the
# consumer's schema.
classifier_free_guidance:
  # Presumably the conditioning-dropout rate applied during training -- confirm.
  training_dropout: 0.2
  # Presumably the guidance coefficient applied at inference -- confirm.
  inference_coef: 3.0

# Per-attribute dropout settings, grouped by conditioning type (text / wav).
# NOTE(review): nesting restored from a flattened source; verify against the
# consumer's schema.
attribute_dropout:
  args:
    active_on_eval: false
  # Explicitly empty: no text attributes are listed here.
  text: {}
  wav:
    # Value for the `self_wav` attribute; presumably a dropout probability --
    # confirm.
    self_wav: 0.5

# Fuser settings: each list names the conditions assigned to that fusing
# method. Only `prepend` is populated; the other lists are explicitly empty.
# NOTE(review): nesting restored from a flattened source; verify against the
# consumer's schema.
fuser:
  cross_attention_pos_emb: false
  cross_attention_pos_emb_scale: 1
  sum: []
  # Names here are expected to match keys under `conditioners` -- confirm.
  prepend: [self_wav, description]
  cross: []
  input_interpolate: []

# Conditioner definitions. Each entry has a `model` key and a same-named
# sub-mapping holding that model's parameters.
# NOTE(review): nesting restored from a flattened source (the flat form had
# duplicate `model` keys); verify against the consumer's schema.
conditioners:
  self_wav:
    model: chroma_stem
    chroma_stem:
      # Interpolation: takes its value from a `sample_rate` key that is not
      # defined in this section.
      sample_rate: ${sample_rate}
      n_chroma: 12
      # Presumably an exponent for a power-of-two window size -- confirm.
      radix2_exp: 14
      argmax: true
      match_len_on_eval: false
      eval_wavs: null
      n_eval_wavs: 100
      cache_path: null
  description:
    model: t5
    t5:
      name: t5-base
      finetune: false
      word_dropout: 0.2
      normalize_text: false

# Dataset settings for the `train` split.
# NOTE(review): nesting restored from a flattened source; verify against the
# consumer's schema.
dataset:
  train:
    # The `_p` suffix suggests these are probabilities -- confirm.
    merge_text_p: 0.25
    drop_desc_p: 0.5
    drop_other_p: 0.5