Spaces:
Runtime error
Runtime error
config: conf/tuning/train_asr_hubert_transformer_adam_specaug.yaml | |
print_config: false | |
log_level: INFO | |
dry_run: false | |
iterator_type: sequence | |
output_dir: exp/asr_train_asr_hubert_transformer_adam_specaug_raw_en_word | |
ngpu: 1 | |
seed: 0 | |
num_workers: 1 | |
num_att_plot: 3 | |
dist_backend: nccl | |
dist_init_method: env:// | |
dist_world_size: null | |
dist_rank: null | |
local_rank: 0 | |
dist_master_addr: null | |
dist_master_port: null | |
dist_launcher: null | |
multiprocessing_distributed: false | |
unused_parameters: false | |
sharded_ddp: false | |
cudnn_enabled: true | |
cudnn_benchmark: false | |
cudnn_deterministic: true | |
collect_stats: false | |
write_collected_feats: false | |
max_epoch: 200 | |
patience: null | |
val_scheduler_criterion: | |
- valid | |
- loss | |
early_stopping_criterion: | |
- valid | |
- loss | |
- min | |
best_model_criterion: | |
- - train | |
- loss | |
- min | |
- - valid | |
- loss | |
- min | |
- - train | |
- acc | |
- max | |
- - valid | |
- acc | |
- max | |
keep_nbest_models: 5 | |
grad_clip: 5.0 | |
grad_clip_type: 2.0 | |
grad_noise: false | |
accum_grad: 1 | |
no_forward_run: false | |
resume: true | |
train_dtype: float32 | |
use_amp: false | |
log_interval: null | |
use_tensorboard: true | |
use_wandb: false | |
wandb_project: null | |
wandb_id: null | |
wandb_entity: null | |
wandb_name: null | |
wandb_model_log_interval: -1 | |
detect_anomaly: false | |
pretrain_path: null | |
init_param: [] | |
ignore_init_mismatch: false | |
freeze_param: | |
- frontend.upstream | |
num_iters_per_epoch: null | |
batch_size: 20 | |
valid_batch_size: null | |
batch_bins: 1000000 | |
valid_batch_bins: null | |
train_shape_file: | |
- exp/asr_stats_raw_en_word/train/speech_shape | |
- exp/asr_stats_raw_en_word/train/text_shape.word | |
valid_shape_file: | |
- exp/asr_stats_raw_en_word/valid/speech_shape | |
- exp/asr_stats_raw_en_word/valid/text_shape.word | |
batch_type: folded | |
valid_batch_type: null | |
fold_length: | |
- 80000 | |
- 150 | |
sort_in_batch: descending | |
sort_batch: descending | |
multiple_iterator: false | |
chunk_length: 500 | |
chunk_shift_ratio: 0.5 | |
num_cache_chunks: 1024 | |
train_data_path_and_name_and_type: | |
- - dump/raw/train/wav.scp | |
- speech | |
- sound | |
- - dump/raw/train/text | |
- text | |
- text | |
valid_data_path_and_name_and_type: | |
- - dump/raw/valid/wav.scp | |
- speech | |
- sound | |
- - dump/raw/valid/text | |
- text | |
- text | |
allow_variable_data_keys: false | |
max_cache_size: 0.0 | |
max_cache_fd: 32 | |
valid_max_cache_size: null | |
optim: adam | |
optim_conf: | |
lr: 0.0002 | |
scheduler: warmuplr | |
scheduler_conf: | |
warmup_steps: 25000 | |
token_list: | |
- <blank> | |
- <unk> | |
- the | |
- Turn | |
- in | |
- lights | |
- 'on' | |
- up | |
- down | |
- temperature | |
- heat | |
- 'off' | |
- Switch | |
- increase_volume_none | |
- kitchen | |
- language | |
- decrease_volume_none | |
- bedroom | |
- washroom | |
- volume | |
- my | |
- to | |
- bathroom | |
- Decrease | |
- increase_heat_washroom | |
- decrease_heat_washroom | |
- Increase | |
- music | |
- heating | |
- Bring | |
- increase_heat_none | |
- decrease_heat_none | |
- me | |
- change_language_none_none | |
- activate_lights_washroom | |
- Set | |
- Lights | |
- activate_lights_kitchen | |
- I | |
- activate_music_none | |
- too | |
- it | |
- increase_heat_bedroom | |
- decrease_heat_bedroom | |
- sound | |
- increase_heat_kitchen | |
- decrease_heat_kitchen | |
- deactivate_music_none | |
- lamp | |
- Make | |
- deactivate_lights_bedroom | |
- deactivate_lights_kitchen | |
- bring_newspaper_none | |
- newspaper | |
- activate_lights_bedroom | |
- bring_socks_none | |
- socks | |
- bring_shoes_none | |
- shoes | |
- need | |
- Volume | |
- activate_lights_none | |
- deactivate_lights_none | |
- bring_juice_none | |
- juice | |
- deactivate_lights_washroom | |
- change_language_Chinese_none | |
- deactivate_lamp_none | |
- activate_lamp_none | |
- Kitchen | |
- turn | |
- some | |
- Could | |
- you | |
- Bedroom | |
- Go | |
- get | |
- Washroom | |
- Chinese | |
- phone's | |
- change_language_English_none | |
- Get | |
- change_language_Korean_none | |
- OK | |
- now | |
- switch | |
- main | |
- change_language_German_none | |
- practice | |
- Louder | |
- Stop | |
- loud | |
- increase | |
- Play | |
- hear | |
- Change | |
- quiet | |
- Bathroom | |
- Fetch | |
- Korean | |
- English | |
- German | |
- Pause | |
- Lamp | |
- Resume | |
- louder | |
- Heat | |
- audio | |
- Its | |
- loud, | |
- heating? | |
- Far | |
- a | |
- different | |
- please? | |
- decrease | |
- Too | |
- settings | |
- Put | |
- Start | |
- Quieter | |
- please | |
- Thats | |
- softer | |
- max | |
- mute | |
- lower | |
- phone | |
- couldn't | |
- anything, | |
- Reduce | |
- this, | |
- More | |
- That's | |
- Lower | |
- levels | |
- Use | |
- hotter | |
- languages | |
- Allow | |
- can't | |
- that | |
- Less | |
- system | |
- cooler | |
- This | |
- video | |
- is | |
- low, | |
- device | |
- Chinese. | |
- quieter | |
- English. | |
- Language | |
- Open | |
- German. | |
- Korean. | |
- <sos/eos> | |
init: null | |
input_size: null | |
ctc_conf: | |
dropout_rate: 0.0 | |
ctc_type: builtin | |
reduce: true | |
ignore_nan_grad: true | |
model_conf: | |
ctc_weight: 0.3 | |
lsm_weight: 0.1 | |
length_normalized_loss: false | |
extract_feats_in_collect_stats: false | |
use_preprocessor: true | |
token_type: word | |
bpemodel: null | |
non_linguistic_symbols: null | |
cleaner: null | |
g2p: null | |
speech_volume_normalize: null | |
rir_scp: null | |
rir_apply_prob: 1.0 | |
noise_scp: null | |
noise_apply_prob: 1.0 | |
noise_db_range: '13_15' | |
frontend: s3prl | |
frontend_conf: | |
frontend_conf: | |
upstream: hubert_large_ll60k | |
download_dir: ./hub | |
multilayer_feature: true | |
fs: 16k | |
specaug: specaug | |
specaug_conf: | |
apply_time_warp: true | |
time_warp_window: 5 | |
time_warp_mode: bicubic | |
apply_freq_mask: true | |
freq_mask_width_range: | |
- 0 | |
- 30 | |
num_freq_mask: 2 | |
apply_time_mask: true | |
time_mask_width_range: | |
- 0 | |
- 40 | |
num_time_mask: 2 | |
normalize: utterance_mvn | |
normalize_conf: {} | |
preencoder: linear | |
preencoder_conf: | |
input_size: 1024 | |
output_size: 80 | |
encoder: transformer | |
encoder_conf: | |
output_size: 256 | |
attention_heads: 4 | |
linear_units: 2048 | |
num_blocks: 12 | |
dropout_rate: 0.1 | |
positional_dropout_rate: 0.1 | |
attention_dropout_rate: 0.0 | |
input_layer: conv2d | |
normalize_before: true | |
postencoder: null | |
postencoder_conf: {} | |
decoder: transformer | |
decoder_conf: | |
attention_heads: 4 | |
linear_units: 2048 | |
num_blocks: 6 | |
dropout_rate: 0.1 | |
positional_dropout_rate: 0.1 | |
self_attention_dropout_rate: 0.0 | |
src_attention_dropout_rate: 0.0 | |
required: | |
- output_dir | |
- token_list | |
version: 0.10.3a2 | |
distributed: false | |