# File size: 1,526 Bytes
# commit: c21d951
---
# Whisper speech-recognition model configuration.
# Key names mirror Hugging Face Transformers' WhisperConfig — presumably this
# file is deserialized into that class by a loader; verify against the consumer.
name: whisper_speech_recognition
config_type: model
# --- Vocabulary and audio front-end ---
vocab_size: 51865
num_mel_bins: 80  # mel-filterbank channels per input frame
# --- Transformer architecture (12 layers / 12 heads / d_model 768 on both sides) ---
encoder_layers: 12
encoder_attention_heads: 12
decoder_layers: 12
decoder_attention_heads: 12
num_hidden_layers: 12
decoder_ffn_dim: 3072
encoder_ffn_dim: 3072
encoder_layerdrop: 0.0  # probability of dropping a whole encoder layer during training (disabled)
decoder_layerdrop: 0.0  # same for decoder layers (disabled)
decoder_start_token_id: 50258  # NOTE(review): presumably <|startoftranscript|> — confirm against the tokenizer
use_cache: true  # reuse cached decoder key/value states during generation
sampling_rate: 16000  # Hz; NOTE(review): input audio presumably must be resampled to this rate
is_encoder_decoder: true
activation_function: gelu
d_model: 768  # hidden size shared by encoder and decoder
# --- Regularization (all dropout disabled) ---
dropout: 0.0
torch_dtype: float32
attention_dropout: 0.0
activation_dropout: 0.0
init_std: 0.02  # stddev for weight initialization
scale_embedding: false  # when true, token embeddings would be scaled by sqrt(d_model)
# --- Sequence-length limits ---
max_source_positions: 1500  # maximum encoder input positions (audio frames)
max_target_positions: 448  # maximum decoder sequence length
# --- Special token ids ---
# NOTE(review): all three share id 50257 — presumably <|endoftext|>; verify
# against the tokenizer's vocabulary.
pad_token_id: 50257
bos_token_id: 50257
eos_token_id: 50257
# Token ids to be suppressed (logits masked) at every decoding step.
# NOTE(review): exact token meanings depend on the tokenizer — verify there.
suppress_tokens:
- 1
- 2
- 7
- 8
- 9
- 10
- 14
- 25
- 26
- 27
- 28
- 29
- 31
- 58
- 59
- 60
- 61
- 62
- 63
- 90
- 91
- 92
- 93
- 359
- 503
- 522
- 542
- 873
- 893
- 902
- 918
- 922
- 931
- 1350
- 1853
- 1982
- 2460
- 2627
- 3246
- 3253
- 3268
- 3536
- 3846
- 3961
- 4183
- 4667
- 6585
- 6647
- 7273
- 9061
- 9383
- 10428
- 10929
- 11938
- 12033
- 12331
- 12562
- 13793
- 14157
- 14635
- 15265
- 15618
- 16553
- 16604
- 18362
- 18956
- 20075
- 21675
- 22520
- 26130
- 26161
- 26435
- 28279
- 29464
- 31650
- 32302
- 32470
- 36865
- 42863
- 47425
- 49870
- 50254
- 50258
- 50360
- 50361
- 50362
# Token ids suppressed only at the very first generated position.
begin_suppress_tokens:
- 220
- 50256
# --- Audio-classification head settings (consumer-dependent; this file sets
# config_type: model, so these may be inert here — verify against the loader) ---
use_weighted_layer_sum: false
classifier_proj_size: 256
# --- SpecAugment-style feature masking (inactive: apply_spec_augment is false) ---
apply_spec_augment: false
mask_time_prob: 0.05
mask_time_length: 10
mask_time_min_masks: 2
mask_feature_prob: 0.0
mask_feature_length: 10
mask_feature_min_masks: 0
# --- Generation ---
max_new_tokens: 448  # cap on generated tokens; matches max_target_positions