jonatasgrosman
committed on
Commit
•
77c5e23
1
Parent(s):
9cc8676
add LM
Browse files
- alphabet.json +1 -0
- config.json +0 -1
- language_model/attrs.json +1 -0
- language_model/lm.binary +3 -0
- language_model/unigrams.txt +3 -0
- preprocessor_config.json +2 -1
- vocab.json +1 -1
alphabet.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"labels": ["", "<s>", "</s>", "⁇", " ", "-", "ё", "а", "б", "в", "г", "д", "е", "ж", "з", "и", "й", "к", "л", "м", "н", "о", "п", "р", "с", "т", "у", "ф", "х", "ц", "ч", "ш", "щ", "ъ", "ы", "ь", "э", "ю", "я"], "is_bpe": false}
|
config.json
CHANGED
@@ -48,7 +48,6 @@
|
|
48 |
"feat_proj_dropout": 0.05,
|
49 |
"feat_quantizer_dropout": 0.0,
|
50 |
"final_dropout": 0.0,
|
51 |
-
"gradient_checkpointing": true,
|
52 |
"hidden_act": "gelu",
|
53 |
"hidden_dropout": 0.05,
|
54 |
"hidden_size": 1024,
|
|
|
48 |
"feat_proj_dropout": 0.05,
|
49 |
"feat_quantizer_dropout": 0.0,
|
50 |
"final_dropout": 0.0,
|
|
|
51 |
"hidden_act": "gelu",
|
52 |
"hidden_dropout": 0.05,
|
53 |
"hidden_size": 1024,
|
language_model/attrs.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"alpha": 0.5, "beta": 1.5, "unk_score_offset": -10.0, "score_boundary": true}
|
language_model/lm.binary
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:01a8fba99ad0d5157e060ea7bee83adb7bc7025606e6f98a783da1413b6edbaf
|
3 |
+
size 1486935543
|
language_model/unigrams.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e7c72ae916afa54951fe3bd6094c95e2895c1c7881a20e02ef640aa27cdb4874
|
3 |
+
size 32849086
|
preprocessor_config.json
CHANGED
@@ -5,5 +5,6 @@
|
|
5 |
"padding_side": "right",
|
6 |
"padding_value": 0.0,
|
7 |
"return_attention_mask": true,
|
8 |
-
"sampling_rate": 16000
|
|
|
9 |
}
|
|
|
5 |
"padding_side": "right",
|
6 |
"padding_value": 0.0,
|
7 |
"return_attention_mask": true,
|
8 |
+
"sampling_rate": 16000,
|
9 |
+
"processor_class": "Wav2Vec2ProcessorWithLM"
|
10 |
}
|
vocab.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"<pad>": 0, "<s>": 1, "</s>": 2, "<unk>": 3, "|": 4, "-": 5, "
|
|
|
1 |
+
{"<pad>": 0, "<s>": 1, "</s>": 2, "<unk>": 3, "|": 4, "-": 5, "ё": 6, "а": 7, "б": 8, "в": 9, "г": 10, "д": 11, "е": 12, "ж": 13, "з": 14, "и": 15, "й": 16, "к": 17, "л": 18, "м": 19, "н": 20, "о": 21, "п": 22, "р": 23, "с": 24, "т": 25, "у": 26, "ф": 27, "х": 28, "ц": 29, "ч": 30, "ш": 31, "щ": 32, "ъ": 33, "ы": 34, "ь": 35, "э": 36, "ю": 37, "я": 38}
|