Training in progress, step 400
Browse files
- added_tokens.json +1 -1
- config.json +3 -11
- pytorch_model.bin +2 -2
- special_tokens_map.json +1 -1
- tokenizer_config.json +1 -1
- training_args.bin +2 -2
- vocab.json +1 -1
added_tokens.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"<s>":
|
|
|
1 |
+
{"<s>": 177, "</s>": 178}
|
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "facebook/wav2vec2-
|
3 |
"activation_dropout": 0.0,
|
4 |
"adapter_kernel_size": 3,
|
5 |
"adapter_stride": 2,
|
@@ -59,20 +59,12 @@
|
|
59 |
"intermediate_size": 4096,
|
60 |
"layer_norm_eps": 1e-05,
|
61 |
"layerdrop": 0.0,
|
62 |
-
"mask_channel_length": 10,
|
63 |
-
"mask_channel_min_space": 1,
|
64 |
-
"mask_channel_other": 0.0,
|
65 |
-
"mask_channel_prob": 0.0,
|
66 |
-
"mask_channel_selection": "static",
|
67 |
"mask_feature_length": 10,
|
68 |
"mask_feature_min_masks": 0,
|
69 |
"mask_feature_prob": 0.0,
|
70 |
"mask_time_length": 10,
|
71 |
"mask_time_min_masks": 2,
|
72 |
-
"mask_time_min_space": 1,
|
73 |
-
"mask_time_other": 0.0,
|
74 |
"mask_time_prob": 0.05,
|
75 |
-
"mask_time_selection": "static",
|
76 |
"model_type": "wav2vec2",
|
77 |
"num_adapter_layers": 3,
|
78 |
"num_attention_heads": 16,
|
@@ -84,7 +76,7 @@
|
|
84 |
"num_hidden_layers": 24,
|
85 |
"num_negatives": 100,
|
86 |
"output_hidden_size": 1024,
|
87 |
-
"pad_token_id":
|
88 |
"proj_codevector_dim": 768,
|
89 |
"tdnn_dilation": [
|
90 |
1,
|
@@ -110,6 +102,6 @@
|
|
110 |
"torch_dtype": "float32",
|
111 |
"transformers_version": "4.16.0.dev0",
|
112 |
"use_weighted_layer_sum": false,
|
113 |
-
"vocab_size":
|
114 |
"xvector_output_dim": 512
|
115 |
}
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "facebook/wav2vec2-xls-r-300m",
|
3 |
"activation_dropout": 0.0,
|
4 |
"adapter_kernel_size": 3,
|
5 |
"adapter_stride": 2,
|
|
|
59 |
"intermediate_size": 4096,
|
60 |
"layer_norm_eps": 1e-05,
|
61 |
"layerdrop": 0.0,
|
|
|
|
|
|
|
|
|
|
|
62 |
"mask_feature_length": 10,
|
63 |
"mask_feature_min_masks": 0,
|
64 |
"mask_feature_prob": 0.0,
|
65 |
"mask_time_length": 10,
|
66 |
"mask_time_min_masks": 2,
|
|
|
|
|
67 |
"mask_time_prob": 0.05,
|
|
|
68 |
"model_type": "wav2vec2",
|
69 |
"num_adapter_layers": 3,
|
70 |
"num_attention_heads": 16,
|
|
|
76 |
"num_hidden_layers": 24,
|
77 |
"num_negatives": 100,
|
78 |
"output_hidden_size": 1024,
|
79 |
+
"pad_token_id": 176,
|
80 |
"proj_codevector_dim": 768,
|
81 |
"tdnn_dilation": [
|
82 |
1,
|
|
|
102 |
"torch_dtype": "float32",
|
103 |
"transformers_version": "4.16.0.dev0",
|
104 |
"use_weighted_layer_sum": false,
|
105 |
+
"vocab_size": 179,
|
106 |
"xvector_output_dim": 512
|
107 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:88f20f6e89a675b120eba8601af65ec70b8a6b3f20a32bf672b18ee257570478
|
3 |
+
size 1262657585
|
special_tokens_map.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
|
tokenizer_config.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "../
|
|
|
1 |
+
{"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "../wav2vec2-xls-r-300m-italian", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:417385b9ff1065433b0750b3d81a236a56bc44cd5710c59b255b972eb9b3a94e
|
3 |
+
size 3119
|
vocab.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"<": 1, "=": 2, ">": 3, "[": 4, "]": 5, "_": 6, "`": 7, "a": 8, "b": 9, "c": 10, "d": 11, "e": 12, "f": 13, "g": 14, "h": 15, "i": 16, "j": 17, "k": 18, "l": 19, "m": 20, "n": 21, "o": 22, "p": 23, "q": 24, "r": 25, "s": 26, "t": 27, "u": 28, "v": 29, "w": 30, "x": 31, "y": 32, "z": 33, "
|
|
|
1 |
+
{"<": 1, "=": 2, ">": 3, "[": 4, "]": 5, "_": 6, "`": 7, "a": 8, "b": 9, "c": 10, "d": 11, "e": 12, "f": 13, "g": 14, "h": 15, "i": 16, "j": 17, "k": 18, "l": 19, "m": 20, "n": 21, "o": 22, "p": 23, "q": 24, "r": 25, "s": 26, "t": 27, "u": 28, "v": 29, "w": 30, "x": 31, "y": 32, "z": 33, "{": 34, "}": 35, "~": 36, "¡": 37, "«": 38, "°": 39, "´": 40, "µ": 41, "º": 42, "»": 43, "ß": 44, "à": 45, "á": 46, "ã": 47, "ä": 48, "å": 49, "æ": 50, "è": 51, "é": 52, "ê": 53, "ë": 54, "ì": 55, "í": 56, "î": 57, "ï": 58, "ð": 59, "ñ": 60, "ò": 61, "ó": 62, "ô": 63, "ö": 64, "ø": 65, "ù": 66, "ú": 67, "û": 68, "ü": 69, "þ": 70, "ÿ": 71, "ā": 72, "ą": 73, "ć": 74, "č": 75, "đ": 76, "ė": 77, "ę": 78, "ě": 79, "ğ": 80, "ħ": 81, "ī": 82, "ı": 83, "ľ": 84, "ł": 85, "ń": 86, "ň": 87, "ō": 88, "ő": 89, "œ": 90, "ř": 91, "ś": 92, "ş": 93, "š": 94, "ū": 95, "ŭ": 96, "ź": 97, "ż": 98, "ž": 99, "ș": 100, "ț": 101, "ə": 102, "ʹ": 103, "ʻ": 104, "ʼ": 105, "ʾ": 106, "ʿ": 107, "ː": 108, "̇": 109, "̨": 110, "а": 111, "б": 112, "д": 113, "е": 114, "л": 115, "н": 116, "о": 117, "с": 118, "у": 119, "ц": 120, "ъ": 121, "ё": 122, "љ": 123, "ң": 124, "ד": 125, "ה": 126, "ו": 127, "ة": 128, "ر": 129, "س": 130, "ص": 131, "غ": 132, "ل": 133, "ي": 134, "ḥ": 135, "ṛ": 136, "ṣ": 137, "ṭ": 138, "ễ": 139, "‑": 140, "–": 141, "—": 142, "’": 143, "„": 144, "…": 145, "′": 146, "☆": 147, "あ": 148, "ア": 149, "カ": 150, "キ": 151, "サ": 152, "ザ": 153, "ノ": 154, "フ": 155, "リ": 156, "ン": 157, "・": 158, "ー": 159, "万": 160, "三": 161, "丰": 162, "古": 163, "多": 164, "家": 165, "峰": 166, "張": 167, "旅": 168, "禅": 169, "ꞌ": 170, "심": 171, "악": 172, "음": 173, "중": 174, "|": 0, "[UNK]": 175, "[PAD]": 176}
|