updated the model and its readme file
Browse files
- README.md +3 -3
- config.json +3 -3
- pytorch_model.bin +2 -2
- vocab.json +1 -1
README.md
CHANGED
@@ -23,7 +23,7 @@ model-index:
|
|
23 |
metrics:
|
24 |
- name: Test WER
|
25 |
type: wer
|
26 |
-
value:
|
27 |
---
|
28 |
|
29 |
# Wav2Vec2-Large-XLSR-Indonesian
|
@@ -117,10 +117,10 @@ result = test_dataset.map(evaluate, batched=True, batch_size=8)
|
|
117 |
print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"])))
|
118 |
```
|
119 |
|
120 |
-
**Test Result**:
|
121 |
|
122 |
## Training
|
123 |
|
124 |
-
The Common Voice `train`, `validation`, and
|
125 |
|
126 |
The script used for training can be found [here](https://github.com/indonesian-nlp/wav2vec2-indonesian)
|
|
|
23 |
metrics:
|
24 |
- name: Test WER
|
25 |
type: wer
|
26 |
+
value: 14.29
|
27 |
---
|
28 |
|
29 |
# Wav2Vec2-Large-XLSR-Indonesian
|
|
|
117 |
print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"])))
|
118 |
```
|
119 |
|
120 |
+
**Test Result**: 14.29 %
|
121 |
|
122 |
## Training
|
123 |
|
124 |
+
The Common Voice `train`, `validation`, and [synthetic voice datasets](https://cloud.uncool.ai/index.php/s/P5YiJ42GfxTQcm7) were used for training.
|
125 |
|
126 |
The script used for training can be found [here](https://github.com/indonesian-nlp/wav2vec2-indonesian)
|
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "
|
3 |
"activation_dropout": 0.055,
|
4 |
"apply_spec_augment": true,
|
5 |
"architectures": [
|
@@ -70,7 +70,7 @@
|
|
70 |
"num_conv_pos_embeddings": 128,
|
71 |
"num_feat_extract_layers": 7,
|
72 |
"num_hidden_layers": 24,
|
73 |
-
"pad_token_id":
|
74 |
"transformers_version": "4.5.0.dev0",
|
75 |
-
"vocab_size":
|
76 |
}
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "/root/Work/indonesian-speech-recognition/wav2vec2-large-xlsr-indonesian-artificial/epoch-30",
|
3 |
"activation_dropout": 0.055,
|
4 |
"apply_spec_augment": true,
|
5 |
"architectures": [
|
|
|
70 |
"num_conv_pos_embeddings": 128,
|
71 |
"num_feat_extract_layers": 7,
|
72 |
"num_hidden_layers": 24,
|
73 |
+
"pad_token_id": 27,
|
74 |
"transformers_version": "4.5.0.dev0",
|
75 |
+
"vocab_size": 28
|
76 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2ac83f343489f702212f47f54db0e5ca203e4ac6cef58b9f4c9d2f5e038fd75e
|
3 |
+
size 1262048599
|
vocab.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"
|
|
|
1 |
+
{"m": 0, "h": 1, "r": 2, "e": 3, "z": 4, "a": 5, "k": 7, "g": 8, "s": 9, "x": 10, "l": 11, "f": 12, "b": 13, "v": 14, "n": 15, "c": 16, "d": 17, "p": 18, "o": 19, "t": 20, "y": 21, "w": 22, "i": 23, "j": 24, "u": 25, "|": 6, "[UNK]": 26, "[PAD]": 27}
|