cahya committed on
Commit
4b6823a
1 Parent(s): cf86933

Updated the model and its README file

Browse files
Files changed (4) hide show
  1. README.md +3 -3
  2. config.json +3 -3
  3. pytorch_model.bin +2 -2
  4. vocab.json +1 -1
README.md CHANGED
@@ -23,7 +23,7 @@ model-index:
23
  metrics:
24
  - name: Test WER
25
  type: wer
26
- value: 17.84
27
  ---
28
 
29
  # Wav2Vec2-Large-XLSR-Indonesian
@@ -117,10 +117,10 @@ result = test_dataset.map(evaluate, batched=True, batch_size=8)
117
  print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"])))
118
  ```
119
 
120
- **Test Result**: 17.84 %
121
 
122
  ## Training
123
 
124
- The Common Voice `train`, `validation`, and ... datasets were used for training as well as ... and ... # TODO
125
 
126
  The script used for training can be found [here](https://github.com/indonesian-nlp/wav2vec2-indonesian)
 
23
  metrics:
24
  - name: Test WER
25
  type: wer
26
+ value: 14.29
27
  ---
28
 
29
  # Wav2Vec2-Large-XLSR-Indonesian
 
117
  print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"])))
118
  ```
119
 
120
+ **Test Result**: 14.29 %
121
 
122
  ## Training
123
 
124
+ The Common Voice `train` and `validation` datasets, together with [synthetic voice datasets](https://cloud.uncool.ai/index.php/s/P5YiJ42GfxTQcm7), were used for training.
125
 
126
  The script used for training can be found [here](https://github.com/indonesian-nlp/wav2vec2-indonesian)
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "facebook/wav2vec2-large-xlsr-53",
3
  "activation_dropout": 0.055,
4
  "apply_spec_augment": true,
5
  "architectures": [
@@ -70,7 +70,7 @@
70
  "num_conv_pos_embeddings": 128,
71
  "num_feat_extract_layers": 7,
72
  "num_hidden_layers": 24,
73
- "pad_token_id": 30,
74
  "transformers_version": "4.5.0.dev0",
75
- "vocab_size": 31
76
  }
 
1
  {
2
+ "_name_or_path": "/root/Work/indonesian-speech-recognition/wav2vec2-large-xlsr-indonesian-artificial/epoch-30",
3
  "activation_dropout": 0.055,
4
  "apply_spec_augment": true,
5
  "architectures": [
 
70
  "num_conv_pos_embeddings": 128,
71
  "num_feat_extract_layers": 7,
72
  "num_hidden_layers": 24,
73
+ "pad_token_id": 27,
74
  "transformers_version": "4.5.0.dev0",
75
+ "vocab_size": 28
76
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9cdef175f42faa1fb540d457cf11259bbf0c529ebf6f4ebb5e84c0ad3e25bf48
3
- size 1262060951
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ac83f343489f702212f47f54db0e5ca203e4ac6cef58b9f4c9d2f5e038fd75e
3
+ size 1262048599
vocab.json CHANGED
@@ -1 +1 @@
1
- {"n": 0, "l": 1, "u": 2, "j": 3, "d": 4, "s": 5, "r": 6, "x": 7, "m": 8, "é": 9, "k": 10, "": 11, "t": 12, "a": 13, "y": 15, "v": 16, "h": 17, "o": 18, "b": 19, "c": 20, "w": 21, "p": 22, "e": 23, "": 24, "f": 25, "g": 26, "i": 27, "z": 28, "|": 14, "[UNK]": 29, "[PAD]": 30}
 
1
+ {"m": 0, "h": 1, "r": 2, "e": 3, "z": 4, "a": 5, "k": 7, "g": 8, "s": 9, "x": 10, "l": 11, "f": 12, "b": 13, "v": 14, "n": 15, "c": 16, "d": 17, "p": 18, "o": 19, "t": 20, "y": 21, "w": 22, "i": 23, "j": 24, "u": 25, "|": 6, "[UNK]": 26, "[PAD]": 27}