Mofe committed on
Commit
7c36805
1 Parent(s): 2a24af1
.ipynb_checkpoints/README-checkpoint.md ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - ha
4
+ license: apache-2.0
5
+ tags:
6
+ - automatic-speech-recognition
7
+ - mozilla-foundation/common_voice_8_0
8
+ - generated_from_trainer
9
+ - "ha"
10
+ - "robust-speech-event"
11
+ datasets:
12
+ - common_voice
13
+ model-index:
14
+ - name: ''
15
+ results: []
16
+ ---
17
+
18
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
19
+ should probably proofread and complete it, then remove this comment. -->
20
+
21
+ #
22
+
23
+ This model is a fine-tuned version of [facebook/wav2vec2-xls-r-300m](https://huggingface.co/facebook/wav2vec2-xls-r-300m) on the MOZILLA-FOUNDATION/COMMON_VOICE_8_0 - HA dataset.
24
+ It achieves the following results on the evaluation set:
25
+ - Loss: 0.4925
26
+ - Wer: 0.5714
27
+
28
+ ## Model description
29
+
30
+ More information needed
31
+
32
+ ## Intended uses & limitations
33
+
34
+ More information needed
35
+
36
+ ## Training and evaluation data
37
+
38
+ More information needed
39
+
40
+ ## Training procedure
41
+
42
+ ### Training hyperparameters
43
+
44
+ The following hyperparameters were used during training:
45
+ - learning_rate: 7.5e-05
46
+ - train_batch_size: 8
47
+ - eval_batch_size: 8
48
+ - seed: 42
49
+ - gradient_accumulation_steps: 4
50
+ - total_train_batch_size: 32
51
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
52
+ - lr_scheduler_type: linear
53
+ - lr_scheduler_warmup_steps: 2000
54
+ - num_epochs: 80.0
55
+ - mixed_precision_training: Native AMP
56
+
57
+ ### Training results
58
+
59
+ | Training Loss | Epoch | Step | Validation Loss | Wer |
60
+ |:-------------:|:-----:|:----:|:---------------:|:------:|
61
+ | 3.1674 | 8.33 | 500 | 3.0295 | 1.0 |
62
+ | 2.6987 | 16.66 | 1000 | 2.6878 | 1.0 |
63
+ | 1.3454 | 24.99 | 1500 | 0.6814 | 0.6981 |
64
+ | 1.1227 | 33.33 | 2000 | 0.5791 | 0.6513 |
65
+ | 0.9972 | 41.66 | 2500 | 0.5235 | 0.5718 |
66
+ | 0.9123 | 49.99 | 3000 | 0.5104 | 0.5633 |
67
+ | 0.836 | 58.33 | 3500 | 0.4927 | 0.5580 |
68
+ | 0.7725 | 66.66 | 4000 | 0.5078 | 0.5779 |
69
+ | 0.7297 | 74.99 | 4500 | 0.4939 | 0.5737 |
70
+
71
+
72
+ ### Framework versions
73
+
74
+ - Transformers 4.17.0.dev0
75
+ - Pytorch 1.10.2+cu113
76
+ - Datasets 1.18.4.dev0
77
+ - Tokenizers 0.11.0
.ipynb_checkpoints/eval-checkpoint.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import argparse
3
+ import re
4
+ from typing import Dict
5
+
6
+ import torch
7
+ from datasets import Audio, Dataset, load_dataset, load_metric
8
+
9
+ from transformers import AutoFeatureExtractor, pipeline
10
+
11
+
12
def log_results(result: Dataset, args: argparse.Namespace):
    """Compute WER/CER for the predictions, print them and write them to disk.

    The metric computation must stay as it is (the upstream template marks
    this function "DO NOT CHANGE") so that reported scores remain comparable.

    Args:
        result: Dataset exposing ``"prediction"`` and ``"target"`` columns.
        args: Parsed command-line arguments; ``log_outputs``, ``dataset``,
            ``config`` and ``split`` are read here.
    """
    log_outputs = args.log_outputs
    dataset_id = "_".join(args.dataset.split("/") + [args.config, args.split])

    # load metric
    wer = load_metric("wer")
    cer = load_metric("cer")

    # compute metrics
    wer_result = wer.compute(references=result["target"], predictions=result["prediction"])
    cer_result = cer.compute(references=result["target"], predictions=result["prediction"])

    # print & log results
    result_str = f"WER: {wer_result}\nCER: {cer_result}"
    print(result_str)

    # Pin the encoding: transcripts may contain non-ASCII letters and the
    # platform default encoding is not guaranteed to handle them.
    with open(f"{dataset_id}_eval_results.txt", "w", encoding="utf-8") as f:
        f.write(result_str)

    # log all results in text file. Possibly interesting for analysis.
    # `log_outputs` comes from a store_true flag, so it is a bool and never
    # None; test its truthiness so the flag actually controls the logging.
    if log_outputs:
        pred_file = f"log_{dataset_id}_predictions.txt"
        target_file = f"log_{dataset_id}_targets.txt"

        with open(pred_file, "w", encoding="utf-8") as p, open(target_file, "w", encoding="utf-8") as t:
            # Each example is written as two lines: its index, then the text.
            for i, (prediction, target) in enumerate(zip(result["prediction"], result["target"])):
                p.write(f"{i}\n{prediction}\n")
                t.write(f"{i}\n{target}\n")
48
+
49
+
50
def normalize_text(text: str) -> str:
    """Normalize a target transcript before it is scored.

    DO ADAPT FOR YOUR USE CASE: the set of removed characters should
    correspond to the characters that were ignored during training.

    The text is lower-cased, the punctuation listed in the character class is
    removed, and every run of whitespace (spaces, newlines, ...) is collapsed
    into a single space.

    Args:
        text: Raw reference sentence.

    Returns:
        The normalized sentence.
    """
    # Raw string: "\-", "\;" and "\:" are regex escapes, not valid Python
    # string escapes, so a plain literal triggers an invalid-escape warning.
    chars_to_ignore_regex = r'[,?.!\-\;\:"“%‘”�—’…–]'

    text = re.sub(chars_to_ignore_regex, "", text.lower())

    # Collapse whitespace in one pass so runs of any length reduce to one
    # space; replacing fixed-length sequences in order can leave doubles.
    return re.sub(r"\s+", " ", text)
65
+
66
+
67
def main(args):
    """Run the ASR model over a dataset split and log the resulting metrics.

    Args:
        args: Parsed command-line arguments. ``dataset``, ``config``, ``split``,
            ``model_id``, ``device``, ``chunk_length_s`` and ``stride_length_s``
            are read here; ``device`` is filled in when it was left unset.
    """
    # Fetch the requested split.
    eval_set = load_dataset(args.dataset, args.config, split=args.split, use_auth_token=True)

    # For a quick smoke test, uncomment to evaluate only the first 10 examples:
    # eval_set = eval_set.select(range(10))

    # Cast the audio column to the sampling rate the feature extractor reports.
    target_rate = AutoFeatureExtractor.from_pretrained(args.model_id).sampling_rate
    eval_set = eval_set.cast_column("audio", Audio(sampling_rate=target_rate))

    # No explicit device: use GPU 0 when CUDA is available, otherwise CPU (-1).
    if args.device is None:
        args.device = 0 if torch.cuda.is_available() else -1
    recognizer = pipeline("automatic-speech-recognition", model=args.model_id, device=args.device)

    def transcribe(example):
        """Attach the model transcript and the normalized reference to one example."""
        output = recognizer(
            example["audio"]["array"],
            chunk_length_s=args.chunk_length_s,
            stride_length_s=args.stride_length_s,
        )
        example["prediction"] = output["text"]
        example["target"] = normalize_text(example["sentence"])
        return example

    # Transcribe every example, dropping the original columns from the output.
    scored = eval_set.map(transcribe, remove_columns=eval_set.column_names)

    # Metric computation and logging live in log_results.
    log_results(scored, args)
102
+
103
+
104
if __name__ == "__main__":
    # Command-line entry point: parse the evaluation options and hand them to main().
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--model_id", type=str, required=True, help="Model identifier. Should be loadable with 🤗 Transformers"
    )
    parser.add_argument(
        "--dataset",
        type=str,
        required=True,
        help="Dataset name to evaluate the `model_id`. Should be loadable with 🤗 Datasets",
    )
    parser.add_argument(
        "--config", type=str, required=True, help="Config of the dataset. *E.g.* `'en'` for Common Voice"
    )
    parser.add_argument("--split", type=str, required=True, help="Split of the dataset. *E.g.* `'test'`")
    # The two options below default to None; the help text says so rather
    # than advertising numeric defaults that are never applied.
    parser.add_argument(
        "--chunk_length_s", type=float, default=None, help="Chunk length in seconds. Not set by default."
    )
    parser.add_argument(
        "--stride_length_s", type=float, default=None, help="Stride of the audio chunks in seconds. Not set by default."
    )
    parser.add_argument(
        "--log_outputs", action="store_true", help="If defined, write outputs to log file for analysis."
    )
    parser.add_argument(
        "--device",
        type=int,
        default=None,
        help=(
            "The device to run the pipeline on. -1 for CPU, 0 for the first GPU and so on. "
            "If not set, the first GPU is used when CUDA is available, otherwise the CPU."
        ),
    )
    args = parser.parse_args()

    main(args)
README.md CHANGED
@@ -6,6 +6,8 @@ tags:
6
  - automatic-speech-recognition
7
  - mozilla-foundation/common_voice_8_0
8
  - generated_from_trainer
 
 
9
  datasets:
10
  - common_voice
11
  model-index:
 
6
  - automatic-speech-recognition
7
  - mozilla-foundation/common_voice_8_0
8
  - generated_from_trainer
9
+ - "ha"
10
+ - "robust-speech-event"
11
  datasets:
12
  - common_voice
13
  model-index:
eval.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import argparse
3
+ import re
4
+ from typing import Dict
5
+
6
+ import torch
7
+ from datasets import Audio, Dataset, load_dataset, load_metric
8
+
9
+ from transformers import AutoFeatureExtractor, pipeline
10
+
11
+
12
def log_results(result: Dataset, args: argparse.Namespace):
    """Compute WER/CER for the predictions, print them and write them to disk.

    The metric computation must stay as it is (the upstream template marks
    this function "DO NOT CHANGE") so that reported scores remain comparable.

    Args:
        result: Dataset exposing ``"prediction"`` and ``"target"`` columns.
        args: Parsed command-line arguments; ``log_outputs``, ``dataset``,
            ``config`` and ``split`` are read here.
    """
    log_outputs = args.log_outputs
    dataset_id = "_".join(args.dataset.split("/") + [args.config, args.split])

    # load metric
    wer = load_metric("wer")
    cer = load_metric("cer")

    # compute metrics
    wer_result = wer.compute(references=result["target"], predictions=result["prediction"])
    cer_result = cer.compute(references=result["target"], predictions=result["prediction"])

    # print & log results
    result_str = f"WER: {wer_result}\nCER: {cer_result}"
    print(result_str)

    # Pin the encoding: transcripts may contain non-ASCII letters and the
    # platform default encoding is not guaranteed to handle them.
    with open(f"{dataset_id}_eval_results.txt", "w", encoding="utf-8") as f:
        f.write(result_str)

    # log all results in text file. Possibly interesting for analysis.
    # `log_outputs` comes from a store_true flag, so it is a bool and never
    # None; test its truthiness so the flag actually controls the logging.
    if log_outputs:
        pred_file = f"log_{dataset_id}_predictions.txt"
        target_file = f"log_{dataset_id}_targets.txt"

        with open(pred_file, "w", encoding="utf-8") as p, open(target_file, "w", encoding="utf-8") as t:
            # Each example is written as two lines: its index, then the text.
            for i, (prediction, target) in enumerate(zip(result["prediction"], result["target"])):
                p.write(f"{i}\n{prediction}\n")
                t.write(f"{i}\n{target}\n")
48
+
49
+
50
def normalize_text(text: str) -> str:
    """Normalize a target transcript before it is scored.

    DO ADAPT FOR YOUR USE CASE: the set of removed characters should
    correspond to the characters that were ignored during training.

    The text is lower-cased, the punctuation listed in the character class is
    removed, and every run of whitespace (spaces, newlines, ...) is collapsed
    into a single space.

    Args:
        text: Raw reference sentence.

    Returns:
        The normalized sentence.
    """
    # Raw string: "\-", "\;" and "\:" are regex escapes, not valid Python
    # string escapes, so a plain literal triggers an invalid-escape warning.
    chars_to_ignore_regex = r'[,?.!\-\;\:"“%‘”�—’…–]'

    text = re.sub(chars_to_ignore_regex, "", text.lower())

    # Collapse whitespace in one pass so runs of any length reduce to one
    # space; replacing fixed-length sequences in order can leave doubles.
    return re.sub(r"\s+", " ", text)
65
+
66
+
67
def main(args):
    """Run the ASR model over a dataset split and log the resulting metrics.

    Args:
        args: Parsed command-line arguments. ``dataset``, ``config``, ``split``,
            ``model_id``, ``device``, ``chunk_length_s`` and ``stride_length_s``
            are read here; ``device`` is filled in when it was left unset.
    """
    # Fetch the requested split.
    eval_set = load_dataset(args.dataset, args.config, split=args.split, use_auth_token=True)

    # For a quick smoke test, uncomment to evaluate only the first 10 examples:
    # eval_set = eval_set.select(range(10))

    # Cast the audio column to the sampling rate the feature extractor reports.
    target_rate = AutoFeatureExtractor.from_pretrained(args.model_id).sampling_rate
    eval_set = eval_set.cast_column("audio", Audio(sampling_rate=target_rate))

    # No explicit device: use GPU 0 when CUDA is available, otherwise CPU (-1).
    if args.device is None:
        args.device = 0 if torch.cuda.is_available() else -1
    recognizer = pipeline("automatic-speech-recognition", model=args.model_id, device=args.device)

    def transcribe(example):
        """Attach the model transcript and the normalized reference to one example."""
        output = recognizer(
            example["audio"]["array"],
            chunk_length_s=args.chunk_length_s,
            stride_length_s=args.stride_length_s,
        )
        example["prediction"] = output["text"]
        example["target"] = normalize_text(example["sentence"])
        return example

    # Transcribe every example, dropping the original columns from the output.
    scored = eval_set.map(transcribe, remove_columns=eval_set.column_names)

    # Metric computation and logging live in log_results.
    log_results(scored, args)
102
+
103
+
104
if __name__ == "__main__":
    # Command-line entry point: parse the evaluation options and hand them to main().
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--model_id", type=str, required=True, help="Model identifier. Should be loadable with 🤗 Transformers"
    )
    parser.add_argument(
        "--dataset",
        type=str,
        required=True,
        help="Dataset name to evaluate the `model_id`. Should be loadable with 🤗 Datasets",
    )
    parser.add_argument(
        "--config", type=str, required=True, help="Config of the dataset. *E.g.* `'en'` for Common Voice"
    )
    parser.add_argument("--split", type=str, required=True, help="Split of the dataset. *E.g.* `'test'`")
    # The two options below default to None; the help text says so rather
    # than advertising numeric defaults that are never applied.
    parser.add_argument(
        "--chunk_length_s", type=float, default=None, help="Chunk length in seconds. Not set by default."
    )
    parser.add_argument(
        "--stride_length_s", type=float, default=None, help="Stride of the audio chunks in seconds. Not set by default."
    )
    parser.add_argument(
        "--log_outputs", action="store_true", help="If defined, write outputs to log file for analysis."
    )
    parser.add_argument(
        "--device",
        type=int,
        default=None,
        help=(
            "The device to run the pipeline on. -1 for CPU, 0 for the first GPU and so on. "
            "If not set, the first GPU is used when CUDA is available, otherwise the CPU."
        ),
    )
    args = parser.parse_args()

    main(args)
log_mozilla-foundation_common_voice_7_0_ha_test_predictions.txt ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 0
2
+ kacin haɗe ye ke da kyautar
3
+ 1
4
+ yalzu waɗabiu ar matasa ra sunya tkanyu fata rauta
5
+ 2
6
+ za ta iya jin ciwo inan tayi haka
7
+ 3
8
+ shn bawane abuda za su yi ranarjin mua
9
+ 4
10
+ ganyan yaki da suk bushe suna iyu a sama ruwa
11
+ 5
12
+ ba wanda ya san yusuf tsohon muƙaryaci ne
13
+ 6
14
+ na ji da din ha ɗuwa da kai mustapha
15
+ 7
16
+ ya kuna wuter tun ka finya higackin kogon
17
+ 8
18
+ menene sunan kan
19
+ 9
20
+ da yau zan kirata
21
+ 10
22
+ zuwa gobar safe yarinyar ta kammala
23
+ 11
24
+ ina maban aure shi ba
25
+ 12
26
+ habibu ya nuna bai san kasan cewa nan
27
+ 13
28
+ aban farin cik akwai tsara rbutunim samun gamkafi
29
+ 14
30
+ ban taba kiran shi da wawa ba
31
+ 15
32
+ ban san yadda ake kamun kefi ba
33
+ 16
34
+ an kama matashin ne da hanno cikin wanirkici
35
+ 17
36
+ ina zama a wani ƙaramin ƙauye kiloyit hamsun tsekaninsu da birnii
37
+ 18
38
+ jalal ya rubu ta labari mai tashin hankali da ke da ƙarshe mai furin ciki
39
+ 19
40
+ zan buƙaci kuɗi fee da haga
41
+ 20
42
+ ina ayaki cikin aboka ni kuma ina rayowa cikin littafi
43
+ 21
44
+ muna huta wa tsirara a cikin ya shimai ɗume
45
+ 22
46
+ bana tsammanin jami na da tabbaci abin da laditu ki sanyi
47
+ 23
48
+ an haranta ma ɗalibai shan taba a farfe jir makarantar
49
+ 24
50
+ thia fade are a jaridocewa lafi ne na san zuca
51
+ 25
52
+ shibana san banyi zain dadacewa
53
+ 26
54
+ wajen ya yi kacikaca
55
+ 27
56
+ twannan uzare bekina barin aicewa
57
+ 28
58
+ sun dage sa sun yi tafiya zuwa ƙasar betnancikin kwanaki shi da kacall
59
+ 29
60
+ kamar be faruba
61
+ 30
62
+ marsirs hina ya finaka girma
63
+ 31
64
+ ya yi koƙarno na shijarime ne bayo an yagar kuwad she
65
+ 32
66
+ tu lokacin da sabe ta watu mijintya shiga cikin zurfin tunani
67
+ 33
68
+ kungiyar haɗaka da kan farin sum amince da sabon kungila
69
+ 34
70
+ ka kula da wannan labarin
71
+ 35
72
+ ko akwai ciwa sosa kirjin kuna ɓangare hagu
73
+ 36
74
+ abin na da ban tsoro
75
+ 37
76
+ saide shagon be da wani girma sosai
77
+ 38
78
+ ki tabbatar ba gitutar ra kankiwa
79
+ 39
80
+ za mu iya ba tare da taimakon ibrahimba
81
+ 40
82
+ aliyu ba ta ya tan tamar samun muta ne dewa bi haka
83
+ 41
84
+ iran ki yi sau ɗaya tu ka ki ƙaraye
85
+ 42
86
+ hadizu am ba ƙuwa ce a garnan ko ba haka ba
87
+ 43
88
+ jarumin ya mata a kanki masoyiya tasa raunya ta
89
+ 44
90
+ na so rarren zama tsawon yade shida mesa da cutun
91
+ 45
92
+ nayi na son ya abo zuwa tsaker doniya
93
+ 46
94
+ kuma kunce wannan matsin lambane a girjenku
95
+ 47
96
+ loletai rawa tare da gris
97
+ 48
98
+ amin cewa adaye mashe me sirrin haƙurnta
99
+ 49
100
+ ina turami isha ku dajumainazargi
101
+ 50
102
+ bayan kwaseshekaurugoma suna kaswanjin haɗan gwiwa sun yanke ukunshin rabawa
103
+ 51
104
+ kan fanumo ana talarci tallace me shekara shekara na yan dubu milyan
105
+ 52
106
+ abokina na son ya amsa laifi
107
+ 53
108
+ fasan banbanci sakanin silba da gongane
109
+ 54
110
+ rana tana bayaddazafi da haske da yawa
111
+ 55
112
+ yusus ya faɗawa lare yadda suku haɗo da hassan
113
+ 56
114
+ ƙwas lokuton gudin sira dara yakan bar mutun ba komai
115
+ 57
116
+ aliyu ya daɗe yan yin wannan aiki
117
+ 58
118
+ akwai mamaki a ce bai san labarin ba
119
+ 59
120
+ linda t gano ishakune mutu minda yayi mata fiaɗe
121
+ 60
122
+ yau ina fama da ciwon kirji
123
+ 61
124
+ fana karattun nwa san kwi kwayogame da tallafin karato na musa man
125
+ 62
126
+ an bukatar ɗalibai suyi aikin saa ɗayaga alƙuma a cowone sate
127
+ 63
128
+ ya jirwon saman ya a fuskarsan
129
+ 64
130
+ zazzaɓin ya fara kwanaki biyu da sukauce
131
+ 65
132
+ mutuka za me
133
+ 66
134
+ kozei ya shakwya k kuma ya haukace
135
+ 67
136
+ hbibu asabe hassan da ishenduk suna magana da faransanci
137
+ 68
138
+ ba na jin jauro zai jeba ki ruwa yao
139
+ 69
140
+ ƙoso lala tjjerin makamae
141
+ 70
142
+ ba kwa buƙatur shirya wani muhi min jawabi
143
+ 71
144
+ abdullai na ɗe daga cikin man malaƙan wanna gini
145
+ 72
146
+ haki ƙa binccin gize hrfar da ɗame ilo
147
+ 73
148
+ ƙarin daya gaba zu iya iso waje na ba ya fara fushi
149
+ 74
150
+ e ina fama da ciwo cikin kirji na sosai
151
+ 75
152
+ ynzu dama ce mai kyaut hukunta ishku game da abinda ya aikata
153
+ 76
154
+ ki bari mustapha ya sai make wannan
155
+ 77
156
+ bana tsammanin jauro da lamina da wahalar shaani
157
+ 78
158
+ bayin sati ɗaya abdullahi ya gundi ramutane
159
+ 79
160
+ zan tambe ya shigobe
161
+ 80
162
+ dafatin alat alaya farentmakarai
163
+ 81
164
+ lokacin da ya kai mata hari tana neman makullin cikin jakarta
165
+ 82
166
+ shin kun jila barin mu
167
+ 83
168
+ gamaskiya bane da wata alama
169
+ 84
170
+ na yi tunanin zan same coacen
171
+ 85
172
+ ha yaƙin yana shaƙeta
173
+ 86
174
+ kaɗan shakofi ina tunanin yana da daɗi
175
+ 87
176
+ ina bukaton ku bai yana min wani abu
177
+ 88
178
+ yin haka zai fidaɗi ko ma kuka gani
179
+ 89
180
+ nima na ji
181
+ 90
182
+ aliyu na tsoron yin magana dane ko ba haka ba
183
+ 91
184
+ kamar bitrus ya gaji sosai
185
+ 92
186
+ zan so idan hakan bai faru ba
187
+ 93
188
+ na damuƙorai da wannan za sin kirjin
189
+ 94
190
+ kina sanyi da kayan
191
+ 95
192
+ an gudanar da fatin ban kwana jiya sabida mrjons
193
+ 96
194
+ yabutun uskrin me chkuleti bayan cin abinci
195
+ 97
196
+ ina tsoron kada kucutar da alyu
197
+ 98
198
+ sunsayena cikin kananan kware
199
+ 99
200
+ ina collontunsaye lokacin dana ke zauni a borcon ina shan kofi
201
+ 100
202
+ babangida ya isalendon tare da wasu gungun masana
203
+ 101
204
+ ibrahim ya ce ya yi sammani hauwatu ta yi mamake
205
+ 102
206
+ an ce na seyoke ka hanyar dowowa ta daga ofis
207
+ 103
208
+ ya mubun soros da ya ga caton macijin
209
+ 104
210
+ abdullahi ya sancewa zai iya yin haka ɗikkyau
211
+ 105
212
+ na gagi kuma ina jin yanwa kuma haka kowa ye kerje
213
+ 106
214
+ zan komo kansa
215
+ 107
216
+ amurka zatu fice dagayarjejeniyar fris
217
+ 108
218
+ ina tunanin ya kamatu na faɗawa alik wajen da naji
219
+ 109
220
+ habibu ya iya wakan
221
+ 110
222
+ na yi da riyaso saaarda ciwon ciki
223
+ 111
224
+ na san hayenzu yina cin
225
+ 112
226
+ ina nin zafi a kirgi
227
+ 113
228
+ baburiga ka fi amma hukumomii da ban da ban suna ƙoƙarin samoriga kafin
229
+ 114
230
+ babangida ya ƙwre a wasan tenis
231
+ 115
232
+ ibrahim ya ce bya tunanin ko akwai wanda zi iya haka
233
+ 116
234
+ zukuma kuna da zazzaɓi yanzu
235
+ 117
236
+ ka gaida min da matarka
237
+ 118
238
+ birus ya fara farin ciki bayan yahar ba ƙwallin a ragarsa
239
+ 119
240
+ akwai buƙatar ƙarabin cike
241
+ 120
242
+ ban san hassan da mai muna ba su da lafiya ba
243
+ 121
244
+ zah a ƙara buƙatar gwaji nin gaba
245
+ 122
246
+ na gaji kasuwancin daga ma haishina
247
+ 123
248
+ gina zafi a tsakkiyar kirjina
249
+ 124
250
+ kana sun ka zauna
251
+ 125
252
+ ba wn da ya damu dani
253
+ 126
254
+ gogulda ama zan sun aiwatar da ƙuntetawa iri ɗaya
255
+ 127
256
+ gaskiyane ckin alunman amurkacewa na mijine shugaban gida
257
+ 128
258
+ mai wannan gidan gia baya taɓauseyar da gia a kan bashh
259
+ 129
260
+ ishaku bai damu da lare ba
261
+ 130
262
+ muna da abun mamaki
263
+ 131
264
+ na so yardadirshe
265
+ 132
266
+ makon jeyawani yara ba godiyar sa ga aikinm
267
+ 133
268
+ kirani bayan ku yi maganarsu
269
+ 134
270
+ kamar jauro na jin tsoron wani azu
271
+ 135
272
+ ina bukatar wanda zun yi maganar shi
273
+ 136
274
+ ibrahim ya fara kooruwa ckin azaba
275
+ 137
276
+ muna da gig dobeye da rijir a gidan rawa
277
+ 138
278
+ shi gbanrawa shi ne samu bubi
279
+ 139
280
+ jauroya ce na sun wancin do huka sa bashi
281
+ 140
282
+ ka can gargidan yarine za tu i ba da karye ba
283
+ 141
284
+ ina son jif karsawancin kasashin waje a nin gaba
285
+ 142
286
+ bun guda hakan jiya
287
+ 143
288
+ ban san yisuf yina baci ba
289
+ 144
290
+ ya kudai diskirfi go cnoss
291
+ 145
292
+ yaushe kyasayawa ganka babur
293
+ 146
294
+ bitrusa yana yawan tufiyan
295
+ 147
296
+ abdullahii be biyani kamar yaddae alƙawari ba
297
+ 148
298
+ ni wayayye ne
log_mozilla-foundation_common_voice_7_0_ha_test_targets.txt ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 0
2
+ katin haɗe yake da kyautar
3
+ 1
4
+ ya zama ɗabi'ar matasa sanya takalmin fatarauta
5
+ 2
6
+ za ta iya jin ciwo idan ta yi haka
7
+ 3
8
+ shin ba wani abu da za su yi ranar juma'a
9
+ 4
10
+ ganyayyaki da suka bushe suna iyo a saman ruwa
11
+ 5
12
+ ba wanda ya san yusuf tsohon maƙaryaci ne
13
+ 6
14
+ na ji daɗin haɗuwa da kai mustapha
15
+ 7
16
+ ya kunna wutan tun kafin ya shiga cikin kogon
17
+ 8
18
+ menene sunanka
19
+ 9
20
+ da yau zan kira ta
21
+ 10
22
+ zuwa gobe da safe yarinyar ta kammala
23
+ 11
24
+ ina ma ban aure shi ba
25
+ 12
26
+ habibu ya nuna bai son kasancewa anan
27
+ 13
28
+ abin farin akwai tsarin rubutu na musamman ga makafi
29
+ 14
30
+ ban taba kiran shi da wawa ba
31
+ 15
32
+ ban san yadda ake kamun kifi ba
33
+ 16
34
+ an kama matashin ne da hannu cikin wani rikici
35
+ 17
36
+ ina zama a wani ƙaramin ƙauye kilo mita hamsin tsakaninsu da birni
37
+ 18
38
+ jalal ya rubuta labari mai tashin hankali da ke da ƙarshe mai farin ciki
39
+ 19
40
+ zan buƙaci kuɗi fiye da haka
41
+ 20
42
+ ina aiki cikin abokanai kuma ina rayuwa cikin littafai
43
+ 21
44
+ muna hutawa tsirara a cikin yashi mai dumi
45
+ 22
46
+ ba na tsammanin jami na da tabbacin abinda laditu ke son yi
47
+ 23
48
+ an haramta wa ɗalibai shan taba a farfajiyar makarantar
49
+ 24
50
+ an fada a jaridun cewar laifi ne na son zuciya
51
+ 25
52
+ na san ban yi zaɓin da ya dace ba
53
+ 26
54
+ wajen ya yi kacakaca
55
+ 27
56
+ wannan uzirin bai kai na barin aiki ba
57
+ 28
58
+ sun dage sai sun yi tafiya zuwa ƙarshen ƙasar vietnam cikin kwanaki shida kacal
59
+ 29
60
+ kamar bai faru ba
61
+ 30
62
+ mercedes na ya fi naka girma
63
+ 31
64
+ ya yi ƙoƙarin nuna shi jarumi ne bayan an yi garkuwa da shi
65
+ 32
66
+ tun lokacin da asabe ta mutu mijinta ya shiga cikin zurfin tunani
67
+ 33
68
+ kungiyar haɗaka da kamfanin sun amince da sabon kwangila
69
+ 34
70
+ ka kula da wannan labarin
71
+ 35
72
+ ko akwai ciwo sosai a kirjin ku na bangaren hagu
73
+ 36
74
+ abin na da ban tsoro
75
+ 37
76
+ sai dai shagon bai da wani girma sosai
77
+ 38
78
+ ki tabbatar ba ki cutar da kanki ba
79
+ 39
80
+ za mu iya ba tare da taimakon ibrahims ba
81
+ 40
82
+ aliyu ba ta yi tantamar samun mutane da yawa ba haka
83
+ 41
84
+ idan kin yi sau ɗaya to kar ki ƙara yi
85
+ 42
86
+ hadizaam bakuwa ce a garin nan ko ba haka ba
87
+ 43
88
+ jarumin ya mutu akan ki masoyiyata sarauniya ta
89
+ 44
90
+ na so wuraren zama tsawon yadi shida nesa da kotun
91
+ 45
92
+ nauyi na sanya abu zuwa tsakiyar duniya
93
+ 46
94
+ kuma kun ce wannan matsin lamba ne a kirjin ku
95
+ 47
96
+ lola ta yi rawa tare da grace
97
+ 48
98
+ amincewa da yanayi shi ne sirrin haƙurinta
99
+ 49
100
+ ina tunanin ishaku da jummai na zargi
101
+ 50
102
+ bayan kwashe shekaru goma suna kasuwancin hadin gwiwa sun yanke hukuncin rabawa
103
+ 51
104
+ kamfaninmu yana tallacetallace na shekarashekara na yen dubu miliyan
105
+ 52
106
+ abokina na son ya amsa laifi
107
+ 53
108
+ ka san banbanci tsakanin silba da gwangwani
109
+ 54
110
+ rana tana bayar da zafi da haske da yawa
111
+ 55
112
+ yusuf ya fadawa lare yadda suka hadu da hassan
113
+ 56
114
+ wasu lokutan gudun tsira da rai ya kan bar mutum ba komai
115
+ 57
116
+ aliyu ya daɗe yana yin wannan aiki
117
+ 58
118
+ akwai mamaki a ce bai san labarin ba
119
+ 59
120
+ linda ta gano ishaku ne mutumin da ya yi mata fyaɗe
121
+ 60
122
+ yau ina fama da ciwon kirji
123
+ 61
124
+ tana karatun wasan kwaikwayo game da tallafin karatu na musamman
125
+ 62
126
+ ana buƙatar ɗalibai su yi aikin sa'a ɗaya ga alʻumma a kowane sati
127
+ 63
128
+ ya ji ruwan saman ya a fuskarsa
129
+ 64
130
+ zazzabin ya fara kwanaki biyu da suka wuce
131
+ 65
132
+ mutu ƙazami
133
+ 66
134
+ ko dai ya sha ƙwya ko kuma ya haukace
135
+ 67
136
+ habibu asabe hassan da aishaam duk suna magana da faransanci
137
+ 68
138
+ bana jin jauro zai je bakin ruwa yau
139
+ 69
140
+ otolalata jerin makamai
141
+ 70
142
+ ba kwa buƙatar shirya wani muhimmini jawabi
143
+ 71
144
+ abdullahi na daya daga cikin mamallakan wannan ginin
145
+ 72
146
+ haƙiƙa bincikenka zai haifar da ɗa mai ido
147
+ 73
148
+ karen da yaga ba zai iya iso wajena ba ya fara haushi
149
+ 74
150
+ eh ina fama da ciwo cikin kirji na sosai
151
+ 75
152
+ yanzu dama ce mai kyau ta hukunta ishaku game da abinda ya aikata
153
+ 76
154
+ ki bari mustapha ya sai miki wannan
155
+ 77
156
+ bana tsammanin jauro da lami na da wahalar sha'ani
157
+ 78
158
+ bayan sati ɗaya abdullahi ya gundiri mutane
159
+ 79
160
+ zan tambaye shi gobe
161
+ 80
162
+ da fatan allah ta'ala ya faranta maka rai
163
+ 81
164
+ lokacin da ya kai mata hari tana neman makullan cikin jakarta
165
+ 82
166
+ shin kun ji labarin mu
167
+ 83
168
+ gaskiya bani da wata alama
169
+ 84
170
+ na yi tunani zan same ku a can
171
+ 85
172
+ hayaƙin yana shaƙe ta
173
+ 86
174
+ ka ɗan sha kofi ina tunanin yana da daɗi
175
+ 87
176
+ ina bukatan ku bayyana min wani abu
177
+ 88
178
+ yin haka zai fi dadi ko me kuka gani
179
+ 89
180
+ ni ma na ji
181
+ 90
182
+ aliyu na tsoron yin magana dani ko ba haka
183
+ 91
184
+ kamar bitrus ya gaji sosai
185
+ 92
186
+ zan so idan hakan bai faru ba
187
+ 93
188
+ na damu kwarai da wannan zafin kirji
189
+ 94
190
+ kina sanye da kaya
191
+ 95
192
+ an gudanar da fatin bankwana jiya sabida mr jones
193
+ 96
194
+ ya batun askirim me cakuletin bayan cin abinci
195
+ 97
196
+ ina tsoron kada ku cutar da aliyu
197
+ 98
198
+ tsuntsaye na cin ƙananan ƙwari
199
+ 99
200
+ ina kallon tsuntsayen lokacin da na ke zaune a balcony ina shan kofi
201
+ 100
202
+ babangida ya isa landan tare da wasu gungun masana
203
+ 101
204
+ ibrahim ya ce ya yi tsammanin hauwatu ta yi mamaki
205
+ 102
206
+ an ce na siyo kek a kan hanyar dawowata daga ofis
207
+ 103
208
+ ya mugun tsorata da ya ga ƙaton macijin
209
+ 104
210
+ abdullahi ya san cewa zai iya yin hakan da kyau
211
+ 105
212
+ na gaji kuma ina jin yinwa kuma haka kowa yake ji
213
+ 106
214
+ zan komo kansa
215
+ 107
216
+ amurka za ta fice daga yarjejeniyar paris
217
+ 108
218
+ ina tunanin ya kamata na fadawa aliko wajen da naje
219
+ 109
220
+ habibu ya iya waƙa
221
+ 110
222
+ na yi dariya sosai har da ciwon ciki
223
+ 111
224
+ na san har yanzu yana can
225
+ 112
226
+ yana min zafi a kirji
227
+ 113
228
+ babu rigakafi amma hukumomi dabamdabam suna kokarin samo rigakafin
229
+ 114
230
+ babangida ya kware a wasan tennis
231
+ 115
232
+ ibrahim ya ce baya tunanin ko akwai wanda zai iya haka
233
+ 116
234
+ kuma kuna da zazzaɓi yanzu
235
+ 117
236
+ ka gaida min da matarka
237
+ 118
238
+ bitrus ya fara farin ciki bayan ya harba ƙwallon a ragar sa
239
+ 119
240
+ akwai buƙatar ƙara bincike
241
+ 120
242
+ ban san hassan da maimuna ba su da lafiya ba
243
+ 121
244
+ za a ƙara buƙatar gwaji nan gaba
245
+ 122
246
+ na gaji kasuwancin daga mahaifina
247
+ 123
248
+ yana zafi a tsakiyar kirji na
249
+ 124
250
+ kana son ka zauna
251
+ 125
252
+ ba wanda ya damu da ni
253
+ 126
254
+ google da amazon sun aiwatar da ƙuntatawa iri daya
255
+ 127
256
+ gaskiya ne a cikin al'umman amurka cewa namiji ne shugaban gida
257
+ 128
258
+ mai wannan gidan giya baya taba sayar da giya akan bashi
259
+ 129
260
+ ishaku bai damu da lare ba
261
+ 130
262
+ muna da abin mamaki
263
+ 131
264
+ na so yarda da shi
265
+ 132
266
+ makon jiya wani ya raba godiyar sa ga aikin mu
267
+ 133
268
+ kira ni bayan kun yi magana da su
269
+ 134
270
+ kamar jauro na jin tsoron wani abu
271
+ 135
272
+ ina bukatar wanda zan yi magana da shi
273
+ 136
274
+ ibrahimu ya fara kururuwa cikin azaba
275
+ 137
276
+ muna da gig gobe da daddare a gidan rawa
277
+ 138
278
+ cigaban rayuwa shi ne samun buɗi
279
+ 139
280
+ jauro ya ce yana son wancan don haka na bashi
281
+ 140
282
+ katangar gidan yarin ba za ta iya bada kariya
283
+ 141
284
+ ina so in shiga kasuwancin kasashen waje a nan gaba
285
+ 142
286
+ mun gwada hakan jiya
287
+ 143
288
+ ban san yusuf yana bacci ba
289
+ 144
290
+ ya fi dai da suka rufe bakunan su
291
+ 145
292
+ yaushe ka sayawa kanka babur
293
+ 146
294
+ bitrusa yana yawan tafiya
295
+ 147
296
+ abdullahi bai biya ni kamar yadda ya yi alkawari ba
297
+ 148
298
+ ni wayyaye ne
mozilla-foundation_common_voice_7_0_ha_test_eval_results.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ WER: 0.5106928999144568
2
+ CER: 0.13631704817218995