Spaces:
Runtime error
Runtime error
File size: 6,878 Bytes
59619e5 d754ac0 59619e5 8223a0d d754ac0 59619e5 d754ac0 59619e5 d754ac0 59619e5 7ae98ba 59619e5 d754ac0 59619e5 d754ac0 59619e5 d754ac0 59619e5 d754ac0 59619e5 ed26513 d754ac0 ed26513 59619e5 d754ac0 59619e5 d754ac0 59619e5 d754ac0 59619e5 d754ac0 ed26513 59619e5 d754ac0 9293b9d 0f68bbb 59619e5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 |
import gradio as gr
import pandas as pd
from css_html_js import custom_css
# Page heading, rendered with gr.HTML. The flag emoji had been corrupted by a
# wrong-codec round trip (UTF-8 bytes read as Greek text); it is restored here.
TITLE = """<h1 align="center" id="space-title">🇲🇾 Malaysian Speech-to-Text Leaderboard</h1>"""
# Markdown introduction shown below the title (passed to gr.Markdown).
# The Malaysian flag emoji was recoverable from the mis-encoded text.
# NOTE(review): the two single leading emoji (first paragraph and "Dataset"
# paragraph) lost their distinguishing bytes; the ruler/chart emoji below
# follow the usual Hugging Face leaderboard wording - confirm against the
# original file.
INTRODUCTION_TEXT = """
📐 The 🇲🇾 Malaysian Speech-to-Text Leaderboard aims to track, rank and evaluate Malaysian Speech-to-Text models. All notebooks at https://github.com/mesolitica/malaysian-stt-benchmarks
## Dataset
📈 We evaluate models based on 3 datasets,
1. Malaya-Speech test set, Malay language, https://huggingface.co/datasets/mesolitica/speech-test-set/tree/main/malaya-speech
2. Fleurs MS-MY test set, Malay language, https://huggingface.co/datasets/mesolitica/speech-test-set/tree/main/fleurs-ms-my
3. IMDA TTS first 700 audio files, English language but with Manglish slang, https://huggingface.co/datasets/mesolitica/IMDA-TTS
## Heavy postprocess test set
1. We filtered test set that contain numbers because malaya-speech transducer trained on normalized numbers.
2. We lower case because malaya-speech transducer trained on lower case.
3. We removed punctuation because malaya-speech transducer trained without punctuation.
"""
# Column headers, in the order the values appear in every row of _ROWS.
_COLUMNS = (
    'model',
    'model size FP16 (MB)',
    'Malaya-Speech test CER',
    'Malaya-Speech test WER',
    'Fleurs MY-MS CER',
    'Fleurs MY-MS WER',
    'IMDA TTS CER',
    'IMDA TTS WER',
)

# One tuple per benchmarked model. Rows holding only six values have no
# IMDA TTS results; a size of None means no model size is recorded.
_ROWS = (
    (
        'goodtape.io', None,
        0.09504487340205486, 0.1691902868373457,
        0.03643102801583697, 0.08672758155453257,
    ),
    (
        'openai/whisper-large-v3', 3090,
        0.0349251317825172, 0.1032828282828283,
        0.026055551396846878, 0.07652049926522007,
        0.016648493852990828, 0.0386282289139432,
    ),
    (
        'openai/whisper-medium', 1530,
        0.05064920144820262, 0.17534205321090568,
        0.04366882208520179, 0.13546055192128273,
        0.02065587879424904, 0.047277690563404855,
    ),
    (
        'openai/whisper-small', 483.5,
        0.07485209857268262, 0.25748516055893106,
        0.06781078047622793, 0.21953142859857497,
        0.024812471688517194, 0.058901277294134434,
    ),
    (
        'openai/whisper-base', 145,
        0.3574879236610538, 0.8303456599563157,
        0.1319124653794061, 0.40499286081235003,
        0.03914533450681607, 0.08951682444539587,
    ),
    (
        'openai/whisper-tiny', 75.5,
        0.26941094281472105, 0.7414099751189915,
        0.38749733168917505, 0.812253445128297,
        0.048805770734828904, 0.11150629529200957,
    ),
    (
        'mesolitica/malaysian-whisper-medium', 1530,
        0.05622483776367814, 0.14406629724252673,
        0.025543266604368554, 0.07940219915492629,
        0.01971214262944062, 0.047223078508792794,
    ),
    (
        'mesolitica/malaysian-whisper-small', 483.5,
        0.049162419174983304, 0.15926901346983313,
        0.035517572531147, 0.10938718963023729,
        0.024228721439634855, 0.05546294182008469,
    ),
    (
        'mesolitica/malaysian-whisper-base', 145,
        0.07242006488452603, 0.22081683495617924,
        0.06639564802362424, 0.19675812232021192,
        0.03982418421412676, 0.08917690642690643,
    ),
    (
        'mesolitica/malaysian-whisper-tiny', 75.5,
        0.09423990117534763, 0.295029492365558,
        0.13390519685940314, 0.3461808122686204,
        0.07957313474501154, 0.1421708648494363,
    ),
    (
        'mesolitica/conformer-large-malay-whisper', 206.5,
        0.025933167255719317, 0.0912131356803488,
        0.02548791948171514, 0.08376713097429746,
    ),
    (
        'mesolitica/conformer-medium-malay-whisper', 121.5,
        0.024955598713609053, 0.09315638444736804,
        0.029205645523910067, 0.09253131557833799,
    ),
    (
        'mesolitica/conformer-medium-mixed', 121.5,
        0.034618711056551774, 0.11179440626161938,
        0.032894184549728075, 0.1026977414887425,
    ),
    (
        'mesolitica/conformer-tiny-ctc + mesolitica/kenlm-pseudolabel-whisper-large-v3', 7.9,
        0.0612581761581601, 0.21302693966628394,
        0.07573301800412188, 0.2527434609577528,
    ),
    (
        'mesolitica/conformer-12M-ctc + mesolitica/kenlm-pseudolabel-whisper-large-v3', 24.2,
        0.06941749946814912, 0.22261096523391607,
        0.07657934690019219, 0.263075623142674,
    ),
)

# zip() stops at the shorter input, so a six-value row produces a dict that
# simply has no IMDA TTS keys (rather than keys mapped to None).
open_source = [dict(zip(_COLUMNS, row)) for row in _ROWS]
# Tabulate the benchmark records. Records that lack a key (e.g. the IMDA TTS
# metrics) get NaN in that column.
data = pd.DataFrame(open_source)

# Page layout: heading, Markdown introduction, then the results table.
# Components must be created inside the Blocks context to be attached to it.
with gr.Blocks(css=custom_css) as demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
    gr.DataFrame(data)

# Start the web server for the app.
demo.launch()