mizoru committed on
Commit
4d5e2a1
1 Parent(s): ac7f9fb

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -0
app.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline, AutoModelForCTC, Wav2Vec2Processor, Wav2Vec2ProcessorWithLM
3
+
4
# Speech-recognition checkpoints offered in the UI, keyed by the English
# language name. "has_lm" controls whether the "LM" decoding option is shown
# for that language.
MODELS = {
    "Tatar": {"model_id": "sammy786/wav2vec2-xlsr-tatar", "has_lm": False},
    "Chuvash": {"model_id": "sammy786/wav2vec2-xlsr-chuvash", "has_lm": False},
    "Bashkir": {"model_id": "AigizK/wav2vec2-large-xls-r-300m-bashkir-cv7_opt", "has_lm": True},
    "Erzya": {"model_id": "DrishtiSharma/wav2vec2-large-xls-r-300m-myv-v1", "has_lm": False},
}

# Models already loaded in this process, keyed by model id, so that each
# checkpoint is only loaded once.
CACHED_MODELS_BY_ID = {}

# Language names in both interface languages. The two lists are parallel:
# the i-th Russian name corresponds to the i-th English name.
LANGUAGES_ENG = list(MODELS.keys())
LANGUAGES_RUS = ["Татарский", "Чувашский", "Башкирский", "Эрзянский"]
RUS2ENG = dict(zip(LANGUAGES_RUS, LANGUAGES_ENG))

# Language codes used to build Yandex Translate links. None means no
# translation link is generated for that language.
# Chuvash is "cv" and Bashkir is "ba" (ISO 639-1); they were swapped before.
LANG2YDX = {
    "Tatar": "tt",
    "Chuvash": "cv",
    "Bashkir": "ba",
    "Erzya": None,
    "English": "en",
    "Русский": "ru",
}
23
+
24
+
25
def run(input_file, language, decoding_type, lang):
    """Transcribe a recording and build a link to translate the result.

    Args:
        input_file: Path of the audio file to transcribe.
        language: Language of the recording, as an English or Russian name
            (Russian names are mapped through RUS2ENG).
        decoding_type: Selected decoding mode. Accepted either as the choice
            label ("Greedy" / "LM") or as its index in ["Greedy", "LM"],
            because the Radio component can deliver either form.
        lang: Interface language ("Русский" or "English"). It selects the
            link label and the translation target language.

    Returns:
        A ``(transcription, html)`` tuple. ``html`` is an ``<a>`` element
        pointing to Yandex Translate, or None when the recording language
        has no Yandex code.

    Raises:
        ValueError: If no recording was provided or the recording language
            is missing or unknown.
    """
    # Function-scope imports keep this block self-contained.
    from html import escape
    from urllib.parse import quote

    if input_file is None:
        raise ValueError("No audio was recorded.")

    language = RUS2ENG.get(language, language)
    model = MODELS.get(language)
    if model is None:
        raise ValueError(f"Unknown or missing recording language: {language!r}")
    model_id = model["model_id"]

    # Load each checkpoint once and reuse it for later requests.
    model_instance = CACHED_MODELS_BY_ID.get(model_id)
    if model_instance is None:
        model_instance = AutoModelForCTC.from_pretrained(model_id)
        CACHED_MODELS_BY_ID[model_id] = model_instance

    # "LM" may arrive as the label or as index 1; comparing against the label
    # alone silently disabled LM decoding when the index was delivered.
    # It is only honoured for checkpoints flagged as having a language model.
    use_lm = decoding_type in ("LM", 1) and model["has_lm"]

    processor_cls = Wav2Vec2ProcessorWithLM if use_lm else Wav2Vec2Processor
    processor = processor_cls.from_pretrained(model_id)
    asr = pipeline(
        "automatic-speech-recognition",
        model=model_instance,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        decoder=processor.decoder if use_lm else None,
    )

    transcription = asr(input_file, chunk_length_s=5, stride_length_s=1)["text"]

    source_code = LANG2YDX[language]
    if not source_code:
        return transcription, None

    # The interface-language radio may still be unset; fall back to English,
    # which matches the English link label used for any non-Russian value.
    target_code = LANG2YDX.get(lang) or "en"

    # Percent-encode the text so spaces, '&', '#', etc. cannot break the URL.
    url = (
        "https://translate.yandex.ru/?lang=" + source_code + "-" + target_code
        + "&text=" + quote(transcription, safe="")
    )
    label = 'Посмотреть перевод' if lang == "Русский" else 'Check the translation'
    # Escape for the HTML attribute/text context ('&' -> '&amp;', quotes, ...).
    html = f'<a href="{escape(url)}" target="_blank">{escape(label)}</a>'

    return transcription, html
54
+
55
+
56
def update_decoding(language):
    """Show or hide the decoding-type selector for the chosen language.

    Args:
        language: Recording language, as an English or Russian name.

    Returns:
        A Radio update: visible when the language's model is flagged
        ``has_lm``; otherwise hidden and reset to 'Greedy'.
    """
    english_name = RUS2ENG.get(language, language)
    if not MODELS[english_name]['has_lm']:
        # No language model available: hide the choice and force greedy.
        return gr.Radio.update(visible=False, value='Greedy')
    return gr.Radio.update(visible=True)
61
+
62
+
63
def update_interface(lang):
    """Relabel the widgets to match the selected interface language.

    Args:
        lang: Interface language, 'Русский' or 'English'. Any other value
            leaves the labels unassigned and raises UnboundLocalError.

    Returns:
        A tuple of updates for the language radio (label and choices), the
        audio input (label) and the decoding radio (label), in that order.
    """
    # Pick the texts first, then build the three update objects in one place.
    if lang == 'Русский':
        language_label, language_choices = 'Язык записи', LANGUAGES_RUS
        audio_label = 'Скажите что-нибудь...'
        decoding_label = 'Тип декодирования'
    elif lang == 'English':
        language_label, language_choices = 'Language', LANGUAGES_ENG
        audio_label = 'Say something...'
        decoding_label = 'Decoding type'

    return (
        gr.Radio.update(label=language_label, choices=language_choices),
        gr.Audio.update(label=audio_label),
        gr.Radio.update(label=decoding_label),
    )
77
+
78
# UI layout and event wiring.
with gr.Blocks() as blocks:
    # Interface language; changing it relabels the widgets below.
    lang = gr.Radio(label="Выберите язык интерфейса / Interface language", choices=['Русский','English'])
    # Language of the recording.
    languages = gr.Radio(label="Language", choices=LANGUAGES_RUS)
    audio = gr.Audio(source="microphone", type="filepath", label="Скажите что-нибудь...")
    # Hidden until a language whose model has a language model is selected.
    # The default value type is used so the handler receives the label
    # ("Greedy" / "LM"); `run` selects LM decoding by the label "LM", which
    # an index value (type='index') would never match.
    decoding = gr.Radio(label="Тип декодирования", choices=["Greedy", "LM"], visible=False)
    btn = gr.Button('Расшифровать / Transcribe')
    output = gr.Textbox(show_label=False)

    # Holds the "check the translation" link returned by `run`.
    translation = gr.HTML()

    languages.change(fn=update_decoding, inputs=[languages], outputs=[decoding])
    lang.change(fn=update_interface, inputs=[lang], outputs=[languages, audio, decoding])
    btn.click(fn=run, inputs=[audio, languages, decoding, lang], outputs=[output, translation])


blocks.launch(enable_queue=True, debug=True)