|
import os
import time

import gradio as gr
from transformers import pipeline
|
# Automatic-speech-recognition pipeline shared by both tabs.
#
# Security fix: the Hugging Face access token was previously hard-coded in
# this file, which leaks the credential to anyone with repository access.
# It is now read from the HF_TOKEN environment variable (None falls back to
# anonymous access). NOTE(review): the token that was committed here should
# be revoked on huggingface.co.
p = pipeline(
    task="automatic-speech-recognition",
    model="arthoho66/model_005_2000",
    token=os.environ.get("HF_TOKEN"),
)

# Module-level transcription cache; written by streaming_process() and read
# during UI construction below.
text = ""
|
def recorded_process(recorded_audio_file) -> str:
    """Transcribe a recorded audio file.

    Runs the module-level ASR pipeline ``p`` on the given audio file path
    and returns the recognized text.
    """
    result = p(recorded_audio_file)
    return result["text"]
|
|
|
|
|
def streaming_process(streaming_audio_file) -> str:
    """Transcribe one streaming audio chunk.

    Side effect: the transcription is also stored in the module-level
    ``text`` variable before being returned.
    """
    global text
    result = p(streaming_audio_file)
    text = result["text"]
    return text
|
|
|
def output_streaming(text_streaming, text01) -> str:
    """Return the running transcript with ``text01`` appended to it."""
    return text_streaming + text01
|
|
|
def clear_inputs_and_outputs() -> list:
    """Clear all inputs and outputs when the user clicks the "Clear" button.

    Returns one ``None`` per component wired in the click handler's
    ``outputs=`` list so Gradio resets the microphone input and the
    result textbox.
    """
    # Bug fix: the previous body called ``audio_chunk.remove_chunk()`` on a
    # name that is never defined anywhere in this file, raising NameError on
    # every Clear click, and returned four values for the two-component
    # outputs list [mic_input, lbl_output].
    return [None, None]
|
|
|
|
|
# Running transcript for the streaming tab. NOTE(review): it is only ever
# updated once, during UI construction (see the note further down).
text_streaming = ""


with gr.Blocks() as demo:
    # --- Tab 1: record a clip, then transcribe it on submit ---
    with gr.Tab("Record File"):
        with gr.Row():
            with gr.Column():
                # Left column: microphone input plus Clear / submit buttons.
                mic_input = gr.Microphone( type="filepath",label="Record voice")
                with gr.Row():
                    clr_btn = gr.Button(value="Clear", variant="secondary")
                    sub_btn = gr.Button(value="submit")
            with gr.Column():
                # Right column: transcription result.
                lbl_output = gr.Textbox(label="Result")

        # Reset the microphone input and the result textbox.
        clr_btn.click(
            fn=clear_inputs_and_outputs,
            inputs=[],
            outputs=[mic_input, lbl_output]
        )

        # Transcribe the recorded file and show the text.
        sub_btn.click(
            fn=recorded_process,
            inputs=[mic_input],
            outputs=[lbl_output]
        )

    # --- Tab 2: live streaming transcription ---
    with gr.Tab("streaming"):
        # live=True re-runs streaming_process on each incoming audio chunk.
        gr.Interface(
            fn=streaming_process,
            inputs=[
                gr.Microphone(type="filepath", streaming=True)],
            outputs=[
                gr.Textbox(type ="text", label="Result",)],
            live=True,
            allow_flagging="never"
        )
        with gr.Row():
            with gr.Column():
                # NOTE(review): the next three lines run exactly once, while
                # the UI is being BUILT — before any audio has streamed — so
                # ``text`` is still "" here. The print shows an empty string,
                # text_streaming accumulates nothing, and the Textbox below is
                # created empty and never updated afterwards. To display the
                # accumulated streaming transcript, this logic would need to
                # live inside an event handler instead.
                print(text)
                text_streaming = output_streaming(text_streaming,text)
                gr.Textbox(value=text, label="Result", autofocus=True)

demo.launch()