"""Gradio demo for automatic speech recognition (ASR).

Two tabs:
  * "Record File"  — record a clip, click Submit, see the transcription.
  * "streaming"    — live transcription of microphone chunks via gr.Interface.
"""
import os
import time  # kept from original file; not currently used by any handler

import gradio as gr
from transformers import pipeline

# SECURITY: the original file hard-coded a Hugging Face token in source.
# That token is compromised and must be revoked. Read it from the
# environment instead (None is fine for public models).
p = pipeline(
    task="automatic-speech-recognition",
    model="arthoho66/model_005_2000",
    token=os.environ.get("HF_TOKEN"),
    # device="cuda:0",
)

# Module-level scratch state shared by the streaming handler.
text = ""


def recorded_process(recorded_audio_file) -> str:
    """Transcribe a recorded audio file.

    Args:
        recorded_audio_file: filepath to the recorded clip (gr.Microphone
            with type="filepath").

    Returns:
        The recognized text.
    """
    return p(recorded_audio_file)["text"]


def streaming_process(streaming_audio_file) -> str:
    """Transcribe one streaming audio chunk and cache it in the global `text`.

    Args:
        streaming_audio_file: filepath to the latest microphone chunk.

    Returns:
        The recognized text for this chunk.
    """
    global text
    text = p(streaming_audio_file)["text"]
    return text


def output_streaming(text_streaming: str, text01: str) -> str:
    """Append `text01` to the accumulated transcript and return the result."""
    return text_streaming + text01


def clear_inputs_and_outputs() -> list:
    """Clear the recorder input and result textbox.

    Called by the "Clear" button. Returns one value per wired output
    component ([mic_input, lbl_output]), i.e. exactly two Nones.

    Note: the original version called `audio_chunk.remove_chunk()` on an
    undefined name (NameError on every click) and returned four values for
    two outputs; both defects are fixed here.
    """
    return [None, None]


text_streaming = ""

with gr.Blocks() as demo:
    with gr.Tab("Record File"):
        with gr.Row():
            with gr.Column():
                mic_input = gr.Microphone(type="filepath", label="Record voice")
                with gr.Row():
                    clr_btn = gr.Button(value="Clear", variant="secondary")
                    sub_btn = gr.Button(value="submit")
            with gr.Column():
                lbl_output = gr.Textbox(label="Result")

        clr_btn.click(
            fn=clear_inputs_and_outputs,
            inputs=[],
            outputs=[mic_input, lbl_output],
        )
        sub_btn.click(
            fn=recorded_process,
            inputs=[mic_input],
            outputs=[lbl_output],
        )

    with gr.Tab("streaming"):
        gr.Interface(
            fn=streaming_process,
            inputs=[gr.Microphone(type="filepath", streaming=True)],
            outputs=[gr.Textbox(type="text", label="Result")],
            live=True,
            allow_flagging="never",
        )
        with gr.Row():
            with gr.Column():
                # NOTE(review): this runs once at layout-build time, NOT per
                # streaming event, so `text` is still "" here and the textbox
                # below always starts empty. To show the accumulated
                # transcript live, wire an event handler instead.
                print(text)
                text_streaming = output_streaming(text_streaming, text)
                gr.Textbox(value=text, label="Result", autofocus=True)

if __name__ == "__main__":
    demo.launch()