Spaces:
Runtime error
Runtime error
import torch | |
import gradio as gr | |
import pytube as pt | |
from transformers import pipeline | |
from huggingface_hub import model_info | |
MODEL_NAME = "openai/whisper-small"  # HACK (original author's note): this always needs to stay in line 8 of the file
lang = "en"

# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Speech-recognition pipeline shared by every request handled below.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    device=device,
    chunk_length_s=30,
)

# Pin the decoder prompt to the configured language and the "transcribe" task.
pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(
    task="transcribe",
    language=lang,
)
def transcribe(microphone, file_upload):
    """Transcribe one audio input with the module-level ``pipe``.

    Args:
        microphone: Value of the microphone input, or ``None`` when unused.
        file_upload: Value of the upload input, or ``None`` when unused.

    Returns:
        The ``"text"`` entry of the pipeline result. It is prefixed with a
        warning when both inputs were supplied. When neither input was
        supplied, an error message is returned instead and the pipeline is
        not called.
    """
    has_recording = microphone is not None
    has_upload = file_upload is not None

    # Nothing to transcribe: report it without touching the pipeline.
    if not (has_recording or has_upload):
        return "ERROR: You have to either use the microphone or upload an audio file"

    if has_recording and has_upload:
        prefix = (
            "WARNING: You've uploaded an audio file and used the microphone. "
            "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
        )
    else:
        prefix = ""

    # The microphone recording takes precedence over the uploaded file.
    audio = microphone if has_recording else file_upload
    transcription = pipe(audio)["text"]
    return prefix + transcription
# Top-level Blocks container.
# NOTE(review): `demo` is not referenced anywhere else in the visible code;
# the UI that actually gets launched is the gr.Interface built further down.
demo = gr.Blocks()
# Custom stylesheet handed to the Gradio interface via its `css` argument.
# It hides the footer and `.output-markdown`, restyles `button.primary`
# (normal and hover states), and overrides several orange Tailwind utility
# classes with a dark-blue palette.
#
# This is a raw string on purpose: the selectors contain `\:` (an escaped
# colon inside a class name), which is an invalid escape sequence in a normal
# Python string literal and triggers a DeprecationWarning/SyntaxWarning.
# The raw prefix keeps the resulting text identical while silencing that.
css = r"""
footer {display:none !important}
.output-markdown{display:none !important}
button.primary {
z-index: 14;
left: 0px;
top: 0px;
cursor: pointer !important;
background: none rgb(17, 20, 45) !important;
border: none !important;
color: rgb(255, 255, 255) !important;
line-height: 1 !important;
border-radius: 12px !important;
transition: box-shadow 200ms ease 0s, background 200ms ease 0s !important;
box-shadow: none !important;
}
button.primary:hover{
z-index: 14;
left: 0px;
top: 0px;
cursor: pointer !important;
background: none rgb(37, 56, 133) !important;
border: none !important;
color: rgb(255, 255, 255) !important;
line-height: 1 !important;
border-radius: 12px !important;
transition: box-shadow 200ms ease 0s, background 200ms ease 0s !important;
box-shadow: rgb(0 0 0 / 23%) 0px 1px 7px 0px !important;
}
.hover\:bg-orange-50:hover {
--tw-bg-opacity: 1 !important;
background-color: rgb(229,225,255) !important;
}
.to-orange-200 {
--tw-gradient-to: rgb(37 56 133 / 37%) !important;
}
.from-orange-400 {
--tw-gradient-from: rgb(17, 20, 45) !important;
--tw-gradient-to: rgb(255 150 51 / 0);
--tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to) !important;
}
.group-hover\:from-orange-500{
--tw-gradient-from:rgb(17, 20, 45) !important;
--tw-gradient-to: rgb(37 56 133 / 37%);
--tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to) !important;
}
.group:hover .group-hover\:text-orange-500{
--tw-text-opacity: 1 !important;
color:rgb(37 56 133 / var(--tw-text-opacity)) !important;
}
"""
# Example clips offered in the UI: one MP3 followed by the WAV files 2-10.
# NOTE(review): each example row supplies a single value although the
# interface declares two inputs - confirm this is what Gradio expects.
examples = [["TestAudio1.mp3"], *([f"TestAudio{n}.wav"] for n in range(2, 11))]

# Build the two-input (microphone + upload) interface and start serving it.
# NOTE(review): `gr.inputs.Audio`, `optional=`, `layout=`, `theme="huggingface"`
# and `launch(enable_queue=...)` look like the older Gradio API - confirm they
# are still supported by the Gradio version this app is pinned to.
# NOTE: `mf_transcribe` ends up bound to whatever `launch()` returns, not to
# the Interface object itself.
mf_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source=origin, type="filepath", optional=True)
        for origin in ("microphone", "upload")
    ],
    outputs="text",
    layout="horizontal",
    theme="huggingface",
    allow_flagging="never",
    examples=examples,
    css=css,
).launch(enable_queue=True)
# Uses the openai/whisper model.