# Viewer residue captured together with this file (not part of the program):
#   Spaces status: Runtime error
#   File size: 4,877 Bytes
#   Revision ids seen in the gutter: d6a25cd, 023302c, 7fe7ac2, c731f97, 05f807a, d7d6d65
#   Line-number gutter: 1-137
import gradio as gr
from pytube import YouTube
from transformers import pipeline
import os
from moviepy.editor import VideoFileClip
# Speech-recognition pipeline used by the transcription functions below;
# it is constructed once, when the module is imported.
pipe = pipeline(
    task="automatic-speech-recognition",
    model="GIanlucaRub/whisper-small-it-3",
)
def transcribe_yt(link):
    """Transcribe the audio of the YouTube video at *link*.

    The first audio-only stream of the video is downloaded to ``audio.mp3``,
    passed to the module-level ``pipe``, and the resulting ``"text"`` field
    is returned.

    The downloaded file is removed even when transcription raises, so a
    failed request does not leave the file behind.
    """
    yt = YouTube(link)
    # NOTE(review): the download target is a fixed file name, so two calls
    # running at the same time would share it — confirm whether the app can
    # serve requests concurrently.
    audio = yt.streams.filter(only_audio=True)[0].download(filename="audio.mp3")
    try:
        return pipe(audio)["text"]
    finally:
        os.remove(audio)
def transcribe_audio(audio):
    """Return the transcription of *audio*.

    ``audio`` is handed unchanged to the module-level ``pipe``; the value
    stored under ``"text"`` in the pipeline result is returned.
    """
    result = pipe(audio)
    return result["text"]
def populate_metadata(link):
    """Return ``(thumbnail_url, title)`` for the YouTube video at *link*."""
    video = YouTube(link)
    thumbnail = video.thumbnail_url
    title = video.title
    return thumbnail, title
def transcribe_video(video):
    """Transcribe the audio track of the video file at path *video*.

    The audio track is written next to the video as ``<stem>.mp3``, the
    video file is deleted, the audio is transcribed with
    ``transcribe_audio`` and the text is returned.

    The clip is always closed and the temporary audio file is always
    removed, including when extraction or transcription raises.
    """
    # splitext handles extensions of any length (".webm", ".ts", ...), whereas
    # slicing off the last four characters only works for three-letter ones.
    audio = os.path.splitext(video)[0] + ".mp3"
    clip = VideoFileClip(video)
    try:
        try:
            clip.audio.write_audiofile(audio)
        finally:
            clip.close()
        # The video is only deleted once its audio was extracted successfully.
        os.remove(video)
        return transcribe_audio(audio)
    finally:
        # The audio file may not exist if extraction failed before writing it.
        if os.path.exists(audio):
            os.remove(audio)
# Static HTML snippets rendered by the page. They are kept as module-level
# constants so the layout code below stays short; the text of each snippet
# is exactly what the page emits.
_HEADER_HTML = """
<div style="text-align: center; max-width: 500px; margin: 0 auto;margin-top: 10px">
<div>
<h1 style="font-size: 400%;line-height: 1.2;">Whisper Italian Automatic Speech Recognition</h1>
</div>
<p style="margin-bottom: 10px; font-size: 150%;margin-top: 30px;line-height: 1.2;">
Realtime demo for Italian speech recognition using a fine-tuned Whisper Small model.You can use the model in 4 different ways.
</p>
</div>
"""

_TRANSCRIPTION_HTML = """
<div style="text-align: center; max-width: 500px; margin: 0 auto;margin-top: 10px">
<p style="margin-bottom: 10px; font-size: 100%;margin-top: 10px;line-height: 1.2;">
Here you can see the transcription.
</p>
</div>
"""

_MICROPHONE_HTML = """
<div style="text-align: center; max-width: 500px; margin: 0 auto;margin-top: 10px">
<p style="margin-bottom: 10px; font-size: 100%;margin-top: 20px;line-height: 1.0;">
You can record audio from your microphone.
</p>
</div>
"""

_AUDIO_UPLOAD_HTML = """
<div style="text-align: center; max-width: 500px; margin: 0 auto;margin-top: 10px">
<p style="margin-bottom: 10px; font-size: 100%;margin-top: 20px;line-height: 1.2;">
You can upload an audio file.
</p>
</div>
"""

_VIDEO_UPLOAD_HTML = """
<div style="text-align: center; max-width: 500px; margin: 0 auto;margin-top: 10px">
<p style="margin-bottom: 10px; font-size: 100%;margin-top: 20px;line-height: 1.2;">
You can upload a video file
</p>
</div>
"""

_YOUTUBE_HTML = """
<div style="text-align: center; max-width: 500px; margin: 0 auto;margin-top: 10px">
<p style="margin-bottom: 10px; font-size: 100%;margin-top: 20px;line-height: 1.2;">
You can put a youtube video link
</p>
</div>
"""

block = gr.Blocks()

# NOTE(review): the nesting of the layout containers below was rebuilt from a
# copy of this file whose indentation had been lost. Each single-button Row is
# assumed to hold only its button, and the last Row both the title and the
# thumbnail — confirm against the rendered page.
with block:
    gr.HTML(_HEADER_HTML)
    with gr.Group():
        with gr.Box():
            # Shared output: every transcription handler writes into this box.
            gr.HTML(_TRANSCRIPTION_HTML)
            text = gr.Textbox(
                label="Transcription",
                placeholder="Transcription Output",
                lines=5,
            )

            # 1) Microphone recording, passed to the handler as a file path.
            gr.HTML(_MICROPHONE_HTML)
            microphone = gr.Audio(source="microphone", type="filepath")
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                btn_microphone = gr.Button("Transcribe microphone audio")

            # 2) Uploaded audio file, passed to the handler as a file path.
            gr.HTML(_AUDIO_UPLOAD_HTML)
            audio_uploaded = gr.Audio(source="upload", type="filepath")
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                btn_audio_uploaded = gr.Button("Transcribe audio uploaded")

            # 3) Uploaded video file.
            gr.HTML(_VIDEO_UPLOAD_HTML)
            video_uploaded = gr.Video(source="upload")
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                btn_video_uploaded = gr.Button("Transcribe video uploaded")

            # 4) YouTube link, with title and thumbnail shown for the video.
            gr.HTML(_YOUTUBE_HTML)
            link = gr.Textbox(label="YouTube Link")
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                btn_youtube = gr.Button("Transcribe Youtube video")
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                title = gr.Label(label="Video Title", placeholder="Title")
                img = gr.Image(label="Thumbnail")

    # Events: each button runs its handler and writes the result to `text`;
    # editing the link refreshes the thumbnail and title.
    btn_youtube.click(transcribe_yt, inputs=[link], outputs=[text])
    btn_microphone.click(transcribe_audio, inputs=[microphone], outputs=[text])
    btn_audio_uploaded.click(transcribe_audio, inputs=[audio_uploaded], outputs=[text])
    btn_video_uploaded.click(transcribe_video, inputs=[video_uploaded], outputs=[text])
    link.change(populate_metadata, inputs=[link], outputs=[img, title])

block.launch(debug=True)