Spaces:

GIanlucaRub
/

whisper-it

Runtime error

File size: 4,877 Bytes

import gradio as gr
from pytube import YouTube
from transformers import pipeline
import os
from moviepy.editor import VideoFileClip


pipe = pipeline(model="GIanlucaRub/whisper-small-it-3",task="automatic-speech-recognition")

def transcribe_yt(link):
  yt = YouTube(link)
  audio = yt.streams.filter(only_audio=True)[0].download(filename="audio.mp3")
  text = pipe(audio)["text"]
  os.remove(audio)
  return text

def transcribe_audio(audio):
    text = pipe(audio)["text"]
    return text

def populate_metadata(link):
  yt = YouTube(link)
  return yt.thumbnail_url, yt.title

def transcribe_video(video):
    clip = VideoFileClip(video)
    audio = video[:-4] + ".mp3"
    clip.audio.write_audiofile(audio)
    clip.close()
    os.remove(video)
    text = transcribe_audio(audio)
    os.remove(audio)
    
    return text

block = gr.Blocks()

with block:
    gr.HTML(
        """

            <div style="text-align: center; max-width: 500px; margin: 0 auto;margin-top: 10px">
              <div>
                <h1 style="font-size: 400%;line-height: 1.2;">Whisper Italian Automatic Speech Recognition</h1>
              </div>
              <p style="margin-bottom: 10px; font-size: 150%;margin-top: 30px;line-height: 1.2;">
                Realtime demo for Italian speech recognition using a fine-tuned Whisper Small model.You can use the model in 4 different ways.
              </p>
            </div>
        """
    )
    with gr.Group():
        with gr.Box():
          gr.HTML(
        """

            <div style="text-align: center; max-width: 500px; margin: 0 auto;margin-top: 10px">
              <p style="margin-bottom: 10px; font-size: 100%;margin-top: 10px;line-height: 1.2;">
                  Here you can see the transcription.
              </p>
            </div>
        """)
          text = gr.Textbox(
              label="Transcription", 
              placeholder="Transcription Output",
              lines=5)
          gr.HTML(
        """

            <div style="text-align: center; max-width: 500px; margin: 0 auto;margin-top: 10px">
              <p style="margin-bottom: 10px; font-size: 100%;margin-top: 20px;line-height: 1.0;">
                  You can record audio from your microphone.
              </p>
            </div>
        """)  
          microphone=gr.Audio(source="microphone", type="filepath")
          with gr.Row().style(mobile_collapse=False, equal_height=True): 
              btn_microphone = gr.Button("Transcribe microphone audio")
          
        
          gr.HTML(
        """

            <div style="text-align: center; max-width: 500px; margin: 0 auto;margin-top: 10px">
              <p style="margin-bottom: 10px; font-size: 100%;margin-top: 20px;line-height: 1.2;">
                  You can upload an audio file.
              </p>
            </div>
        """)  
          audio_uploaded=gr.Audio(source="upload", type="filepath")
          with gr.Row().style(mobile_collapse=False, equal_height=True): 
              btn_audio_uploaded = gr.Button("Transcribe audio uploaded")
          
        
        
          gr.HTML(
        """

            <div style="text-align: center; max-width: 500px; margin: 0 auto;margin-top: 10px">
              <p style="margin-bottom: 10px; font-size: 100%;margin-top: 20px;line-height: 1.2;">
                  You can upload a video file
              </p>
            </div>
        """) 
          video_uploaded = gr.Video(source = "upload") 
          with gr.Row().style(mobile_collapse=False, equal_height=True): 
              btn_video_uploaded = gr.Button("Transcribe video uploaded")
            
          
        
          gr.HTML(
        """

            <div style="text-align: center; max-width: 500px; margin: 0 auto;margin-top: 10px">
              <p style="margin-bottom: 10px; font-size: 100%;margin-top: 20px;line-height: 1.2;">
                  You can put a youtube video link
              </p>
            </div>
        """) 
          link = gr.Textbox(label="YouTube Link")
          with gr.Row().style(mobile_collapse=False, equal_height=True): 
              btn_youtube = gr.Button("Transcribe Youtube video") 
    
          with gr.Row().style(mobile_collapse=False, equal_height=True):
            title = gr.Label(label="Video Title", placeholder="Title")
            img = gr.Image(label="Thumbnail")
          
                
          
          # Events
          btn_youtube.click(transcribe_yt, inputs=[link], outputs=[text])
          btn_microphone.click(transcribe_audio, inputs=[microphone], outputs=[text])
          btn_audio_uploaded.click(transcribe_audio, inputs=[audio_uploaded], outputs=[text])
          btn_video_uploaded.click(transcribe_video, inputs=[video_uploaded], outputs=[text])
          link.change(populate_metadata, inputs=[link], outputs=[img, title])

block.launch(debug=True)