Spaces:

anzorq
/

w2v-bert-2.0-kbd

Paused

App Files Files Community

w2v-bert-2.0-kbd / app.py

anzorq

Update app.py

29c16a4 verified 6 months ago

raw

history blame

2.49 kB

	import spaces
	import os
	import gradio as gr
	import torch
	import torchaudio
	from transformers import AutoModelForCTC, Wav2Vec2BertProcessor
	from pytube import YouTube
	from transformers import pipeline

	# Build the Hugging Face pipeline for the "anzorq/w2v-bert-2.0-kbd" model once,
	# at import time; `transcribe_speech` reuses this single instance on every call.
	# NOTE(review): `device=0` presumably selects the first CUDA device — confirm
	# the host always provides a GPU, otherwise this line fails at startup.
	pipe = pipeline(model="anzorq/w2v-bert-2.0-kbd", device=0)

	@spaces.GPU
	def transcribe_speech(audio):
	    """Transcribe an audio input with the module-level ``pipe``.

	    Args:
	        audio: Input handed straight to ``pipe``; the callers in this file
	            pass a file path. ``None`` means no audio was supplied.

	    Returns:
	        The ``'text'`` entry of the pipeline result, or the string
	        ``"No audio received."`` when ``audio`` is ``None``.
	    """
	    if audio is not None:
	        # The pipeline is asked to process the input in 10-second chunks.
	        result = pipe(audio, chunk_length_s=10)
	        return result['text']

	    # The microphone input yields None when nothing was recorded.
	    return "No audio received."

	def transcribe_from_youtube(url):
	    """Download the audio track of a YouTube video and transcribe it.

	    Args:
	        url: YouTube video URL as entered by the user.

	    Returns:
	        The transcription produced by ``transcribe_speech``, or the string
	        ``"No URL provided."`` when ``url`` is empty or only whitespace.

	    Raises:
	        gr.Error: If the video exposes no audio-only stream.
	    """
	    # Function-scope import: keeps the fix self-contained without touching
	    # the order of the file-level imports.
	    import tempfile

	    if not url or not url.strip():
	        return "No URL provided."

	    yt = YouTube(url.strip())

	    # `.first()` returns None instead of raising IndexError on an empty query,
	    # which lets us report a readable error to the user.
	    audio_stream = yt.streams.filter(only_audio=True).first()
	    if audio_stream is None:
	        raise gr.Error("No audio stream is available for this video.")

	    # A private temporary directory per call keeps concurrent requests from
	    # overwriting or deleting each other's download, and it is removed even
	    # when the download or the transcription raises.
	    with tempfile.TemporaryDirectory() as workdir:
	        audio_path = audio_stream.download(output_path=workdir, filename="tmp.mp4")
	        return transcribe_speech(audio_path)

	def populate_metadata(url):
	    """Look up the thumbnail URL and title of a YouTube video.

	    This handler is bound to the URL textbox's ``change`` event, so it also
	    receives empty and partially typed input.

	    Args:
	        url: Current content of the URL textbox.

	    Returns:
	        A ``(thumbnail_url, title)`` tuple, or ``(None, None)`` when ``url``
	        is empty or pytube cannot resolve it.
	    """
	    if not url or not url.strip():
	        return None, None

	    # Imported lazily so the empty-input path never needs pytube.
	    from pytube.exceptions import PytubeError

	    try:
	        yt = YouTube(url.strip())
	        return yt.thumbnail_url, yt.title
	    except PytubeError:
	        # Incomplete or invalid URLs are expected while the user is typing;
	        # leave the preview empty instead of surfacing an error each time.
	        return None, None

	# Assemble the two-tab Gradio interface, then start serving it.
	with gr.Blocks(theme=gr.themes.Soft()) as demo:
	    # Centered page banner.
	    gr.HTML(
	        """
	<div style="text-align: center; max-width: 500px; margin: 0 auto;">
	<div>
	<h1>Youtube Speech Transcription</h1>
	</div>
	<p style="margin-bottom: 10px; font-size: 94%">
	Speech to text transcription of Youtube videos using Wav2Vec2-BERT
	</p>
	</div>
	"""
	    )

	    # Tab 1: record from the microphone and transcribe the recording.
	    with gr.Tab("Microphone Input"):
	        gr.Markdown("## Transcribe speech from microphone")
	        mic_audio = gr.Audio(
	            sources="microphone",
	            type="filepath",
	            label="Speak into your microphone",
	        )
	        mic_button = gr.Button("Transcribe")
	        mic_output = gr.Textbox(label="Transcription")

	        mic_button.click(
	            fn=transcribe_speech,
	            inputs=mic_audio,
	            outputs=mic_output,
	        )

	    # Tab 2: transcribe the audio track of a YouTube video.
	    with gr.Tab("YouTube URL"):
	        gr.Markdown("## Transcribe speech from YouTube video")
	        youtube_url = gr.Textbox(label="Enter YouTube video URL")
	        video_title = gr.Label(label="Video Title")
	        thumbnail = gr.Image(label="Thumbnail", height=120, width=120)
	        yt_button = gr.Button("Transcribe")
	        yt_output = gr.Textbox(
	            label="Transcription",
	            placeholder="Transcription Output",
	            lines=10,
	        )

	        yt_button.click(
	            fn=transcribe_from_youtube,
	            inputs=youtube_url,
	            outputs=yt_output,
	        )
	        # Refresh the thumbnail and title whenever the URL text changes.
	        youtube_url.change(
	            populate_metadata,
	            inputs=[youtube_url],
	            outputs=[thumbnail, video_title],
	        )

	demo.launch()