Spaces:
Runtime error
Runtime error
File size: 3,631 Bytes
ee0d33f 1ee9ade 038645c ee0d33f 1ee9ade ee0d33f 1ee9ade ee0d33f 038645c 1ee9ade ee0d33f 1ee9ade ee0d33f 1ee9ade 038645c 1ee9ade 038645c 1ee9ade 344c4fa 038645c 1ee9ade 344c4fa 1ee9ade 038645c 1ee9ade 038645c 1ee9ade 038645c 1ee9ade |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
import gradio as gr
import whisper
from pytube import YouTube
import yake
from transformers import pipeline, T5Tokenizer, T5ForConditionalGeneration
class GradioInference():
    """End-to-end YouTube analysis pipeline for the Gradio UI.

    Downloads a video's audio with pytube, transcribes it with Whisper,
    summarizes the transcript with BART-Large-CNN, and extracts keywords
    with the Voicelab vlT5 keyword model.
    """

    def __init__(self):
        # Whisper model sizes and language names shown in the UI dropdowns.
        self.sizes = list(whisper._MODELS.keys())
        self.langs = ["none"] + sorted(list(whisper.tokenizer.LANGUAGES.values()))
        self.current_size = "base"
        self.loaded_model = whisper.load_model(self.current_size)
        # Cached pytube YouTube object (set here or by populate_metadata).
        self.yt = None
        # Facebook BART-Large-CNN summarization pipeline.
        self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
        # T5-based keyword-extraction model and its tokenizer.
        self.keyword_model = T5ForConditionalGeneration.from_pretrained("Voicelab/vlt5-base-keywords")
        self.tokenizer = T5Tokenizer.from_pretrained("Voicelab/vlt5-base-keywords")

    def __call__(self, link, lang, size):
        """Transcribe, summarize, and extract keywords for a YouTube link.

        Args:
            link: YouTube URL to analyze.
            lang: language name from the dropdown, or "none" for auto-detect.
            size: Whisper model size to use for transcription.

        Returns:
            Tuple of (transcription text, summary text, list of keywords).
        """
        # Reuse the YouTube object cached by populate_metadata() when available.
        if self.yt is None:
            self.yt = YouTube(link)
        # Download the audio-only stream to a temp file for Whisper.
        path = self.yt.streams.filter(only_audio=True)[0].download(filename="tmp.mp4")

        if lang == "none":
            lang = None  # let Whisper auto-detect the language

        # Reload the Whisper model only when the requested size changed.
        if size != self.current_size:
            self.loaded_model = whisper.load_model(size)
            self.current_size = size
        results = self.loaded_model.transcribe(path, language=lang)

        # Summarize the transcription with BART.
        transcription_summary = self.summarizer(results["text"], max_length=130, min_length=30, do_sample=False)

        # Keyword extraction: the vlT5 model expects a "Keywords: " task prefix.
        # BUG FIX: the original referenced an undefined `transcription` variable
        # and bare `tokenizer`/`keyword_model` names (NameError at runtime);
        # use results["text"] and the instance attributes instead.
        task_prefix = "Keywords: "
        input_sequence = task_prefix + results["text"]
        input_ids = self.tokenizer(
            input_sequence, return_tensors="pt", truncation=False,
        ).input_ids
        output = self.keyword_model.generate(input_ids, no_repeat_ngram_size=3, num_beams=4)
        predicted = self.tokenizer.decode(output[0], skip_special_tokens=True)
        keywords = [x.strip() for x in predicted.split(',') if x.strip()]

        return results["text"], transcription_summary[0]["summary_text"], keywords

    def populate_metadata(self, link):
        """Return (thumbnail_url, title) for the link; caches the YouTube
        object so __call__ can reuse it without re-fetching."""
        self.yt = YouTube(link)
        return self.yt.thumbnail_url, self.yt.title
# Instantiate the pipeline once at startup (loads all models up front).
gio = GradioInference()
title = "Youtube Insights"
description = "Your AI-powered Video Analytics"

block = gr.Blocks()

with block:
    # Static header markup for the app.
    gr.HTML(
        """
        <div style="text-align: center; max-width: 500px; margin: 0 auto;">
        <div>
        <h1>Youtube Insights</h1>
        </div>
        <p style="margin-bottom: 10px; font-size: 94%">
        Your AI-powered Video Analytics
        </p>
        </div>
        """
    )
    with gr.Group():
        with gr.Box():
            with gr.Row().style(equal_height=True):
                # Model size and optional transcription language.
                sz = gr.Dropdown(label="Model Size", choices=gio.sizes, value='base')
                lang = gr.Dropdown(label="Language (Optional)", choices=gio.langs, value="none")
            link = gr.Textbox(label="YouTube URL")
            # NOTE: rebinds the module-level `title` string to a Gradio Label.
            title = gr.Label(label="Video Title")
            with gr.Row().style(equal_height=True):
                img = gr.Image(label="Thumbnail")
                text = gr.Textbox(label="Transcription", placeholder="Transcription Output", lines=10)
            with gr.Row().style(equal_height=True):
                summary = gr.Textbox(label="Summary", placeholder="Summary Output", lines=5)
                keywords = gr.Textbox(label="Keywords", placeholder="Keywords Output", lines=5)
            with gr.Row().style(equal_height=True):
                btn = gr.Button("Get Video Insights")
            # Wire events: the button runs the full pipeline; editing the URL
            # fetches the video's thumbnail and title.
            btn.click(gio, inputs=[link, lang, sz], outputs=[text, summary, keywords])
            link.change(gio.populate_metadata, inputs=[link], outputs=[img, title])

block.launch()
|