Spaces:

storresbusquets
/

demo1

Runtime error

App Files Files Community

storresbusquets committed on Sep 3, 2023

Commit

038645c

1 Parent(s): 344c4fa

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -11

app.py CHANGED Viewed

@@ -2,7 +2,7 @@ import gradio as gr
 import whisper
 from pytube import YouTube
 import yake
-from transformers import pipeline
 class GradioInference():
     def __init__(self):
@@ -12,12 +12,12 @@ class GradioInference():
         self.loaded_model = whisper.load_model(self.current_size)
         self.yt = None
-        # Initialize YAKE keyword extractor
-        self.keyword_extractor = yake.KeywordExtractor(lan="en", n=3, dedupLim=0.9, dedupFunc="seqm", windowsSize=1, top=5, features=None)
         # Initialize Facebook/BART-Large-CNN summarizer
         self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
     def __call__(self, link, lang, size):
         if self.yt is None:
             self.yt = YouTube(link)
@@ -34,10 +34,17 @@ class GradioInference():
         # Perform summarization on the transcription
         transcription_summary = self.summarizer(results["text"], max_length=130, min_length=30, do_sample=False)
-        # Extract keywords from the transcription
-        keywords = self.keyword_extractor.extract_keywords(results["text"])
-        return results["text"], transcription_summary[0]["summary_text"], [kw[0] for kw in keywords]
     def populate_metadata(self, link):
         self.yt = YouTube(link)
@@ -46,7 +53,7 @@ class GradioInference():
 gio = GradioInference()
 title = "Youtube Insights"
-description = "Speech to text transcription, summary, and keyword extraction of Youtube videos using OpenAI's Whisper, Facebook/BART-Large-CNN, and YAKE"
 block = gr.Blocks()
 with block:
@@ -57,7 +64,7 @@ with block:
             <h1>Youtube Insights</h1>
           </div>
           <p style="margin-bottom: 10px; font-size: 94%">
-            Speech to text transcription, summary, and keyword extraction of Youtube videos using OpenAI's Whisper, Facebook/BART-Large-CNN, and YAKE
           </p>
         </div>
         """
@@ -67,7 +74,7 @@ with block:
             with gr.Row().style(equal_height=True):
                 sz = gr.Dropdown(label="Model Size", choices=gio.sizes, value='base')
                 lang = gr.Dropdown(label="Language (Optional)", choices=gio.langs, value="none")
-            link = gr.Textbox(label="YouTube Link")
             title = gr.Label(label="Video Title")
             with gr.Row().style(equal_height=True):
                 img = gr.Image(label="Thumbnail")
@@ -76,7 +83,7 @@ with block:
                 summary = gr.Textbox(label="Summary", placeholder="Summary Output", lines=5)
                 keywords = gr.Textbox(label="Keywords", placeholder="Keywords Output", lines=5)
             with gr.Row().style(equal_height=True):
-                btn = gr.Button("Transcribe, Summarize & Extract Keywords")
             btn.click(gio, inputs=[link, lang, sz], outputs=[text, summary, keywords])
             link.change(gio.populate_metadata, inputs=[link], outputs=[img, title])

 import whisper
 from pytube import YouTube
 import yake
+from transformers import pipeline, T5Tokenizer, T5ForConditionalGeneration
 class GradioInference():
     def __init__(self):
         self.loaded_model = whisper.load_model(self.current_size)
         self.yt = None
         # Initialize Facebook/BART-Large-CNN summarizer
         self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+        self.keyword_model = T5ForConditionalGeneration.from_pretrained("Voicelab/vlt5-base-keywords")
+        self.tokenizer = T5Tokenizer.from_pretrained("Voicelab/vlt5-base-keywords")
     def __call__(self, link, lang, size):
         if self.yt is None:
             self.yt = YouTube(link)
         # Perform summarization on the transcription
         transcription_summary = self.summarizer(results["text"], max_length=130, min_length=30, do_sample=False)
+        task_prefix = "Keywords: "
+        input_sequence = task_prefix + transcription
+        input_ids = tokenizer(
+            input_sequence, return_tensors="pt", truncation=False,
+        ).input_ids
+        output = keyword_model.generate(input_ids, no_repeat_ngram_size=3, num_beams=4)
+        predicted = tokenizer.decode(output[0], skip_special_tokens=True)
+        keywords = [x.strip() for x in predicted.split(',') if x.strip()]
+        return results["text"], transcription_summary[0]["summary_text"], keywords
     def populate_metadata(self, link):
         self.yt = YouTube(link)
 gio = GradioInference()
 title = "Youtube Insights"
+description = "Your AI-powered Video Analytics"
 block = gr.Blocks()
 with block:
             <h1>Youtube Insights</h1>
           </div>
           <p style="margin-bottom: 10px; font-size: 94%">
+            Your AI-powered Video Analytics
           </p>
         </div>
         """
             with gr.Row().style(equal_height=True):
                 sz = gr.Dropdown(label="Model Size", choices=gio.sizes, value='base')
                 lang = gr.Dropdown(label="Language (Optional)", choices=gio.langs, value="none")
+            link = gr.Textbox(label="YouTube URL")
             title = gr.Label(label="Video Title")
             with gr.Row().style(equal_height=True):
                 img = gr.Image(label="Thumbnail")
                 summary = gr.Textbox(label="Summary", placeholder="Summary Output", lines=5)
                 keywords = gr.Textbox(label="Keywords", placeholder="Keywords Output", lines=5)
             with gr.Row().style(equal_height=True):
+                btn = gr.Button("Get Video Insights")
             btn.click(gio, inputs=[link, lang, sz], outputs=[text, summary, keywords])
             link.change(gio.populate_metadata, inputs=[link], outputs=[img, title])