storresbusquets committed on
Commit
038645c
·
1 Parent(s): 344c4fa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -11
app.py CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
2
  import whisper
3
  from pytube import YouTube
4
  import yake
5
- from transformers import pipeline
6
 
7
  class GradioInference():
8
  def __init__(self):
@@ -12,12 +12,12 @@ class GradioInference():
12
  self.loaded_model = whisper.load_model(self.current_size)
13
  self.yt = None
14
 
15
- # Initialize YAKE keyword extractor
16
- self.keyword_extractor = yake.KeywordExtractor(lan="en", n=3, dedupLim=0.9, dedupFunc="seqm", windowsSize=1, top=5, features=None)
17
-
18
  # Initialize Facebook/BART-Large-CNN summarizer
19
  self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
20
 
 
 
 
21
  def __call__(self, link, lang, size):
22
  if self.yt is None:
23
  self.yt = YouTube(link)
@@ -34,10 +34,17 @@ class GradioInference():
34
  # Perform summarization on the transcription
35
  transcription_summary = self.summarizer(results["text"], max_length=130, min_length=30, do_sample=False)
36
 
37
- # Extract keywords from the transcription
38
- keywords = self.keyword_extractor.extract_keywords(results["text"])
 
 
 
 
 
 
 
39
 
40
- return results["text"], transcription_summary[0]["summary_text"], [kw[0] for kw in keywords]
41
 
42
  def populate_metadata(self, link):
43
  self.yt = YouTube(link)
@@ -46,7 +53,7 @@ class GradioInference():
46
 
47
  gio = GradioInference()
48
  title = "Youtube Insights"
49
- description = "Speech to text transcription, summary, and keyword extraction of Youtube videos using OpenAI's Whisper, Facebook/BART-Large-CNN, and YAKE"
50
 
51
  block = gr.Blocks()
52
  with block:
@@ -57,7 +64,7 @@ with block:
57
  <h1>Youtube Insights</h1>
58
  </div>
59
  <p style="margin-bottom: 10px; font-size: 94%">
60
- Speech to text transcription, summary, and keyword extraction of Youtube videos using OpenAI's Whisper, Facebook/BART-Large-CNN, and YAKE
61
  </p>
62
  </div>
63
  """
@@ -67,7 +74,7 @@ with block:
67
  with gr.Row().style(equal_height=True):
68
  sz = gr.Dropdown(label="Model Size", choices=gio.sizes, value='base')
69
  lang = gr.Dropdown(label="Language (Optional)", choices=gio.langs, value="none")
70
- link = gr.Textbox(label="YouTube Link")
71
  title = gr.Label(label="Video Title")
72
  with gr.Row().style(equal_height=True):
73
  img = gr.Image(label="Thumbnail")
@@ -76,7 +83,7 @@ with block:
76
  summary = gr.Textbox(label="Summary", placeholder="Summary Output", lines=5)
77
  keywords = gr.Textbox(label="Keywords", placeholder="Keywords Output", lines=5)
78
  with gr.Row().style(equal_height=True):
79
- btn = gr.Button("Transcribe, Summarize & Extract Keywords")
80
  btn.click(gio, inputs=[link, lang, sz], outputs=[text, summary, keywords])
81
  link.change(gio.populate_metadata, inputs=[link], outputs=[img, title])
82
 
 
2
  import whisper
3
  from pytube import YouTube
4
  import yake
5
+ from transformers import pipeline, T5Tokenizer, T5ForConditionalGeneration
6
 
7
  class GradioInference():
8
  def __init__(self):
 
12
  self.loaded_model = whisper.load_model(self.current_size)
13
  self.yt = None
14
 
 
 
 
15
  # Initialize Facebook/BART-Large-CNN summarizer
16
  self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
17
 
18
+ self.keyword_model = T5ForConditionalGeneration.from_pretrained("Voicelab/vlt5-base-keywords")
19
+ self.tokenizer = T5Tokenizer.from_pretrained("Voicelab/vlt5-base-keywords")
20
+
21
  def __call__(self, link, lang, size):
22
  if self.yt is None:
23
  self.yt = YouTube(link)
 
34
  # Perform summarization on the transcription
35
  transcription_summary = self.summarizer(results["text"], max_length=130, min_length=30, do_sample=False)
36
 
37
+ task_prefix = "Keywords: "
38
+
39
+ input_sequence = task_prefix + transcription
40
+ input_ids = tokenizer(
41
+ input_sequence, return_tensors="pt", truncation=False,
42
+ ).input_ids
43
+ output = keyword_model.generate(input_ids, no_repeat_ngram_size=3, num_beams=4)
44
+ predicted = tokenizer.decode(output[0], skip_special_tokens=True)
45
+ keywords = [x.strip() for x in predicted.split(',') if x.strip()]
46
 
47
+ return results["text"], transcription_summary[0]["summary_text"], keywords
48
 
49
  def populate_metadata(self, link):
50
  self.yt = YouTube(link)
 
53
 
54
  gio = GradioInference()
55
  title = "Youtube Insights"
56
+ description = "Your AI-powered Video Analytics"
57
 
58
  block = gr.Blocks()
59
  with block:
 
64
  <h1>Youtube Insights</h1>
65
  </div>
66
  <p style="margin-bottom: 10px; font-size: 94%">
67
+ Your AI-powered Video Analytics
68
  </p>
69
  </div>
70
  """
 
74
  with gr.Row().style(equal_height=True):
75
  sz = gr.Dropdown(label="Model Size", choices=gio.sizes, value='base')
76
  lang = gr.Dropdown(label="Language (Optional)", choices=gio.langs, value="none")
77
+ link = gr.Textbox(label="YouTube URL")
78
  title = gr.Label(label="Video Title")
79
  with gr.Row().style(equal_height=True):
80
  img = gr.Image(label="Thumbnail")
 
83
  summary = gr.Textbox(label="Summary", placeholder="Summary Output", lines=5)
84
  keywords = gr.Textbox(label="Keywords", placeholder="Keywords Output", lines=5)
85
  with gr.Row().style(equal_height=True):
86
+ btn = gr.Button("Get Video Insights")
87
  btn.click(gio, inputs=[link, lang, sz], outputs=[text, summary, keywords])
88
  link.change(gio.populate_metadata, inputs=[link], outputs=[img, title])
89