storresbusquets committed on
Commit f7ea072 · 1 Parent(s): 038645c

Update app.py

Files changed (1)
  app.py  +54 -35
app.py CHANGED
@@ -1,7 +1,6 @@
 import gradio as gr
 import whisper
 from pytube import YouTube
-import yake
 from transformers import pipeline, T5Tokenizer, T5ForConditionalGeneration
 
 class GradioInference():
@@ -11,13 +10,12 @@ class GradioInference():
         self.current_size = "base"
         self.loaded_model = whisper.load_model(self.current_size)
         self.yt = None
-
-        # Initialize Facebook/BART-Large-CNN summarizer
         self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
-
+
+        # Initialize VoiceLabT5 model and tokenizer
         self.keyword_model = T5ForConditionalGeneration.from_pretrained("Voicelab/vlt5-base-keywords")
-        self.tokenizer = T5Tokenizer.from_pretrained("Voicelab/vlt5-base-keywords")
-
+        self.keyword_tokenizer = T5Tokenizer.from_pretrained("Voicelab/vlt5-base-keywords")
+
     def __call__(self, link, lang, size):
         if self.yt is None:
             self.yt = YouTube(link)
@@ -34,57 +32,78 @@ class GradioInference():
         # Perform summarization on the transcription
         transcription_summary = self.summarizer(results["text"], max_length=130, min_length=30, do_sample=False)
 
+        # Extract keywords using VoiceLabT5
         task_prefix = "Keywords: "
-
-        input_sequence = task_prefix + transcription
-        input_ids = tokenizer(
-            input_sequence, return_tensors="pt", truncation=False,
-        ).input_ids
-        output = keyword_model.generate(input_ids, no_repeat_ngram_size=3, num_beams=4)
-        predicted = tokenizer.decode(output[0], skip_special_tokens=True)
+        input_sequence = task_prefix + results["text"]
+        input_ids = self.keyword_tokenizer(input_sequence, return_tensors="pt", truncation=False).input_ids
+        output = self.keyword_model.generate(input_ids, no_repeat_ngram_size=3, num_beams=4)
+        predicted = self.keyword_tokenizer.decode(output[0], skip_special_tokens=True)
         keywords = [x.strip() for x in predicted.split(',') if x.strip()]
-
+
         return results["text"], transcription_summary[0]["summary_text"], keywords
 
     def populate_metadata(self, link):
         self.yt = YouTube(link)
         return self.yt.thumbnail_url, self.yt.title
 
+    def transcribe_audio(audio_file):
+        model = whisper.load_model("base")
+        result = model.transcribe(audio_file)
+        return result["text"]
+
 
 gio = GradioInference()
 title = "Youtube Insights"
-description = "Your AI-powered Video Analytics"
+description = "Your AI-powered video analytics tool"
 
 block = gr.Blocks()
-with block:
+with block as demo:
     gr.HTML(
         """
         <div style="text-align: center; max-width: 500px; margin: 0 auto;">
           <div>
-            <h1>Youtube Insights</h1>
+            <h1>Youtube <span style="color: red;">Insights</span> 📹</h1>
           </div>
           <p style="margin-bottom: 10px; font-size: 94%">
-            Your AI-powered Video Analytics
+            Your AI-powered video analytics tool
           </p>
         </div>
         """
     )
     with gr.Group():
-        with gr.Box():
-            with gr.Row().style(equal_height=True):
-                sz = gr.Dropdown(label="Model Size", choices=gio.sizes, value='base')
-                lang = gr.Dropdown(label="Language (Optional)", choices=gio.langs, value="none")
-                link = gr.Textbox(label="YouTube URL")
-                title = gr.Label(label="Video Title")
-            with gr.Row().style(equal_height=True):
-                img = gr.Image(label="Thumbnail")
-                text = gr.Textbox(label="Transcription", placeholder="Transcription Output", lines=10)
-            with gr.Row().style(equal_height=True):
-                summary = gr.Textbox(label="Summary", placeholder="Summary Output", lines=5)
-                keywords = gr.Textbox(label="Keywords", placeholder="Keywords Output", lines=5)
-            with gr.Row().style(equal_height=True):
-                btn = gr.Button("Get Video Insights")
-            btn.click(gio, inputs=[link, lang, sz], outputs=[text, summary, keywords])
-            link.change(gio.populate_metadata, inputs=[link], outputs=[img, title])
+        with gr.Tab("From YouTube"):
+            with gr.Box():
+                with gr.Row().style(equal_height=True):
+                    size = gr.Dropdown(label="Model Size", choices=gio.sizes, value='base')
+                    lang = gr.Dropdown(label="Language (Optional)", choices=gio.langs, value="none")
+                    link = gr.Textbox(label="YouTube Link")
+                    title = gr.Label(label="Video Title")
+                with gr.Row().style(equal_height=True):
+                    img = gr.Image(label="Thumbnail")
+                    text = gr.Textbox(label="Transcription", placeholder="Transcription Output", lines=10)
+                with gr.Row().style(equal_height=True):
+                    summary = gr.Textbox(label="Summary", placeholder="Summary Output", lines=5)
+                    keywords = gr.Textbox(label="Keywords", placeholder="Keywords Output", lines=5)
+                with gr.Row().style(equal_height=True):
+                    btn = gr.Button("Get video insights")  # Updated button label
+                btn.click(gio, inputs=[link, lang, size], outputs=[text, summary, keywords])
+                link.change(gio.populate_metadata, inputs=[link], outputs=[img, title])
+
+        with gr.Tab("From Audio file"):
+            with gr.Box():
+                with gr.Row().style(equal_height=True):
+                    size = gr.Dropdown(label="Model Size", choices=gio.sizes, value='base')
+                    lang = gr.Dropdown(label="Language (Optional)", choices=gio.langs, value="none")
+                    audio_file = gr.Audio(type="filepath")
+                with gr.Row().style(equal_height=True):
+                    # img = gr.Image(label="Thumbnail")
+                    text = gr.Textbox(label="Transcription", placeholder="Transcription Output", lines=10)
+                    # with gr.Row().style(equal_height=True):
+                    # summary = gr.Textbox(label="Summary", placeholder="Summary Output", lines=5)
+                    # keywords = gr.Textbox(label="Keywords", placeholder="Keywords Output", lines=5)
+                with gr.Row().style(equal_height=True):
+                    btn = gr.Button("Get video insights")  # Updated button label
+                btn.click(transcribe_audio, inputs=[audio_file], outputs=[text])
+                # link.change(gio.populate_metadata, inputs=[link], outputs=[img, title])
 
-block.launch()
+demo.launch()
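
For quick verification outside the app, the keyword-extraction path introduced in this commit can be tried on its own. The sketch below is not part of the commit: it reuses the model ID and generate() settings that appear in the diff, and the sample transcript string is a made-up placeholder.

# Standalone sketch of the VoiceLabT5 keyword extraction used in __call__ above.
# Model ID and generation settings mirror the diff; the transcript is a placeholder.
from transformers import T5Tokenizer, T5ForConditionalGeneration

tokenizer = T5Tokenizer.from_pretrained("Voicelab/vlt5-base-keywords")
model = T5ForConditionalGeneration.from_pretrained("Voicelab/vlt5-base-keywords")

transcript = "Whisper transcribes the audio and BART then summarizes the resulting text."
input_ids = tokenizer("Keywords: " + transcript, return_tensors="pt", truncation=False).input_ids
output = model.generate(input_ids, no_repeat_ngram_size=3, num_beams=4)
predicted = tokenizer.decode(output[0], skip_special_tokens=True)
keywords = [x.strip() for x in predicted.split(",") if x.strip()]
print(keywords)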