storresbusquets committed on
Commit
1ee9ade
·
1 Parent(s): aecbf85

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -16
app.py CHANGED
@@ -1,24 +1,83 @@
1
  import gradio as gr
2
  import whisper
 
 
 
3
 
 
 
 
 
 
 
 
4
 
5
# Whisper model shared by every call; loaded on first use so the weights
# are not re-read for each request.
_BASE_MODEL = None


def transcribe_audio(audio_file):
    """Transcribe an audio file with the Whisper "base" model.

    Args:
        audio_file: Path of the audio file to transcribe. May be ``None`` or
            empty when nothing was uploaded.

    Returns:
        The transcribed text, or an empty string when no file was given.
    """
    global _BASE_MODEL

    # Nothing uploaded: return an empty transcript instead of failing
    # inside the model.
    if not audio_file:
        return ""

    if _BASE_MODEL is None:
        _BASE_MODEL = whisper.load_model("base")

    result = _BASE_MODEL.transcribe(audio_file)
    return result["text"]
9
 
 
 
10
 
11
def main():
    """Build the Gradio upload-and-transcribe interface and launch it."""
    audio_input = gr.Audio(source="upload", type="filepath")
    output_text = gr.Textbox()

    iface = gr.Interface(
        fn=transcribe_audio,
        inputs=audio_input,
        outputs=output_text,
        title="Audio Transcription App",
        # A single literal: a backslash continuation inside the string
        # would carry the next line's leading whitespace into the text.
        description="Upload an audio file and hit the 'Submit' button",
    )

    iface.launch()
21
 
 
 
22
 
23
# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import whisper
3
+ from pytube import YouTube
4
+ import yake
5
+ from transformers import pipeline
6
 
7
class GradioInference:
    """Transcribe, summarize, and keyword-tag the audio of a YouTube video.

    The Whisper model, the YAKE keyword extractor, and the BART summarization
    pipeline are created once in the constructor and reused by every call.
    """

    def __init__(self):
        # Choices offered by the "Model Size" and "Language" dropdowns.
        self.sizes = whisper.available_models()
        self.langs = ["none"] + sorted(whisper.tokenizer.LANGUAGES.values())

        # Currently loaded Whisper model and the size it was loaded with.
        self.current_size = "base"
        self.loaded_model = whisper.load_model(self.current_size)

        # Last video looked up by populate_metadata().
        self.yt = None

        # Initialize YAKE keyword extractor
        self.keyword_extractor = yake.KeywordExtractor(
            lan="en",
            n=3,
            dedupLim=0.9,
            dedupFunc="seqm",
            windowsSize=1,
            top=5,
            features=None,
        )

        # Initialize Facebook/BART-Large-CNN summarizer
        self.summarizer = pipeline(
            "summarization", model="facebook/bart-large-cnn"
        )

    def __call__(self, link, lang, size):
        """Download a video's audio, transcribe it, summarize it, and tag it.

        Args:
            link: URL of the YouTube video.
            lang: Language name passed to Whisper, or ``"none"`` to leave the
                language unspecified.
            size: Whisper model size; the model is reloaded when this differs
                from the currently loaded size.

        Returns:
            A tuple ``(transcription, summary, keywords)`` where ``keywords``
            is a list of keyword strings.

        Raises:
            ValueError: If ``link`` is blank or the video exposes no
                audio-only stream.
        """
        import tempfile

        if not link or not link.strip():
            raise ValueError("A YouTube link is required.")

        # "none" is the dropdown placeholder for "no language selected".
        if lang == "none":
            lang = None

        if size != self.current_size:
            self.loaded_model = whisper.load_model(size)
            self.current_size = size

        # Always resolve the video from the link passed in. Reusing self.yt
        # could transcribe whichever video populate_metadata() saw last.
        video = YouTube(link)
        audio_stream = video.streams.filter(only_audio=True).first()
        if audio_stream is None:
            raise ValueError(
                "No audio-only stream is available for this video."
            )

        # A per-call temporary directory keeps concurrent requests from
        # overwriting each other's download and removes the file afterwards.
        with tempfile.TemporaryDirectory() as workdir:
            path = audio_stream.download(
                output_path=workdir, filename="tmp.mp4"
            )
            results = self.loaded_model.transcribe(path, language=lang)

        text = results["text"]

        # Nothing was transcribed: skip summarization and keyword extraction.
        if not text.strip():
            return text, "", []

        # Perform summarization on the transcription. truncation=True keeps
        # long transcripts within the model's maximum input length.
        transcription_summary = self.summarizer(
            text,
            max_length=130,
            min_length=30,
            do_sample=False,
            truncation=True,
        )

        # Extract keywords from the transcription
        keywords = self.keyword_extractor.extract_keywords(text)

        return (
            text,
            transcription_summary[0]["summary_text"],
            [kw[0] for kw in keywords],
        )

    def populate_metadata(self, link):
        """Look up the thumbnail URL and title for a YouTube link.

        Args:
            link: URL of the YouTube video; may be blank while the user is
                editing the textbox.

        Returns:
            A tuple ``(thumbnail_url, title)``, or ``(None, None)`` when the
            link is blank.
        """
        if not link or not link.strip():
            self.yt = None
            return None, None

        self.yt = YouTube(link)
        return self.yt.thumbnail_url, self.yt.title
45
+
46
+
47
gio = GradioInference()

# Header texts. The title constant has its own name so that the `title`
# Label component created below does not overwrite it.
app_title = "Youtube Whisperer"
description = "Speech to text transcription, summary, and keyword extraction of Youtube videos using OpenAI's Whisper, Facebook/BART-Large-CNN, and YAKE"

block = gr.Blocks()
with block:
    # Page header, rendered from the constants above.
    gr.HTML(
        f"""
        <div style="text-align: center; max-width: 500px; margin: 0 auto;">
          <div>
            <h1>{app_title}</h1>
          </div>
          <p style="margin-bottom: 10px; font-size: 94%">
            {description}
          </p>
        </div>
        """
    )
    with gr.Group():
        with gr.Box():
            with gr.Row().style(equal_height=True):
                sz = gr.Dropdown(label="Model Size", choices=gio.sizes, value='base')
                lang = gr.Dropdown(label="Language (Optional)", choices=gio.langs, value="none")
            link = gr.Textbox(label="YouTube Link")
            title = gr.Label(label="Video Title")
            with gr.Row().style(equal_height=True):
                img = gr.Image(label="Thumbnail")
                text = gr.Textbox(label="Transcription", placeholder="Transcription Output", lines=10)
            with gr.Row().style(equal_height=True):
                summary = gr.Textbox(label="Summary", placeholder="Summary Output", lines=5)
                keywords = gr.Textbox(label="Keywords", placeholder="Keywords Output", lines=5)
            with gr.Row().style(equal_height=True):
                btn = gr.Button("Transcribe, Summarize & Extract Keywords")

    # The button runs the full pipeline; editing the link refreshes the
    # thumbnail and title.
    btn.click(gio, inputs=[link, lang, sz], outputs=[text, summary, keywords])
    link.change(gio.populate_metadata, inputs=[link], outputs=[img, title])

block.launch()