storresbusquets committed
Commit 5a3bdec · Parent: b0808a3

Update app.py

Files changed (1):
  1. app.py +123 -117
app.py CHANGED
@@ -57,125 +57,131 @@ class GradioInference:
         - Sentiment Analysis: using Hugging Face's default sentiment classifier
         - WordCloud: using the wordcloud python library.
         """
-
-        gr.Info("Starting process")
-        progress(0, desc="Starting analysis")
-
-        if self.yt is None:
-            self.yt = YouTube(link)
-
-        # Pytube library to access to YouTube audio stream
-        path = self.yt.streams.filter(only_audio=True)[0].download(filename="tmp.mp4")
-
-        if lang == "none":
-            lang = None
-
-        if size != self.current_size:
-            self.loaded_model = whisper.load_model(size)
-            self.current_size = size
-
-        progress(0.20, desc="Transcribing")
-
-        # Transcribe the audio extracted from pytube
-        results = self.loaded_model.transcribe(path, language=lang)
-
-        progress(0.40, desc="Summarizing")
-
-        # Perform summarization on the transcription
-        transcription_summary = self.bart_summarizer(
-            results["text"],
-            max_length=256,
-            min_length=30,
-            do_sample=False,
-            truncation=True
-        )
-
-        # Multilingual summary with mt5
-        WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))
-
-        input_ids_sum = self.mt5_tokenizer(
-            [WHITESPACE_HANDLER(results["text"])],
-            return_tensors="pt",
-            padding="max_length",
-            truncation=True,
-            max_length=512
-        )["input_ids"]
-
-        output_ids_sum = self.mt5_model.generate(
-            input_ids=input_ids_sum,
-            max_length=256,
-            no_repeat_ngram_size=2,
-            num_beams=4
-        )[0]
-
-        summary = self.mt5_tokenizer.decode(
-            output_ids_sum,
-            skip_special_tokens=True,
-            clean_up_tokenization_spaces=False
-        )
-        # End multilingual summary
-
-        progress(0.60, desc="Extracting Keywords")
-
-        # Extract keywords using VoiceLabT5
-        task_prefix = "Keywords: "
-        input_sequence = task_prefix + results["text"]
-
-        input_ids = self.keyword_tokenizer(
-            input_sequence,
-            return_tensors="pt",
-            truncation=False
-        ).input_ids
-
-        output = self.keyword_model.generate(
-            input_ids,
-            no_repeat_ngram_size=3,
-            num_beams=4
-        )
-
-        predicted = self.keyword_tokenizer.decode(output[0], skip_special_tokens=True)
-        keywords = [x.strip() for x in predicted.split(",") if x.strip()]
-        formatted_keywords = "\n".join([f"• {keyword}" for keyword in keywords])
-
-        progress(0.80, desc="Extracting Sentiment")
-
-        # Define a dictionary to map labels to emojis
-        sentiment_emojis = {
-            "positive": "Positive 👍🏼",
-            "negative": "Negative 👎🏼",
-            "neutral": "Neutral 😶",
-        }
-
-        # Sentiment label
-        label = self.classifier(summary)[0]["label"]
-
-        # Format the label with emojis
-        formatted_sentiment = sentiment_emojis.get(label, label)
-
-        progress(0.90, desc="Generating Wordcloud")
-
-        # Generate WordCloud object
-        wordcloud = WordCloud(colormap = "Oranges").generate(results["text"])
-
-        # WordCloud image to display
-        wordcloud_image = wordcloud.to_image()
-
-        if lang == "english" or lang == "none":
-            return (
-                results["text"],
-                transcription_summary[0]["summary_text"],
-                formatted_keywords,
-                formatted_sentiment,
-                wordcloud_image,
-            )
-        else:
-            return (
-                results["text"],
-                summary,
-                formatted_keywords,
-                formatted_sentiment,
-                wordcloud_image,
-            )
+        try:
+
+            progress(0, desc="Starting analysis")
+
+            if self.yt is None:
+                self.yt = YouTube(link)
+
+            # Pytube library to access to YouTube audio stream
+            path = self.yt.streams.filter(only_audio=True)[0].download(filename="tmp.mp4")
+
+            if lang == "none":
+                lang = None
+
+            if size != self.current_size:
+                self.loaded_model = whisper.load_model(size)
+                self.current_size = size
+
+            progress(0.20, desc="Transcribing")
+
+            # Transcribe the audio extracted from pytube
+            results = self.loaded_model.transcribe(path, language=lang)
+
+            progress(0.40, desc="Summarizing")
+
+            # Perform summarization on the transcription
+            transcription_summary = self.bart_summarizer(
+                results["text"],
+                max_length=256,
+                min_length=30,
+                do_sample=False,
+                truncation=True
+            )
+
+            # Multilingual summary with mt5
+            WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))
+
+            input_ids_sum = self.mt5_tokenizer(
+                [WHITESPACE_HANDLER(results["text"])],
+                return_tensors="pt",
+                padding="max_length",
+                truncation=True,
+                max_length=512
+            )["input_ids"]
+
+            output_ids_sum = self.mt5_model.generate(
+                input_ids=input_ids_sum,
+                max_length=256,
+                no_repeat_ngram_size=2,
+                num_beams=4
+            )[0]
+
+            summary = self.mt5_tokenizer.decode(
+                output_ids_sum,
+                skip_special_tokens=True,
+                clean_up_tokenization_spaces=False
+            )
+            # End multilingual summary
+
+            progress(0.60, desc="Extracting Keywords")
+
+            # Extract keywords using VoiceLabT5
+            task_prefix = "Keywords: "
+            input_sequence = task_prefix + results["text"]
+
+            input_ids = self.keyword_tokenizer(
+                input_sequence,
+                return_tensors="pt",
+                truncation=False
+            ).input_ids
+
+            output = self.keyword_model.generate(
+                input_ids,
+                no_repeat_ngram_size=3,
+                num_beams=4
+            )
+
+            predicted = self.keyword_tokenizer.decode(output[0], skip_special_tokens=True)
+            keywords = [x.strip() for x in predicted.split(",") if x.strip()]
+            formatted_keywords = "\n".join([f"• {keyword}" for keyword in keywords])
+
+            progress(0.80, desc="Extracting Sentiment")
+
+            # Define a dictionary to map labels to emojis
+            sentiment_emojis = {
+                "positive": "Positive 👍🏼",
+                "negative": "Negative 👎🏼",
+                "neutral": "Neutral 😶",
+            }
+
+            # Sentiment label
+            label = self.classifier(summary)[0]["label"]
+
+            # Format the label with emojis
+            formatted_sentiment = sentiment_emojis.get(label, label)
+
+            progress(0.90, desc="Generating Wordcloud")
+
+            # Generate WordCloud object
+            wordcloud = WordCloud(colormap = "Oranges").generate(results["text"])
+
+            # WordCloud image to display
+            wordcloud_image = wordcloud.to_image()
+
+            if lang == "english" or lang == "none":
+                return (
+                    results["text"],
+                    transcription_summary[0]["summary_text"],
+                    formatted_keywords,
+                    formatted_sentiment,
+                    wordcloud_image,
+                )
+            else:
+                return (
+                    results["text"],
+                    summary,
+                    formatted_keywords,
+                    formatted_sentiment,
+                    wordcloud_image,
+                )
+
+        except:
+            gr.Error("Restricted Content. Choose a different video")
+            return None, None, None, None, None
+
+        gr.Info("Success")


     def populate_metadata(self, link):
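
Note on the error handling this commit adds: in Gradio, gr.Error is an exception class and only surfaces in the UI when raised, while gr.Info shows its message when simply called. As committed, the except block constructs gr.Error(...) without raising it, so no error message is displayed, and the trailing gr.Info("Success") is unreachable because every path above it returns. A minimal sketch of the intended pattern follows, assuming recent Gradio and pytube; the helper name and exception choices are illustrative, not the app's actual code.

import gradio as gr
from pytube import YouTube
from pytube.exceptions import PytubeError

def fetch_audio(link):
    # Hypothetical helper showing the try/except pattern from this commit.
    try:
        yt = YouTube(link)
        # Restricted or unavailable videos typically fail here,
        # or yield no audio streams (hence IndexError below).
        path = yt.streams.filter(only_audio=True)[0].download(filename="tmp.mp4")
    except (PytubeError, IndexError) as e:
        # Raising gr.Error aborts the event and shows the message in the UI,
        # so no fallback return values are needed.
        raise gr.Error("Restricted content. Choose a different video") from e
    gr.Info("Success")  # reached only when the download succeeded
    return path

Catching PytubeError (pytube's base exception) plus IndexError keeps the handler narrow, unlike the bare except in the diff, which would also swallow unrelated failures such as model-loading errors.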