storresbusquets committed on
Commit
85cf8f4
·
1 Parent(s): 001afc8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +197 -187
app.py CHANGED
@@ -206,112 +206,117 @@ class GradioInference:
206
  - Sentiment Analysis: using Hugging Face's default sentiment classifier
207
  - WordCloud: using the wordcloud python library.
208
  """
209
- gr.Info("Starting process")
210
- progress(0, desc="Starting analysis")
211
-
212
- if lang == "none":
213
- lang = None
214
-
215
- if size != self.current_size:
216
- self.loaded_model = whisper.load_model(size)
217
- self.current_size = size
218
-
219
- progress(0.20, desc="Transcribing")
220
-
221
- results = self.loaded_model.transcribe(audio_file, language=lang)
222
-
223
- progress(0.40, desc="Summarizing")
224
-
225
- # Perform summarization on the transcription
226
- transcription_summary = self.bart_summarizer(
227
- results["text"], max_length=150, min_length=30, do_sample=False, truncation=True
228
- )
229
-
230
- # Multilingual summary with mt5
231
- WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))
232
-
233
- input_ids_sum = self.mt5_tokenizer(
234
- [WHITESPACE_HANDLER(results["text"])],
235
- return_tensors="pt",
236
- padding="max_length",
237
- truncation=True,
238
- max_length=512
239
- )["input_ids"]
240
-
241
- output_ids_sum = self.mt5_model.generate(
242
- input_ids=input_ids_sum,
243
- max_length=130,
244
- no_repeat_ngram_size=2,
245
- num_beams=4
246
- )[0]
247
-
248
- summary = self.mt5_tokenizer.decode(
249
- output_ids_sum,
250
- skip_special_tokens=True,
251
- clean_up_tokenization_spaces=False
252
- )
253
- # End multilingual summary
254
-
255
- progress(0.60, desc="Extracting Keywords")
256
-
257
- # Extract keywords using VoiceLabT5
258
- task_prefix = "Keywords: "
259
- input_sequence = task_prefix + results["text"]
260
-
261
- input_ids = self.keyword_tokenizer(
262
- input_sequence,
263
- return_tensors="pt",
264
- truncation=False
265
- ).input_ids
266
-
267
- output = self.keyword_model.generate(
268
- input_ids,
269
- no_repeat_ngram_size=3,
270
- num_beams=4
271
- )
272
- predicted = self.keyword_tokenizer.decode(output[0], skip_special_tokens=True)
273
- keywords = [x.strip() for x in predicted.split(",") if x.strip()]
274
- formatted_keywords = "\n".join([f"• {keyword}" for keyword in keywords])
275
-
276
- progress(0.80, desc="Extracting Sentiment")
277
-
278
- # Define a dictionary to map labels to emojis
279
- sentiment_emojis = {
280
- "positive": "Positive 👍🏼",
281
- "negative": "Negative 👎🏼",
282
- "neutral": "Neutral 😶",
283
- }
284
-
285
- # Sentiment label
286
- label = self.classifier(summary)[0]["label"]
287
-
288
- # Format the label with emojis
289
- formatted_sentiment = sentiment_emojis.get(label, label)
290
-
291
- progress(0.90, desc="Generating Wordcloud")
292
- # WordCloud object
293
- wordcloud = WordCloud(colormap = "Oranges").generate(
294
- results["text"]
295
- )
296
- wordcloud_image = wordcloud.to_image()
297
-
298
- if lang == "english" or lang == "none":
299
- return (
300
- results["text"],
301
- transcription_summary[0]["summary_text"],
302
- formatted_keywords,
303
- formatted_sentiment,
304
- wordcloud_image,
305
  )
306
- else:
307
- return (
308
- results["text"],
309
- summary,
310
- formatted_keywords,
311
- formatted_sentiment,
312
- wordcloud_image,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
313
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
314
 
 
 
315
 
316
  def from_article(self, article, progress=gr.Progress()):
317
  """
@@ -322,91 +327,96 @@ class GradioInference:
322
  - Sentiment Analysis: using Hugging Face's default sentiment classifier
323
  - WordCloud: using the wordcloud python library.
324
  """
325
- gr.Info("Starting process")
326
- progress(0, desc="Starting analysis")
327
-
328
- progress(0.30, desc="Summarizing")
329
-
330
- # Perform summarization on the transcription
331
- transcription_summary = self.bart_summarizer(
332
- article, max_length=150, min_length=30, do_sample=False, truncation=True
333
- )
334
-
335
- # Multilingual summary with mt5
336
- WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))
337
-
338
- input_ids_sum = self.mt5_tokenizer(
339
- [WHITESPACE_HANDLER(article)],
340
- return_tensors="pt",
341
- padding="max_length",
342
- truncation=True,
343
- max_length=512
344
- )["input_ids"]
345
-
346
- output_ids_sum = self.mt5_model.generate(
347
- input_ids=input_ids_sum,
348
- max_length=130,
349
- no_repeat_ngram_size=2,
350
- num_beams=4
351
- )[0]
352
-
353
- summary = self.mt5_tokenizer.decode(
354
- output_ids_sum,
355
- skip_special_tokens=True,
356
- clean_up_tokenization_spaces=False
357
- )
358
- # End multilingual summary
359
-
360
- progress(0.60, desc="Extracting Keywords")
361
-
362
- # Extract keywords using VoiceLabT5
363
- task_prefix = "Keywords: "
364
- input_sequence = task_prefix + article
365
-
366
- input_ids = self.keyword_tokenizer(
367
- input_sequence,
368
- return_tensors="pt",
369
- truncation=False
370
- ).input_ids
371
-
372
- output = self.keyword_model.generate(
373
- input_ids,
374
- no_repeat_ngram_size=3,
375
- num_beams=4
376
- )
377
- predicted = self.keyword_tokenizer.decode(output[0], skip_special_tokens=True)
378
- keywords = [x.strip() for x in predicted.split(",") if x.strip()]
379
- formatted_keywords = "\n".join([f"• {keyword}" for keyword in keywords])
380
-
381
- progress(0.80, desc="Extracting Sentiment")
382
-
383
- # Define a dictionary to map labels to emojis
384
- sentiment_emojis = {
385
- "positive": "Positive 👍🏼",
386
- "negative": "Negative 👎🏼",
387
- "neutral": "Neutral 😶",
388
- }
389
-
390
- # Sentiment label
391
- label = self.classifier(summary)[0]["label"]
392
-
393
- # Format the label with emojis
394
- formatted_sentiment = sentiment_emojis.get(label, label)
395
-
396
- progress(0.90, desc="Generating Wordcloud")
397
- # WordCloud object
398
- wordcloud = WordCloud(colormap = "Oranges").generate(
399
- article
400
- )
401
- wordcloud_image = wordcloud.to_image()
 
 
 
 
 
 
 
402
 
403
- return (
404
- transcription_summary[0]["summary_text"],
405
- formatted_keywords,
406
- formatted_sentiment,
407
- wordcloud_image,
408
- )
409
 
 
 
410
 
411
  gio = GradioInference()
412
  title = "Media Insights"
@@ -420,7 +430,7 @@ with block as demo:
420
  """
421
  <div style="text-align: center; max-width: 500px; margin: 0 auto;">
422
  <div>
423
- <h1 style="font-family: Montserrat, sans-serif;">MEDIA <span style="color: #433ccb;">INSIGHTS</span> 💡</h1>
424
  </div>
425
  <h4>
426
  Your AI-powered media analytics tool ✨
 
206
  - Sentiment Analysis: using Hugging Face's default sentiment classifier
207
  - WordCloud: using the wordcloud python library.
208
  """
209
+ try:
210
+ progress(0, desc="Starting analysis")
211
+
212
+ if lang == "none":
213
+ lang = None
214
+
215
+ if size != self.current_size:
216
+ self.loaded_model = whisper.load_model(size)
217
+ self.current_size = size
218
+
219
+ progress(0.20, desc="Transcribing")
220
+
221
+ results = self.loaded_model.transcribe(audio_file, language=lang)
222
+
223
+ progress(0.40, desc="Summarizing")
224
+
225
+ # Perform summarization on the transcription
226
+ transcription_summary = self.bart_summarizer(
227
+ results["text"], max_length=150, min_length=30, do_sample=False, truncation=True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  )
229
+
230
+ # Multilingual summary with mt5
231
+ WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))
232
+
233
+ input_ids_sum = self.mt5_tokenizer(
234
+ [WHITESPACE_HANDLER(results["text"])],
235
+ return_tensors="pt",
236
+ padding="max_length",
237
+ truncation=True,
238
+ max_length=512
239
+ )["input_ids"]
240
+
241
+ output_ids_sum = self.mt5_model.generate(
242
+ input_ids=input_ids_sum,
243
+ max_length=130,
244
+ no_repeat_ngram_size=2,
245
+ num_beams=4
246
+ )[0]
247
+
248
+ summary = self.mt5_tokenizer.decode(
249
+ output_ids_sum,
250
+ skip_special_tokens=True,
251
+ clean_up_tokenization_spaces=False
252
  )
253
+ # End multilingual summary
254
+
255
+ progress(0.60, desc="Extracting Keywords")
256
+
257
+ # Extract keywords using VoiceLabT5
258
+ task_prefix = "Keywords: "
259
+ input_sequence = task_prefix + results["text"]
260
+
261
+ input_ids = self.keyword_tokenizer(
262
+ input_sequence,
263
+ return_tensors="pt",
264
+ truncation=False
265
+ ).input_ids
266
+
267
+ output = self.keyword_model.generate(
268
+ input_ids,
269
+ no_repeat_ngram_size=3,
270
+ num_beams=4
271
+ )
272
+ predicted = self.keyword_tokenizer.decode(output[0], skip_special_tokens=True)
273
+ keywords = [x.strip() for x in predicted.split(",") if x.strip()]
274
+ formatted_keywords = "\n".join([f"• {keyword}" for keyword in keywords])
275
+
276
+ progress(0.80, desc="Extracting Sentiment")
277
+
278
+ # Define a dictionary to map labels to emojis
279
+ sentiment_emojis = {
280
+ "positive": "Positive 👍🏼",
281
+ "negative": "Negative 👎🏼",
282
+ "neutral": "Neutral 😶",
283
+ }
284
+
285
+ # Sentiment label
286
+ label = self.classifier(summary)[0]["label"]
287
+
288
+ # Format the label with emojis
289
+ formatted_sentiment = sentiment_emojis.get(label, label)
290
+
291
+ progress(0.90, desc="Generating Wordcloud")
292
+ # WordCloud object
293
+ wordcloud = WordCloud(colormap = "Oranges").generate(
294
+ results["text"]
295
+ )
296
+ wordcloud_image = wordcloud.to_image()
297
+
298
+ if lang == "english" or lang == "none":
299
+ return (
300
+ results["text"],
301
+ transcription_summary[0]["summary_text"],
302
+ formatted_keywords,
303
+ formatted_sentiment,
304
+ wordcloud_image,
305
+ )
306
+ else:
307
+ return (
308
+ results["text"],
309
+ summary,
310
+ formatted_keywords,
311
+ formatted_sentiment,
312
+ wordcloud_image,
313
+ )
314
+
315
+ except:
316
+ gr.Error(message="Exceeded audio size. Choose a different audio")
317
 
318
+ finally:
319
+ gr.Info("Success!")
320
 
321
  def from_article(self, article, progress=gr.Progress()):
322
  """
 
327
  - Sentiment Analysis: using Hugging Face's default sentiment classifier
328
  - WordCloud: using the wordcloud python library.
329
  """
330
+ try:
331
+ progress(0, desc="Starting analysis")
332
+
333
+ progress(0.30, desc="Summarizing")
334
+
335
+ # Perform summarization on the transcription
336
+ transcription_summary = self.bart_summarizer(
337
+ article, max_length=150, min_length=30, do_sample=False, truncation=True
338
+ )
339
+
340
+ # Multilingual summary with mt5
341
+ WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))
342
+
343
+ input_ids_sum = self.mt5_tokenizer(
344
+ [WHITESPACE_HANDLER(article)],
345
+ return_tensors="pt",
346
+ padding="max_length",
347
+ truncation=True,
348
+ max_length=512
349
+ )["input_ids"]
350
+
351
+ output_ids_sum = self.mt5_model.generate(
352
+ input_ids=input_ids_sum,
353
+ max_length=130,
354
+ no_repeat_ngram_size=2,
355
+ num_beams=4
356
+ )[0]
357
+
358
+ summary = self.mt5_tokenizer.decode(
359
+ output_ids_sum,
360
+ skip_special_tokens=True,
361
+ clean_up_tokenization_spaces=False
362
+ )
363
+ # End multilingual summary
364
+
365
+ progress(0.60, desc="Extracting Keywords")
366
+
367
+ # Extract keywords using VoiceLabT5
368
+ task_prefix = "Keywords: "
369
+ input_sequence = task_prefix + article
370
+
371
+ input_ids = self.keyword_tokenizer(
372
+ input_sequence,
373
+ return_tensors="pt",
374
+ truncation=False
375
+ ).input_ids
376
+
377
+ output = self.keyword_model.generate(
378
+ input_ids,
379
+ no_repeat_ngram_size=3,
380
+ num_beams=4
381
+ )
382
+ predicted = self.keyword_tokenizer.decode(output[0], skip_special_tokens=True)
383
+ keywords = [x.strip() for x in predicted.split(",") if x.strip()]
384
+ formatted_keywords = "\n".join([f"• {keyword}" for keyword in keywords])
385
+
386
+ progress(0.80, desc="Extracting Sentiment")
387
+
388
+ # Define a dictionary to map labels to emojis
389
+ sentiment_emojis = {
390
+ "positive": "Positive 👍🏼",
391
+ "negative": "Negative 👎🏼",
392
+ "neutral": "Neutral 😶",
393
+ }
394
+
395
+ # Sentiment label
396
+ label = self.classifier(summary)[0]["label"]
397
+
398
+ # Format the label with emojis
399
+ formatted_sentiment = sentiment_emojis.get(label, label)
400
+
401
+ progress(0.90, desc="Generating Wordcloud")
402
+ # WordCloud object
403
+ wordcloud = WordCloud(colormap = "Oranges").generate(
404
+ article
405
+ )
406
+ wordcloud_image = wordcloud.to_image()
407
+
408
+ return (
409
+ transcription_summary[0]["summary_text"],
410
+ formatted_keywords,
411
+ formatted_sentiment,
412
+ wordcloud_image,
413
+ )
414
 
415
+ except:
416
+ gr.Error(message="Exceeded text size. Choose a different audio")
 
 
 
 
417
 
418
+ finally:
419
+ gr.Info("Success!")
420
 
421
  gio = GradioInference()
422
  title = "Media Insights"
 
430
  """
431
  <div style="text-align: center; max-width: 500px; margin: 0 auto;">
432
  <div>
433
+ <h1 style="font-family: Nunito, sans-serif;">MEDIA <span style="color: #433ccb;">INSIGHTS</span> 💡</h1>
434
  </div>
435
  <h4>
436
  Your AI-powered media analytics tool ✨