Spaces:

amirhoseinsedaghati
/

multi-purpose-text-application

Sleeping

App Files Community

amirhoseinsedaghati committed on Feb 22, 2024

Commit

f053e24

verified ·

1 Parent(s): dc91b3c

Update pages/Analyze_Text.py

Browse files

Files changed (1) hide show

pages/Analyze_Text.py +20 -21

pages/Analyze_Text.py CHANGED Viewed

@@ -49,19 +49,9 @@ def get_word_stats(text):
 def plot_top_keywords_frequencies(text, n_top_keywords):
     preprocessed_text = nfx.remove_stopwords(text)
-    blob = TextBlob(preprocessed_text)
-    words = blob.words
-    top_keywords = Counter(words).most_common(n_top_keywords)
-    top_keywords_df = pd.DataFrame(top_keywords, columns=['words', 'frequency'])
-    figure = px.bar(top_keywords_df, x='words', y='frequency', color='frequency', title=f'the frequency of {n_top_keywords} top keywords', width=400, height=400, color_continuous_scale='Blues')
-    return figure
-def get_sentence_stats(text):
     try:
-        blob = TextBlob(text)
-        sentences = [str(sentence) for sentence in blob.sentences]
-        noun_phrases = list(blob.noun_phrases)
     except:
         # These corpora are commonly used by TextBlob for various natural language processing tasks.
         nltk.download('brown')
@@ -71,16 +61,25 @@ def get_sentence_stats(text):
         nltk.download('conll2000')
         nltk.download('movie_reviews')
-        blob = TextBlob(text)
-        sentences = [str(sentence) for sentence in blob.sentences]
-        noun_phrases = list(blob.noun_phrases)
     finally:
-        sentence_stats = {
-            'Number of Sentences' : len(sentences),
-            'Number of Noun Phrases' : len(noun_phrases)
-        }
-        sentence_stats_df = pd.DataFrame(sentence_stats, index=[0])
-        return sentences, noun_phrases, sentence_stats_df
 def plot_tokens_pos(tokens_stats_df):

 def plot_top_keywords_frequencies(text, n_top_keywords):
     preprocessed_text = nfx.remove_stopwords(text)
     try:
+        blob = TextBlob(preprocessed_text)
+        words = blob.words
     except:
         # These corpora are commonly used by TextBlob for various natural language processing tasks.
         nltk.download('brown')
         nltk.download('conll2000')
         nltk.download('movie_reviews')
+        blob = TextBlob(preprocessed_text)
+        words = blob.words
     finally:
+        top_keywords = Counter(words).most_common(n_top_keywords)
+        top_keywords_df = pd.DataFrame(top_keywords, columns=['words', 'frequency'])
+        figure = px.bar(top_keywords_df, x='words', y='frequency', color='frequency', title=f'the frequency of {n_top_keywords} top keywords', width=400, height=400, color_continuous_scale='Blues')
+        return figure
+def get_sentence_stats(text):
+    blob = TextBlob(text)
+    sentences = [str(sentence) for sentence in blob.sentences]
+    noun_phrases = list(blob.noun_phrases)
+    sentence_stats = {
+        'Number of Sentences' : len(sentences),
+        'Number of Noun Phrases' : len(noun_phrases)
+    }
+    sentence_stats_df = pd.DataFrame(sentence_stats, index=[0])
+    return sentences, noun_phrases, sentence_stats_df
 def plot_tokens_pos(tokens_stats_df):