amirhoseinsedaghati committed on
Commit
f053e24
·
verified ·
1 Parent(s): dc91b3c

Update pages/Analyze_Text.py

Browse files
Files changed (1) hide show
  1. pages/Analyze_Text.py +20 -21
pages/Analyze_Text.py CHANGED
@@ -49,19 +49,9 @@ def get_word_stats(text):
49
 
50
  def plot_top_keywords_frequencies(text, n_top_keywords):
51
  preprocessed_text = nfx.remove_stopwords(text)
52
- blob = TextBlob(preprocessed_text)
53
- words = blob.words
54
- top_keywords = Counter(words).most_common(n_top_keywords)
55
- top_keywords_df = pd.DataFrame(top_keywords, columns=['words', 'frequency'])
56
- figure = px.bar(top_keywords_df, x='words', y='frequency', color='frequency', title=f'the frequency of {n_top_keywords} top keywords', width=400, height=400, color_continuous_scale='Blues')
57
- return figure
58
-
59
-
60
- def get_sentence_stats(text):
61
  try:
62
- blob = TextBlob(text)
63
- sentences = [str(sentence) for sentence in blob.sentences]
64
- noun_phrases = list(blob.noun_phrases)
65
  except:
66
  # These corpora are commonly used by TextBlob for various natural language processing tasks.
67
  nltk.download('brown')
@@ -71,16 +61,25 @@ def get_sentence_stats(text):
71
  nltk.download('conll2000')
72
  nltk.download('movie_reviews')
73
 
74
- blob = TextBlob(text)
75
- sentences = [str(sentence) for sentence in blob.sentences]
76
- noun_phrases = list(blob.noun_phrases)
77
  finally:
78
- sentence_stats = {
79
- 'Number of Sentences' : len(sentences),
80
- 'Number of Noun Phrases' : len(noun_phrases)
81
- }
82
- sentence_stats_df = pd.DataFrame(sentence_stats, index=[0])
83
- return sentences, noun_phrases, sentence_stats_df
 
 
 
 
 
 
 
 
 
 
84
 
85
 
86
  def plot_tokens_pos(tokens_stats_df):
 
49
 
50
  def plot_top_keywords_frequencies(text, n_top_keywords):
51
  preprocessed_text = nfx.remove_stopwords(text)
 
 
 
 
 
 
 
 
 
52
  try:
53
+ blob = TextBlob(preprocessed_text)
54
+ words = blob.words
 
55
  except:
56
  # These corpora are commonly used by TextBlob for various natural language processing tasks.
57
  nltk.download('brown')
 
61
  nltk.download('conll2000')
62
  nltk.download('movie_reviews')
63
 
64
+ blob = TextBlob(preprocessed_text)
65
+ words = blob.words
 
66
  finally:
67
+ top_keywords = Counter(words).most_common(n_top_keywords)
68
+ top_keywords_df = pd.DataFrame(top_keywords, columns=['words', 'frequency'])
69
+ figure = px.bar(top_keywords_df, x='words', y='frequency', color='frequency', title=f'the frequency of {n_top_keywords} top keywords', width=400, height=400, color_continuous_scale='Blues')
70
+ return figure
71
+
72
+
73
+ def get_sentence_stats(text):
74
+ blob = TextBlob(text)
75
+ sentences = [str(sentence) for sentence in blob.sentences]
76
+ noun_phrases = list(blob.noun_phrases)
77
+ sentence_stats = {
78
+ 'Number of Sentences' : len(sentences),
79
+ 'Number of Noun Phrases' : len(noun_phrases)
80
+ }
81
+ sentence_stats_df = pd.DataFrame(sentence_stats, index=[0])
82
+ return sentences, noun_phrases, sentence_stats_df
83
 
84
 
85
  def plot_tokens_pos(tokens_stats_df):