amirhoseinsedaghati committed
Commit f156672 · verified · 1 Parent(s): c4c785c

Update pages/Analyze_Text.py

Files changed (1):
  1. pages/Analyze_Text.py (+27 -22)
pages/Analyze_Text.py CHANGED
@@ -6,29 +6,19 @@ import plotly.express as px
 from wordcloud.wordcloud import WordCloud
 from configs.db_configs import add_one_item
 from configs.html_features import set_image, HTML_WRAPPER
-
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
-
 import torch
 from torch.nn.functional import softmax
-
 from spacy import displacy
 import spacy
 nlp = spacy.load('en_core_web_sm')
-
 from collections import Counter
 import neattext as nt
 import neattext.functions as nfx
 from textblob import TextBlob
 import nltk
 
-# These corpora are commonly used by TextBlob for various natural language processing tasks.
-nltk.download('brown')
-nltk.download('punkt')
-nltk.download('wordnet')
-nltk.download('averaged_perceptron_tagger')
-nltk.download('conll2000')
-nltk.download('movie_reviews')
+
 
 def get_tokens_analysis(text):
     doc_obj = nlp(text)
@@ -39,7 +29,6 @@ def get_tokens_analysis(text):
 
 def get_entities_tokens(text):
     doc_obj = nlp(text)
-
     html = displacy.render(doc_obj, style='ent')
     html = html.replace('\n\n', '\n')
     entities_tokens_html = HTML_WRAPPER.format(html)
@@ -69,15 +58,29 @@ def plot_top_keywords_frequencies(text, n_top_keywords):
 
 
 def get_sentence_stats(text):
-    blob = TextBlob(text)
-    sentences = [str(sentence) for sentence in blob.sentences]
-    noun_phrases = list(blob.noun_phrases)
-    sentence_stats = {
-        'Number of Sentences' : len(sentences),
-        'Number of Noun Phrases' : len(noun_phrases)
-    }
-    sentence_stats_df = pd.DataFrame(sentence_stats, index=[0])
-    return sentences, noun_phrases, sentence_stats_df
+    try:
+        blob = TextBlob(text)
+        sentences = [str(sentence) for sentence in blob.sentences]
+        noun_phrases = list(blob.noun_phrases)
+    except:
+        # These corpora are commonly used by TextBlob for various natural language processing tasks.
+        nltk.download('brown')
+        nltk.download('punkt')
+        nltk.download('wordnet')
+        nltk.download('averaged_perceptron_tagger')
+        nltk.download('conll2000')
+        nltk.download('movie_reviews')
+
+        blob = TextBlob(text)
+        sentences = [str(sentence) for sentence in blob.sentences]
+        noun_phrases = list(blob.noun_phrases)
+    finally:
+        sentence_stats = {
+            'Number of Sentences' : len(sentences),
+            'Number of Noun Phrases' : len(noun_phrases)
+        }
+        sentence_stats_df = pd.DataFrame(sentence_stats, index=[0])
+        return sentences, noun_phrases, sentence_stats_df
 
 
 def plot_tokens_pos(tokens_stats_df):
@@ -109,6 +112,7 @@ def plot_word_frequency(text):
     plt.axis('off')
     return fig
 
+
 def main():
     st.title('Text Analyzer')
     im1, im2, im3 = st.columns([1, 5.3, 1])
@@ -122,6 +126,7 @@ def main():
 
     text = st.text_area('Text Analyzer', placeholder='Enter your input text here ...', height=200, label_visibility='hidden')
     n_top_keywords = st.sidebar.slider('n Top keywords', 5, 15, 5, 1)
+
     if st.button('Analyze it'):
         if text != '':
             with st.expander('Original Text'):
@@ -157,7 +162,7 @@ def main():
                 st.write('Noun Phrases:\n', noun_phrases)
 
             with col22:
-                with st.expander('The Frequency of Tokens Part of speech'):
+                with st.expander('The Distribution of different Parts of Speech'):
                     figure = plot_tokens_pos(tokens_stats_df)
                     st.plotly_chart(figure)
 
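For reference, the change to get_sentence_stats defers the nltk.download calls until TextBlob actually fails for lack of data, instead of downloading the corpora on every import of the page. Below is a minimal standalone sketch of that download-on-demand pattern; the function name sentence_and_noun_phrase_stats, the TEXTBLOB_CORPORA constant, and the broad exception handling are illustrative assumptions, not code from this repository.

import nltk
from textblob import TextBlob

# Corpora named in the commit; TextBlob relies on them for sentence splitting,
# POS tagging, and noun-phrase extraction.
TEXTBLOB_CORPORA = ['brown', 'punkt', 'wordnet',
                    'averaged_perceptron_tagger', 'conll2000', 'movie_reviews']


def sentence_and_noun_phrase_stats(text):
    """Return sentences and noun phrases, fetching NLTK data only when missing."""
    try:
        blob = TextBlob(text)
        sentences = [str(sentence) for sentence in blob.sentences]
        noun_phrases = list(blob.noun_phrases)
    except Exception:
        # Kept broad, mirroring the commit's bare except: depending on which
        # resource is absent, the failure surfaces as NLTK's LookupError or as
        # a TextBlob missing-corpus error.
        for corpus in TEXTBLOB_CORPORA:
            nltk.download(corpus)
        blob = TextBlob(text)
        sentences = [str(sentence) for sentence in blob.sentences]
        noun_phrases = list(blob.noun_phrases)
    return sentences, noun_phrases


# Example usage:
# sentences, noun_phrases = sentence_and_noun_phrase_stats('Streamlit makes small NLP demos easy.')

Unlike the committed version, this sketch returns after the try/except rather than inside a finally block, so an error raised during the retry still propagates instead of being masked by the return.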