abdullahmubeen10 committed on
Commit
1c22865
·
verified ·
1 Parent(s): 1fa225a

Update pages/Text Preprocessing.py

Browse files
Files changed (1) hide show
  1. pages/Text Preprocessing.py +35 -6
pages/Text Preprocessing.py CHANGED
@@ -10,7 +10,29 @@ from sparknlp.annotator import *
10
  from sparknlp.base import *
11
  from sparknlp.pretrained import PretrainedPipeline
12
  from pyspark.sql.types import StringType, IntegerType
13
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  @st.cache_resource
15
  def init_spark():
16
  spark = sparknlp.start()
@@ -71,8 +93,15 @@ def fit_data(pipeline, data):
71
  def extract_annotations(output, annotation_type):
72
  return [anno.result for anno in output[0][annotation_type]]
73
 
74
-
75
- st.title("Typo Detector")
 
 
 
 
 
 
 
76
 
77
  model_name = "SentenceDetector|Tokenizer|Stemmer|Lemmatizer|Normalizer|Stop Words Remover"
78
  #model = st.sidebar.selectbox("Choose the pretrained model", model_name, help="For more info about the models visit: https://sparknlp.org/models",)
@@ -105,7 +134,7 @@ examples = [
105
 
106
  ]
107
 
108
- st.subheader("Split and clean text")
109
 
110
  selected_text = st.selectbox("Select an example", examples)
111
 
@@ -116,7 +145,7 @@ if custom_input:
116
  elif selected_text:
117
  selected_text = selected_text
118
 
119
- st.subheader('Selected Text')
120
  st.write(selected_text)
121
 
122
  spark = init_spark()
@@ -151,5 +180,5 @@ if selected_models[5]:
151
 
152
  if data_dict:
153
  df = pd.DataFrame(dict([(k, pd.Series(v)) for k, v in data_dict.items()]))
154
- st.subheader("Annotation Results:")
155
  st.dataframe(df)
 
10
  from sparknlp.base import *
11
  from sparknlp.pretrained import PretrainedPipeline
12
  from pyspark.sql.types import StringType, IntegerType
13
+
14
+ # Configure Streamlit page
15
+ st.set_page_config(
16
+ layout="wide",
17
+ page_title="Spark NLP Demos App",
18
+ initial_sidebar_state="auto"
19
+ )
20
+
21
+ # Custom CSS for better styling
22
+ st.markdown("""
23
+ <style>
24
+ .main-title {
25
+ font-size: 36px;
26
+ color: #4A90E2;
27
+ font-weight: bold;
28
+ text-align: center;
29
+ }
30
+ .section p, .section ul {
31
+ color: #666666;
32
+ }
33
+ </style>
34
+ """, unsafe_allow_html=True)
35
+
36
  @st.cache_resource
37
  def init_spark():
38
  spark = sparknlp.start()
 
93
  def extract_annotations(output, annotation_type):
94
  return [anno.result for anno in output[0][annotation_type]]
95
 
96
+ # st.title("Summarize Text")
97
+ st.markdown('<div class="main-title">State-of-the-Art Text Preprocessing with Spark NLP</div>', unsafe_allow_html=True)
98
+ st.write("")
99
+ st.write("")
100
+ st.markdown("""
101
+ <div class="section">
102
+ <p>This demo utilizes a comprehensive text preprocessing pipeline using Spark NLP. The pipeline includes several stages such as document assembly, tokenization, sentence detection, normalization, stopword cleaning, stemming, and lemmatization. These steps are essential for preparing text data for downstream NLP tasks, ensuring the text is clean and standardized for effective model training and evaluation.</p>
103
+ </div>
104
+ """, unsafe_allow_html=True)
105
 
106
  model_name = "SentenceDetector|Tokenizer|Stemmer|Lemmatizer|Normalizer|Stop Words Remover"
107
  #model = st.sidebar.selectbox("Choose the pretrained model", model_name, help="For more info about the models visit: https://sparknlp.org/models",)
 
134
 
135
  ]
136
 
137
+ st.write("Split and clean text")
138
 
139
  selected_text = st.selectbox("Select an example", examples)
140
 
 
145
  elif selected_text:
146
  selected_text = selected_text
147
 
148
+ st.write('Selected Text')
149
  st.write(selected_text)
150
 
151
  spark = init_spark()
 
180
 
181
  if data_dict:
182
  df = pd.DataFrame(dict([(k, pd.Series(v)) for k, v in data_dict.items()]))
183
+ st.write("Annotation Results:")
184
  st.dataframe(df)