Update pages/Text Preprocessing.py
Browse files- pages/Text Preprocessing.py +35 -6
pages/Text Preprocessing.py
CHANGED
@@ -10,7 +10,29 @@ from sparknlp.annotator import *
|
|
10 |
from sparknlp.base import *
|
11 |
from sparknlp.pretrained import PretrainedPipeline
|
12 |
from pyspark.sql.types import StringType, IntegerType
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
@st.cache_resource
|
15 |
def init_spark():
|
16 |
spark = sparknlp.start()
|
@@ -71,8 +93,15 @@ def fit_data(pipeline, data):
|
|
71 |
def extract_annotations(output, annotation_type):
    """Return the `result` strings for every annotation of *annotation_type*
    found in the first row of a pipeline's annotated output.

    `output` is expected to be a sequence whose first element maps
    annotation-type names to lists of annotation objects carrying a
    `.result` attribute (Spark NLP LightPipeline `fullAnnotate` shape —
    assumed from usage; confirm against the caller).
    """
    first_row = output[0]
    results = []
    for annotation in first_row[annotation_type]:
        results.append(annotation.result)
    return results
|
73 |
|
74 |
-
|
75 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
|
77 |
model_name = "SentenceDetector|Tokenizer|Stemmer|Lemmatizer|Normalizer|Stop Words Remover"
|
78 |
#model = st.sidebar.selectbox("Choose the pretrained model", model_name, help="For more info about the models visit: https://sparknlp.org/models",)
|
@@ -105,7 +134,7 @@ examples = [
|
|
105 |
|
106 |
]
|
107 |
|
108 |
-
st.
|
109 |
|
110 |
selected_text = st.selectbox("Select an example", examples)
|
111 |
|
@@ -116,7 +145,7 @@ if custom_input:
|
|
116 |
elif selected_text:
|
117 |
selected_text = selected_text
|
118 |
|
119 |
-
st.
|
120 |
st.write(selected_text)
|
121 |
|
122 |
spark = init_spark()
|
@@ -151,5 +180,5 @@ if selected_models[5]:
|
|
151 |
|
152 |
if data_dict:
|
153 |
df = pd.DataFrame(dict([(k, pd.Series(v)) for k, v in data_dict.items()]))
|
154 |
-
st.
|
155 |
st.dataframe(df)
|
|
|
10 |
from sparknlp.base import *
|
11 |
from sparknlp.pretrained import PretrainedPipeline
|
12 |
from pyspark.sql.types import StringType, IntegerType
|
13 |
+
|
14 |
+
# Configure Streamlit page
|
15 |
+
st.set_page_config(
|
16 |
+
layout="wide",
|
17 |
+
page_title="Spark NLP Demos App",
|
18 |
+
initial_sidebar_state="auto"
|
19 |
+
)
|
20 |
+
|
21 |
+
# Custom CSS for better styling
|
22 |
+
st.markdown("""
|
23 |
+
<style>
|
24 |
+
.main-title {
|
25 |
+
font-size: 36px;
|
26 |
+
color: #4A90E2;
|
27 |
+
font-weight: bold;
|
28 |
+
text-align: center;
|
29 |
+
}
|
30 |
+
.section p, .section ul {
|
31 |
+
color: #666666;
|
32 |
+
}
|
33 |
+
</style>
|
34 |
+
""", unsafe_allow_html=True)
|
35 |
+
|
36 |
@st.cache_resource
|
37 |
def init_spark():
|
38 |
spark = sparknlp.start()
|
|
|
def extract_annotations(output, annotation_type):
    """Return the `result` strings for every annotation of *annotation_type*
    found in the first row of a pipeline's annotated output.

    `output` is expected to be a sequence whose first element maps
    annotation-type names to lists of annotation objects carrying a
    `.result` attribute (Spark NLP LightPipeline `fullAnnotate` shape —
    assumed from usage; confirm against the caller).
    """
    first_row = output[0]
    results = []
    for annotation in first_row[annotation_type]:
        results.append(annotation.result)
    return results
|
95 |
|
96 |
+
# st.title("Summarize Text")
|
97 |
+
st.markdown('<div class="main-title">State-of-the-Art Text Preprocessing with Spark NLP</div>', unsafe_allow_html=True)
|
98 |
+
st.write("")
|
99 |
+
st.write("")
|
100 |
+
st.markdown("""
|
101 |
+
<div class="section">
|
102 |
+
<p>This demo utilizes a comprehensive text preprocessing pipeline using Spark NLP. The pipeline includes several stages such as document assembly, tokenization, sentence detection, normalization, stopword cleaning, stemming, and lemmatization. These steps are essential for preparing text data for downstream NLP tasks, ensuring the text is clean and standardized for effective model training and evaluation.</p>
|
103 |
+
</div>
|
104 |
+
""", unsafe_allow_html=True)
|
105 |
|
106 |
model_name = "SentenceDetector|Tokenizer|Stemmer|Lemmatizer|Normalizer|Stop Words Remover"
|
107 |
#model = st.sidebar.selectbox("Choose the pretrained model", model_name, help="For more info about the models visit: https://sparknlp.org/models",)
|
|
|
134 |
|
135 |
]
|
136 |
|
137 |
+
st.write("Split and clean text")
|
138 |
|
139 |
selected_text = st.selectbox("Select an example", examples)
|
140 |
|
|
|
145 |
elif selected_text:
|
146 |
selected_text = selected_text
|
147 |
|
148 |
+
st.write('Selected Text')
|
149 |
st.write(selected_text)
|
150 |
|
151 |
spark = init_spark()
|
|
|
180 |
|
181 |
if data_dict:
|
182 |
df = pd.DataFrame(dict([(k, pd.Series(v)) for k, v in data_dict.items()]))
|
183 |
+
st.write("Annotation Results:")
|
184 |
st.dataframe(df)
|