|
import streamlit as st |
|
import sparknlp |
|
import os |
|
import pandas as pd |
|
|
|
from sparknlp.base import * |
|
from sparknlp.annotator import * |
|
from pyspark.ml import Pipeline |
|
from sparknlp.pretrained import PretrainedPipeline |
|
|
|
|
|
# Page-level Streamlit settings: wide layout, the page title, and an
# automatically managed sidebar state.
_page_settings = {
    "layout": "wide",
    "page_title": "Spark NLP Demos App",
    "initial_sidebar_state": "auto",
}
st.set_page_config(**_page_settings)
|
|
|
|
|
# Custom CSS for the page: the `.main-title` heading style and the text
# colour of paragraphs and lists inside `.section` elements.
_custom_css = """
<style>
.main-title {
font-size: 36px;
color: #4A90E2;
font-weight: bold;
text-align: center;
}
.section p, .section ul {
color: #666666;
}
</style>
"""
# unsafe_allow_html=True is needed for the raw <style> tag to be emitted as HTML.
st.markdown(_custom_css, unsafe_allow_html=True)
|
|
|
@st.cache_resource
def init_spark():
    """Start Spark NLP and return the object produced by ``sparknlp.start()``.

    The function is wrapped in ``st.cache_resource``, so Streamlit caches the
    returned object instead of recreating it on every call.
    """
    spark_session = sparknlp.start()
    return spark_session
|
|
|
@st.cache_resource
def create_pipeline(model):
    """Build the unfitted sentiment pipeline for a pretrained SentimentDL model.

    Stages, in order: a DocumentAssembler reading the ``text`` column, the
    pretrained ``tfhub_use`` Universal Sentence Encoder, and the pretrained
    English SentimentDL model named by ``model``.

    Args:
        model: Name of the pretrained SentimentDL model to load.

    Returns:
        A ``Pipeline`` whose last stage writes the ``sentiment`` column.
    """
    document_stage = (
        DocumentAssembler()
        .setInputCol("text")
        .setOutputCol("document")
    )

    embedding_stage = (
        UniversalSentenceEncoder.pretrained("tfhub_use", "en")
        .setInputCols(["document"])
        .setOutputCol("sentence_embeddings")
    )

    sentiment_stage = (
        SentimentDLModel.pretrained(model, "en")
        .setInputCols(["sentence_embeddings"])
        .setOutputCol("sentiment")
    )

    return Pipeline(stages=[document_stage, embedding_stage, sentiment_stage])
|
|
|
def fit_data(pipeline, data):
    """Annotate ``data`` with ``pipeline`` and return the first sentiment result.

    The pipeline is fitted on a one-row DataFrame holding an empty ``text``
    value, wrapped in a ``LightPipeline``, and applied to ``data`` through
    ``fullAnnotate``.

    Reads the module-level ``spark`` session, which must be defined before
    this function is called.

    Args:
        pipeline: The pipeline to fit.
        data: Input handed to ``fullAnnotate``.

    Returns:
        The ``result`` attribute of the first ``sentiment`` annotation of the
        first annotated item.
    """
    # An empty placeholder frame is enough to turn the Pipeline into a
    # PipelineModel, since every stage here is already pretrained.
    placeholder_df = spark.createDataFrame([['']])
    placeholder_df = placeholder_df.toDF('text')

    light_model = LightPipeline(pipeline.fit(placeholder_df))
    first_item = light_model.fullAnnotate(data)[0]
    sentiment_annotations = first_item['sentiment']
    return sentiment_annotations[0].result
|
|
|
|
|
# Page heading, rendered as raw HTML so it can carry the `main-title` CSS class.
_page_title = "State-of-the-Art Sentiment Detection with Spark NLP"
st.markdown(f'<div class="main-title">{_page_title}</div>', unsafe_allow_html=True)
|
|
|
|
|
# Sidebar dropdown for choosing which pretrained sentiment model to run.
_model_choices = ["sentimentdl_use_imdb", "sentimentdl_use_twitter"]
_model_help = "For more info about the models visit: https://sparknlp.org/models"
model = st.sidebar.selectbox("Choose the pretrained model", _model_choices, help=_model_help)
|
|
|
|
|
# "Open in Colab" badge that links to the reference notebook for this demo.
link = """
<a href="https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/streamlit_notebooks/SENTIMENT_EN.ipynb">
<img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
</a>
"""
# Both elements go into the sidebar: a plain caption, then the HTML badge.
with st.sidebar:
    st.markdown('Reference notebook:')
    st.markdown(link, unsafe_allow_html=True)
|
|
|
|
|
def _load_examples(folder_path):
    """Return the stripped second line of every ``.txt`` file in ``folder_path``.

    Files containing fewer than two lines are skipped.

    Args:
        folder_path: Directory that holds the sample ``.txt`` files.

    Returns:
        A list of sample strings, ordered by file name.

    Raises:
        OSError: If ``folder_path`` cannot be listed or a file cannot be read.
    """
    samples = []
    # os.listdir yields names in arbitrary order; sorting keeps the dropdown
    # (and its default first entry) stable between runs and platforms.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith('.txt'):
            continue
        # The context manager closes each handle as soon as the file is read,
        # rather than leaving open files for the garbage collector.
        with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as sample_file:
            lines = sample_file.readlines()
        if len(lines) >= 2:
            samples.append(lines[1].strip())
    return samples


# Sample texts for the selected model are read from inputs/<model name>/.
folder_path = f"inputs/{model}"
examples = _load_examples(folder_path)
|
|
|
# Choose the dropdown label and the noun used in the result message based on
# the model name: names containing "imdb" or "t5" are treated as review
# models, everything else as tweet models.
model_key = model.lower()
if 'imdb' in model_key or 't5' in model_key:
    result_type = 'review'
    selected_text = st.selectbox("Select a sample IMDB review", examples)
else:
    result_type = 'tweet'
    selected_text = st.selectbox("Select a sample Tweet", examples)

# Free-form input, when provided, takes precedence over the dropdown sample.
custom_input = st.text_input("Try it for yourself!")
if custom_input:
    selected_text = custom_input

st.write('Selected Text')
st.write(selected_text)
|
|
|
|
|
# Nothing to classify (no sample available and no custom input): tell the user
# and end this script run instead of handing an empty value to the annotator.
if not selected_text:
    st.warning("Please select a sample or enter some text to analyze.")
    st.stop()

# `spark` has to be a module-level name because fit_data reads it as a global.
spark = init_spark()
pipeline = create_pipeline(model)
output = fit_data(pipeline, selected_text)
|
|
|
|
|
# Render the verdict. The accepted spellings cover the label variants checked
# for each polarity; `result_type` supplies the noun ("review" or "tweet").
if output in ['pos', 'positive', 'POSITIVE']:
    st.markdown(
        f"""<h3>This seems like a <span style="color: green">positive</span> {result_type}. <span style="font-size:35px;">😃</span></h3>""",
        unsafe_allow_html=True,
    )
elif output in ['neg', 'negative', 'NEGATIVE']:
    # The emoji span is closed with a well-formed </span> tag.
    st.markdown(
        f"""<h3>This seems like a <span style="color: red">negative</span> {result_type}. <span style="font-size:35px;">😠</span></h3>""",
        unsafe_allow_html=True,
    )
else:
    # Any other label (for instance a neutral label, if the model emits one)
    # is shown as-is so the page is never left without a result.
    st.info(f"Predicted label: {output}")
|
|
|
|
|
|