Spaces:

spark-nlp
/

SparkNLP_NER

Build error

App Files Files Community

aemin committed on Dec 1, 2021

Commit

f82b177

1 Parent(s): 450afd2

Upload app.py

Browse files

Files changed (1) hide show

app.py +146 -0

app.py ADDED Viewed

	@@ -0,0 +1,146 @@

+import streamlit as st
+st.set_page_config(
+    layout="centered",  # Can be "centered" or "wide". In the future also "dashboard", etc.
+    initial_sidebar_state="auto",  # Can be "auto", "expanded", "collapsed"
+    page_title='Extractive Summarization',  # String or None. Strings get appended with "• Streamlit".
+    page_icon='./favicon.png',  # String, anything supported by st.image, or None.
+)
+import pandas as pd
+import numpy as np
+import os
+import sys
+sys.path.append(os.path.abspath('./'))
+import streamlit_apps_config as config
+from streamlit_ner_output import show_html2, jsl_display_annotations, get_color
+import sparknlp
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.sql import functions as F
+from sparknlp_display import NerVisualizer
+from pyspark.ml import Pipeline
+from pyspark.sql.types import StringType
+# Module-level Spark session handle; get_pipeline() below uses it to build DataFrames.
+spark= sparknlp.start()
+## Marking down NER Style
+# Rendered with unsafe_allow_html so the markup in STYLE_CONFIG is emitted as raw HTML
+# (presumably the CSS for the entity highlighting -- confirm in streamlit_apps_config).
+st.markdown(config.STYLE_CONFIG, unsafe_allow_html=True)
+# NOTE(review): root_path is not referenced anywhere else in the visible code.
+root_path = config.project_path
+########## To Remove the Main Menu Hamburger ########
+# CSS snippet that hides the element with id "MainMenu".
+hide_menu_style = """
+        <style>
+        #MainMenu {visibility: hidden;}
+        </style>
+        """
+st.markdown(hide_menu_style, unsafe_allow_html=True)
+########## Side Bar ########
+## loading logo(newer version with href)
+import base64
+@st.cache(allow_output_mutation=True)
+def get_base64_of_bin_file(bin_file):
+    with open(bin_file, 'rb') as f:
+        data = f.read()
+    return base64.b64encode(data).decode()
+@st.cache(allow_output_mutation=True)
+def get_img_with_href(local_img_path, target_url):
+    img_format = os.path.splitext(local_img_path)[-1].replace('.', '')
+    bin_str = get_base64_of_bin_file(local_img_path)
+    html_code = f'''
+        <a href="{target_url}">
+            <img height="90%" width="90%" src="data:image/{img_format};base64,{bin_str}" />
+        </a>'''
+    return html_code
+# Sidebar logo: the local PNG inlined as a data URI and linked to the John Snow Labs site.
+logo_html = get_img_with_href('./jsl-logo.png', 'https://www.johnsnowlabs.com/')
+st.sidebar.markdown(logo_html, unsafe_allow_html=True)
+#sidebar info
+# Only one model is offered, so the selectbox below has a single option.
+model_name= ["nerdl_fewnerd_100d"]
+st.sidebar.title("Pretrained model to test")
+# The selectbox label is left empty; the sidebar title above serves as its heading.
+selected_model = st.sidebar.selectbox("", model_name)
+######## Main Page #########
+app_title= "Detect up to 8 entity types in general domain texts"
+app_description= "Named Entity Recognition model aimed to detect up to 8 entity types from general domain texts. This model was trained on the Few-NERD/inter public dataset using Spark NLP, and is available in Spark NLP Models hub (https://nlp.johnsnowlabs.com/models)"
+st.title(app_title)
+# The description is wrapped in an <h2> tag, hence unsafe_allow_html.
+st.markdown("<h2>"+app_description+"</h2>" , unsafe_allow_html=True)
+# NOTE(review): selected_model only decides whether this label legend is shown;
+# get_pipeline() hard-codes the model name rather than reading the selection.
+if selected_model == "nerdl_fewnerd_100d":
+    st.markdown("**`PERSON`**   **,** **`ORGANIZATION`**    **,**  **`LOCATION`** **,**  **`ART`** **,**  **`BUILDING`** **,**  **`PRODUCT`** **,**  **`EVENT`** **,**  **`OTHER`**", unsafe_allow_html=True)
+# Empty subheader -- presumably used as a vertical spacer before the text input.
+st.subheader("")
+#### Running model and creating pipeline
+st.cache(allow_output_mutation=True)
+def get_pipeline(text):
+    documentAssembler = DocumentAssembler()\
+        .setInputCol("text")\
+        .setOutputCol("document")
+    sentenceDetector= SentenceDetector()\
+        .setInputCols(["document"])\
+        .setOutputCol("sentence")
+    tokenizer = Tokenizer()\
+        .setInputCols(["sentence"])\
+        .setOutputCol("token")
+    embeddings= WordEmbeddingsModel.pretrained("glove_100d")\
+        .setInputCols(["sentence", "token"])\
+        .setOutputCol("embeddings")
+    ner= NerDLModel.pretrained("nerdl_fewnerd_100d")\
+        .setInputCols(["document", "token", "embeddings"])\
+        .setOutputCol("ner")
+    ner_converter= NerConverter()\
+        .setInputCols(["sentence", "token", "ner"])\
+        .setOutputCol("ner_chunk")
+    pipeline = Pipeline(
+        stages = [
+        documentAssembler,
+        sentenceDetector,
+        tokenizer,
+        embeddings,
+        ner,
+        ner_converter
+        ])
+    empty_df = spark.createDataFrame([[""]]).toDF("text")
+    pipeline_model = pipeline.fit(empty_df)
+    text_df= spark.createDataFrame(pd.DataFrame({"text": [text]}))
+    result= pipeline_model.transform(text_df).toPandas()
+    return result
+text= st.text_input("Type here your text and press enter to run:")
+result= get_pipeline(text)
+#Displaying Ner Visualization
+df= pd.DataFrame({"ner_chunk": result["ner_chunk"].iloc[0]})
+labels_set = set()
+for i in df['ner_chunk'].values:
+    labels_set.add(i[4]['entity'])
+labels_set = list(labels_set)
+labels = st.sidebar.multiselect(
+        "NER Labels", options=labels_set, default=list(labels_set)
+    )
+show_html2(text, df, labels, "Text annotated with identified Named Entities")