Spaces:

spark-nlp
/

sparknlp-grammar-analysis-and-dependency-parsing

Sleeping

App Files Files Community

sparknlp-grammar-analysis-and-dependency-parsing / Demo.py

abdullahmubeen10

Upload 6 files

6520bbf verified 5 months ago

raw

history blame contribute delete

8.65 kB

	import html

	import streamlit as st
	import sparknlp
	from johnsnowlabs import nlp
	from sparknlp.base import *
	from sparknlp.annotator import *
	from pyspark.ml import Pipeline
	import pyspark.sql.functions as F
	import pandas as pd

	# Page Configuration
	# Wide layout with the sidebar expanded when the page first loads.
	st.set_page_config(layout="wide", initial_sidebar_state="expanded")

	# CSS Styling
	# Stylesheet for the title banner, the grey "section" cards and the HTML
	# tables emitted with class "table"; injected once as raw HTML below.
	_PAGE_CSS = """
	<style>
	.main-title {
	font-size: 36px;
	color: #4A90E2;
	font-weight: bold;
	text-align: center;
	}
	.section {
	background-color: #f9f9f9;
	padding: 10px;
	border-radius: 10px;
	margin-top: 10px;
	}
	.section p, .section ul {
	color: #666666;
	}
	.table {
	width: 100%;
	border-collapse: collapse;
	margin-top: 20px;
	}
	.table th, .table td {
	border: 1px solid #ddd;
	padding: 8px;
	text-align: left;
	}
	.table th {
	background-color: #4A90E2;
	color: white;
	}
	.table td {
	background-color: #f2f2f2;
	}
	</style>
	"""
	st.markdown(_PAGE_CSS, unsafe_allow_html=True)

	# Initialize Spark
	@st.cache_resource
	def init_spark():
	    """Start Spark NLP and return the object produced by ``sparknlp.start()``.

	    Decorated with ``st.cache_resource`` so the session is created once and
	    reused instead of being started again on each script rerun.
	    """
	    session = sparknlp.start()
	    return session

	# Create NLP Pipeline
	@st.cache_resource
	def create_pipeline():
	    """Build the unfitted grammar-analysis ``Pipeline``.

	    Stages, in order, and the column each one writes:

	    1. ``DocumentAssembler``           "text"     -> "document"
	    2. ``Tokenizer``                   "document" -> "token"
	    3. ``PerceptronModel`` (pos_anc)   -> "pos"
	    4. ``DependencyParserModel``       -> "dependency"
	    5. ``TypedDependencyParserModel``  -> "dependency_type"

	    Returns:
	        The assembled ``Pipeline``; the caller is responsible for fitting it.
	    """
	    assembler = DocumentAssembler().setInputCol("text").setOutputCol("document")

	    word_splitter = Tokenizer().setInputCols(["document"]).setOutputCol("token")

	    # Part-of-speech tags feed both dependency parsers below.
	    part_of_speech = (
	        PerceptronModel.pretrained("pos_anc", 'en')
	        .setInputCols("document", "token")
	        .setOutputCol("pos")
	    )

	    # Unlabeled dependency heads.
	    untyped_parser = (
	        DependencyParserModel.pretrained('dependency_conllu')
	        .setInputCols(["document", "pos", "token"])
	        .setOutputCol("dependency")
	    )

	    # Relation labels for the heads produced by the previous stage.
	    typed_parser = (
	        TypedDependencyParserModel.pretrained('dependency_typed_conllu')
	        .setInputCols(["token", "pos", "dependency"])
	        .setOutputCol("dependency_type")
	    )

	    stages = [assembler, word_splitter, part_of_speech, untyped_parser, typed_parser]
	    return Pipeline(stages=stages)

	# Fit Data to Pipeline
	def fit_data(pipeline, text):
	    """Run ``pipeline`` over a single piece of text.

	    Args:
	        pipeline: Pipeline exposing ``fit``; the fitted model must expose ``transform``.
	        text: The sentence(s) to analyse; stored in a one-row "text" column.

	    Returns:
	        The transformed DataFrame produced by the fitted pipeline.

	    Note:
	        Uses the module-level ``spark`` session, which the script assigns
	        before this function is first called.
	    """
	    single_row = spark.createDataFrame([[text]]).toDF("text")
	    fitted = pipeline.fit(single_row)
	    return fitted.transform(single_row)

	# Render DataFrame as HTML Table
	def render_table(df, sidebar=False):
	    """Render a pandas DataFrame as an HTML table styled by the ``table`` CSS class.

	    Args:
	        df: DataFrame to display; its index is not rendered.
	        sidebar: When true the table is written to the sidebar, otherwise
	            to the main page.

	    Cell values are HTML-escaped. The tables shown by this app contain
	    tokens taken from user-supplied text, and the markup is emitted with
	    ``unsafe_allow_html=True``; without escaping, characters such as ``<``,
	    ``>`` and ``&`` would be interpreted as markup instead of being shown.
	    """
	    html = df.to_html(classes="table", index=False, escape=True)
	    target = st.sidebar if sidebar else st
	    target.markdown(html, unsafe_allow_html=True)

	def explain_tags(tag_type, tags, tag_dict):
	    """Show a sidebar table explaining the tags that have a known meaning.

	    Args:
	        tag_type: Label used in the column headers ("<tag_type> Tag" and
	            "<tag_type> Meaning").
	        tags: Iterable of tags to explain.
	        tag_dict: Mapping from tag to its description; tags missing from it
	            are skipped.

	    Nothing is rendered when none of ``tags`` appears in ``tag_dict``.
	    """
	    rows = []
	    for tag in tags:
	        if tag in tag_dict:
	            rows.append((tag, tag_dict[tag]))

	    if not rows:
	        return

	    headers = [f"{tag_type} Tag", f"{tag_type} Meaning"]
	    df = pd.DataFrame(rows, columns=headers)
	    df.index = [''] * len(df)  # Hide the index
	    render_table(df, sidebar=True)

	# Page Title and Subtitle
	title = "Grammar Analysis & Dependency Parsing"
	sub_title = "Visualize the syntactic structure of a sentence as a directed labeled graph."

	# Title uses the "main-title" CSS class; the subtitle is styled inline.
	title_html = f'<div class="main-title">{title}</div>'
	sub_title_html = f'<div style="text-align: center; color: #666666;">{sub_title}</div>'
	for banner_html in (title_html, sub_title_html):
	    st.markdown(banner_html, unsafe_allow_html=True)

	# Example Sentences
	examples = [
	    "John Snow is a good man. He knows a lot about science.",
	    "In what country is the WTO headquartered?",
	    "I was wearing my dark blue shirt and tie.",
	    "The Geneva Motor Show is the most popular car show of the year.",
	    "Bill Gates and Steve Jobs had periods of civility.",
	]

	# Text Selection
	selected_text = st.selectbox("Select an example", examples)
	custom_input = st.text_input("Try it with your own sentence!")

	# A custom sentence wins over the selected example, but input that is only
	# whitespace is treated as "nothing typed" so the example is analysed
	# instead of an effectively empty string.
	text_to_analyze = custom_input if custom_input.strip() else selected_text

	st.write('Text to analyze:')
	HTML_WRAPPER = """<div class="scroll entities" style="overflow-x: auto;
	border: 1px solid #e6e9ef; border-radius: 0.25rem;
	padding: 1rem; margin-bottom: 2.5rem; white-space:pre-wrap">{}</div>"""
	# The wrapper is rendered as raw HTML, so the user-supplied text must be
	# escaped; otherwise "<", ">" and "&" would be parsed as markup instead of
	# being displayed. Only the echoed copy is escaped - the analysis below
	# still receives the original text.
	st.markdown(HTML_WRAPPER.format(html.escape(text_to_analyze)), unsafe_allow_html=True)

	# Initialize Spark and Pipeline
	spark = init_spark()
	pipeline = create_pipeline()
	output = fit_data(pipeline, text_to_analyze)


	@st.cache_resource
	def load_dep_tree_model():
	    """Load the 'dep.typed' model used to draw the dependency tree.

	    Cached with ``st.cache_resource``, like the Spark session and the
	    pipeline above, so the model is loaded once rather than on every
	    script rerun triggered by a widget interaction.
	    """
	    return nlp.load('dep.typed')


	# Display Dependency Tree
	st.write("Dependency Tree:")
	load_dep_tree_model().viz_streamlit_dep_tree(
	    text=text_to_analyze,
	    title='',
	    sub_title='',
	    set_wide_layout_CSS=False,
	    generate_code_sample=False,
	    key="NLU_streamlit",
	    show_infos=False,
	    show_logo=False,
	    show_text_input=False,
	)

	# Display Raw Result
	st.write("Raw Result:")

	# Zip the per-token annotation arrays so that exploding yields one row per
	# token; the zipped struct fields are addressed positionally as '0'..'5'.
	zipped_annotations = F.arrays_zip(
	    output.token.result,
	    output.token.begin,
	    output.token.end,
	    output.pos.result,
	    output.dependency.result,
	    output.dependency_type.result,
	)
	result_columns = ["chunk", "begin", "end", "pos", "dependency", "dependency_type"]
	projections = [
	    F.expr(f"cols['{position}']").alias(column_name)
	    for position, column_name in enumerate(result_columns)
	]
	exploded = output.select(F.explode(zipped_annotations).alias("cols"))
	df = exploded.select(*projections).toPandas()

	render_table(df)

	# Sidebar Content
	# POS and Dependency dictionaries

	# Part-of-speech tag -> human-readable meaning.
	pos_dict = {
	    "CC": "Coordinating conjunction",
	    "CD": "Cardinal number",
	    "DT": "Determiner",
	    "EX": "Existential there",
	    "FW": "Foreign word",
	    "IN": "Preposition or subordinating conjunction",
	    "JJ": "Adjective",
	    "JJR": "Adjective, comparative",
	    "JJS": "Adjective, superlative",
	    "LS": "List item marker",
	    "MD": "Modal",
	    "NN": "Noun, singular or mass",
	    "NNS": "Noun, plural",
	    "NNP": "Proper noun, singular",
	    "NNPS": "Proper noun, plural",
	    "PDT": "Predeterminer",
	    "POS": "Possessive ending",
	    "PRP": "Personal pronoun",
	    "PRP$": "Possessive pronoun",
	    "RB": "Adverb",
	    "RBR": "Adverb, comparative",
	    "RBS": "Adverb, superlative",
	    "RP": "Particle",
	    "SYM": "Symbol",
	    "TO": "to",
	    "UH": "Interjection",
	    "VB": "Verb, base form",
	    "VBD": "Verb, past tense",
	    "VBG": "Verb, gerund or present participle",
	    "VBN": "Verb, past participle",
	    "VBP": "Verb, non-3rd person singular present",
	    "VBZ": "Verb, 3rd person singular present",
	    "WDT": "Wh-determiner",
	    "WP": "Wh-pronoun",
	    "WP$": "Possessive wh-pronoun",
	    "WRB": "Wh-adverb",
	}

	# Dependency relation label -> human-readable meaning.
	dependency_dict = {
	    "acl": "clausal modifier of noun (adjectival clause)",
	    "advcl": "adverbial clause modifier",
	    "advmod": "adverbial modifier",
	    "amod": "adjectival modifier",
	    "appos": "appositional modifier",
	    "aux": "auxiliary",
	    "case": "case marking",
	    "cc": "coordinating conjunction",
	    "ccomp": "clausal complement",
	    "clf": "classifier",
	    "compound": "compound",
	    "conj": "conjunct",
	    "cop": "copula",
	    "csubj": "clausal subject",
	    "dep": "unspecified dependency",
	    "det": "determiner",
	    "discourse": "discourse element",
	    "dislocated": "dislocated elements",
	    "expl": "expletive",
	    "fixed": "fixed multiword expression",
	    "flat": "flat multiword expression",
	    "goeswith": "goes with",
	    "iobj": "indirect object",
	    "list": "list",
	    "mark": "marker",
	    "nmod": "nominal modifier",
	    "nsubj": "nominal subject",
	    "nummod": "numeric modifier",
	    "obj": "object",
	    "obl": "oblique nominal",
	    "orphan": "orphan",
	    "parataxis": "parataxis",
	    "punct": "punctuation",
	    "reparandum": "overridden disfluency",
	    "root": "root",
	    "vocative": "vocative",
	    "xcomp": "open clausal complement",
	}

	# Get unique POS and dependency tags
	unique_pos = df['pos'].unique()
	unique_dep = df['dependency_type'].unique()

	# Sidebar options for explanations
	# One checkbox per tag family; ticking it lists the tags found in the
	# analysed text together with their meaning.
	sidebar_explainers = (
	    ("Explain POS Tags", "POS", unique_pos, pos_dict),
	    ("Explain Dependency Types", "Dependency", unique_dep, dependency_dict),
	)
	for checkbox_label, tag_family, found_tags, meanings in sidebar_explainers:
	    if st.sidebar.checkbox(checkbox_label):
	        explain_tags(tag_family, found_tags, meanings)

	# Sidebar with Reference Notebook Link
	# "Open In Colab" badge wrapped in a link to the reference notebook.
	colab_link = """
	<a href="https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/streamlit_notebooks/GRAMMAR_EN.ipynb">
	<img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
	</a>
	"""
	sidebar = st.sidebar
	sidebar.markdown('Reference Notebook:')
	sidebar.markdown(colab_link, unsafe_allow_html=True)