# MarianMT / Demo.py
import streamlit as st
import sparknlp
import os
from sparknlp.base import *
from sparknlp.common import *
from sparknlp.annotator import *
from pyspark.ml import Pipeline
from sparknlp.pretrained import PretrainedPipeline
# Page Configuration
st.set_page_config(
    layout="wide",
    initial_sidebar_state="auto"
)
# Custom CSS for Styling
st.markdown("""
    <style>
        .main-title {
            font-size: 36px;
            color: #4A90E2;
            font-weight: bold;
            text-align: center;
        }
        .section {
            background-color: #f9f9f9;
            padding: 10px;
            border-radius: 10px;
            margin-top: 10px;
        }
        .section p, .section ul {
            color: #666666;
        }
    </style>
""", unsafe_allow_html=True)
# Initialize Spark NLP
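# sparknlp.start() creates (or reuses) a SparkSession with the Spark NLP jars on the
# classpath; st.cache_resource keeps a single session alive for the Streamlit server process.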
@st.cache_resource
def init_spark():
    return sparknlp.start()
# Create a Spark NLP Pipeline for MarianTransformer
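# The pipeline chains three annotators: DocumentAssembler wraps the raw text into a
# document annotation, SentenceDetectorDLModel splits it into sentences, and
# MarianTransformer translates each sentence. The "xx" language code marks the
# pretrained models as multilingual.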
@st.cache_resource
def create_pipeline(model_name):
    document_assembler = DocumentAssembler() \
        .setInputCol("text") \
        .setOutputCol("document")

    sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
        .setInputCols(["document"]) \
        .setOutputCol("sentences")

    marian_translator = MarianTransformer.pretrained(model_name, "xx") \
        .setInputCols(["sentences"]) \
        .setOutputCol("translation")

    return Pipeline(stages=[document_assembler, sentence_detector, marian_translator])
# Process the Input Text Through the Pipeline
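# None of the stages are trainable, so fit() simply materialises a PipelineModel;
# 'translation.result' holds the translated sentences for each input row.
# For single-string requests, Spark NLP's LightPipeline would be a lighter-weight
# alternative, e.g. (sketch, not used in this demo):
#   light = LightPipeline(pipeline.fit(spark.createDataFrame([[""]]).toDF("text")))
#   translated = light.annotate(text)["translation"]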
def fit_data(pipeline, text):
    data = spark.createDataFrame([[text]]).toDF("text")
    result = pipeline.fit(data).transform(data)
    return result.select('translation.result').collect()
# Title and Subtitle
title = 'Multilingual Text Translation with Spark NLP and MarianMT'
sub_title = """
MarianTransformer is a powerful, state-of-the-art machine translation annotator based on the Transformer architecture. Built on pretrained models from the MarianMT project, it supports over 1,000 translation directions, making it one of the most versatile tools for multilingual natural language processing. Integrated within Spark NLP, the annotator enables scalable and efficient text translation by leveraging the parallel processing capabilities of Apache Spark. Whether you're translating large documents or handling multiple languages at once, it delivers high-quality translations with minimal latency.
"""
st.markdown(f'<div class="main-title">{title}</div>', unsafe_allow_html=True)
st.markdown(f'<div class="section"><p>{sub_title}</p></div>', unsafe_allow_html=True)
# Mapping Models to Descriptions
model_mappings = {
"opus_mt_en_fr": "Translate text from English to French",
"opus_mt_en_it": "Translate text from English to Italian",
"opus_mt_en_es": "Translate text from English to Spanish",
"opus_mt_en_de": "Translate text from English to German",
"opus_mt_en_cpp": "Translate text from English to Portuguese",
"opus_mt_fr_en": "Translate text from French to English",
"opus_mt_it_en": "Translate text from Italian to English",
"opus_mt_es_en": "Translate text from Spanish to English",
"opus_mt_de_en": "Translate text from German to English",
"opus_mt_cpp_en": "Translate text from Portuguese to English"
}
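# Note: the English<->Portuguese pair is served by the opus_mt_en_cpp / opus_mt_cpp_en
# models; "cpp" is the OPUS group code for Portuguese-based creoles and pidgins rather
# than a plain "pt" pair name.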
# Sidebar for Language Selection
st.sidebar.title("Language Selection")
language_mapping = {
"English": 'en',
"French": 'fr',
"Italian": 'it',
"Spanish": 'es',
"German": 'de',
"Portuguese": 'cpp'
}
from_language = st.sidebar.selectbox("Translate From", list(language_mapping.keys()))
if from_language == 'English':
    to_language = st.sidebar.selectbox("Translate To", ['French', 'Italian', 'Spanish', 'German', 'Portuguese'])
else:
    to_language = st.sidebar.selectbox("Translate To", ['English'])
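# Only English<->X pairs are exposed: non-English sources translate to English only.
# The model name below is assembled as opus_mt_<src>_<tgt> to match the keys in model_mappings.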
selected_model = f'opus_mt_{language_mapping[from_language]}_{language_mapping[to_language]}'
st.subheader(model_mappings[selected_model])
# Reference Notebook Link in Sidebar
link= """<a href="https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/streamlit_notebooks/TRANSLATION_MARIAN.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/></a>"""
st.sidebar.title('')
st.sidebar.markdown('Reference notebook:')
st.sidebar.markdown(link, unsafe_allow_html=True)
# Load Sample Text Files
folder_path = f"inputs/{selected_model}"
examples = [
    lines[1].strip()
    for filename in os.listdir(folder_path)
    if filename.endswith('.txt')
    for lines in [open(os.path.join(folder_path, filename), 'r', encoding='utf-8').readlines()]
    if len(lines) >= 2
]
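# Each sample file under inputs/<model_name>/ is expected to hold a title on its first
# line and the sample text to translate on its second line (lines[1]).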
selected_text = st.selectbox("Select a Sample Text", examples)
custom_input = st.text_input("Try it for yourself!")
if custom_input:
    selected_text = custom_input
# Display the Selected or Entered Text
st.subheader('Selected Text')
st.write(selected_text)
# Perform Translation and Display the Result
st.subheader("Translation Result")
spark = init_spark()
pipeline = create_pipeline(selected_model)
output = fit_data(pipeline, selected_text)
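# collect() returns one Row per input row; output[0][0] is the list of per-sentence
# translations for the single row submitted above.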
res = "".join(output[0][0])
HTML_WRAPPER = """<div class="scroll entities" style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem; white-space:pre-wrap">{}</div>"""
st.markdown(HTML_WRAPPER.format(res), unsafe_allow_html=True)