diff --git a/.streamlit/config.toml b/.streamlit/config.toml
new file mode 100644
index 0000000000000000000000000000000000000000..1351925fe878f35a9e31ac01757b8a4853757090
--- /dev/null
+++ b/.streamlit/config.toml
@@ -0,0 +1,3 @@
+[theme]
+base="light"
+primaryColor="#29B4E8"
diff --git a/Demo.py b/Demo.py
new file mode 100644
index 0000000000000000000000000000000000000000..6b83fe9df089f90cf73173c7acffe15833a9c3c0
--- /dev/null
+++ b/Demo.py
@@ -0,0 +1,175 @@
+import streamlit as st
+import sparknlp
+import os
+import html
+
+from sparknlp.base import *
+from sparknlp.common import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+from sparknlp.pretrained import PretrainedPipeline
+
+# Page Configuration
+st.set_page_config(
+    layout="wide",
+    initial_sidebar_state="auto"
+)
+
+# Custom CSS for Styling
+# NOTE(review): the stylesheet body is empty in this copy of the file; restore it if the app relies on custom CSS.
+st.markdown("""
+
+""", unsafe_allow_html=True)
+
+# Initialize Spark NLP
+@st.cache_resource
+def init_spark():
+    """Start the Spark NLP session; cached so Streamlit reruns reuse it."""
+    return sparknlp.start()
+
+# Create a Spark NLP Pipeline for MarianTransformer
+@st.cache_resource
+def create_pipeline(model_name):
+    """Build the unfitted translation pipeline for one MarianMT model.
+
+    Stages: DocumentAssembler -> SentenceDetectorDLModel ("xx") ->
+    MarianTransformer. Cached per ``model_name`` via ``st.cache_resource``.
+    """
+    document_assembler = DocumentAssembler()\
+        .setInputCol("text")\
+        .setOutputCol("document")
+
+    sentence_detector = SentenceDetectorDLModel\
+        .pretrained("sentence_detector_dl", "xx")\
+        .setInputCols(["document"])\
+        .setOutputCol("sentences")
+
+    marian_translator = MarianTransformer.pretrained(model_name, "xx")\
+        .setInputCols(["sentences"])\
+        .setOutputCol("translation")
+
+    return Pipeline(stages=[document_assembler, sentence_detector, marian_translator])
+
+# Process the Input Text Through the Pipeline
+def fit_data(pipeline, text):
+    """Run ``text`` through ``pipeline`` and return the collected rows.
+
+    Each row carries a single field, ``translation.result``, which the caller
+    joins into the final translated string.
+    """
+    # Fetch the cached session here instead of relying on a module-level
+    # ``spark`` that is only bound further down the script.
+    session = init_spark()
+    data = session.createDataFrame([[text]]).toDF("text")
+    result = pipeline.fit(data).transform(data)
+    return result.select('translation.result').collect()
+
+# Read the Bundled Sample Sentences for a Model
+def load_examples(folder_path):
+    """Return the sample texts stored as ``*.txt`` files in ``folder_path``.
+
+    The second line of each file is the sample (the first line repeats the
+    file name); files with fewer than two lines are skipped. A missing folder
+    yields an empty list.
+    """
+    if not os.path.isdir(folder_path):
+        return []
+    samples = []
+    # Sorted so the dropdown order is stable; os.listdir order is arbitrary.
+    for filename in sorted(os.listdir(folder_path)):
+        if not filename.endswith('.txt'):
+            continue
+        # The context manager closes every file instead of leaking the handle.
+        with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as handle:
+            lines = handle.readlines()
+        if len(lines) >= 2:
+            samples.append(lines[1].strip())
+    return samples
+
+# Title and Subtitle
+title = 'Multilingual Text Translation with Spark NLP and MarianMT'
+sub_title = """
+The MarianTransformer is a powerful, state-of-the-art machine translation model based on the Transformer architecture. Developed by the MarianMT project, this annotator supports over 1,000 translation directions, making it one of the most versatile tools for multilingual natural language processing. Integrated within Spark NLP, the MarianTransformer Annotator allows for scalable and efficient text translation, leveraging the parallel processing capabilities of Apache Spark. Whether you're translating large documents or handling multiple languages simultaneously, this tool ensures high-quality translations with minimal latency.
+"""
+
+# NOTE(review): the HTML wrappers around these two values are missing from this copy of the file (the f-string literals were left unterminated), so they are rendered with native Streamlit elements; restore the styled markup if it is available.
+st.title(title)
+st.markdown(sub_title)
+
+# Mapping Models to Descriptions
+model_mappings = {
+    "opus_mt_en_fr": "Translate text from English to French",
+    "opus_mt_en_it": "Translate text from English to Italian",
+    "opus_mt_en_es": "Translate text from English to Spanish",
+    "opus_mt_en_de": "Translate text from English to German",
+    "opus_mt_en_cpp": "Translate text from English to Portuguese",
+    "opus_mt_fr_en": "Translate text from French to English",
+    "opus_mt_it_en": "Translate text from Italian to English",
+    "opus_mt_es_en": "Translate text from Spanish to English",
+    "opus_mt_de_en": "Translate text from German to English",
+    "opus_mt_cpp_en": "Translate text from Portuguese to English"
+}
+
+# Sidebar for Language Selection
+st.sidebar.title("Language Selection")
+
+language_mapping = {
+    "English": 'en',
+    "French": 'fr',
+    "Italian": 'it',
+    "Spanish": 'es',
+    "German": 'de',
+    "Portuguese": 'cpp'
+}
+
+from_language = st.sidebar.selectbox("Translate From", list(language_mapping.keys()))
+
+if from_language == 'English':
+    to_language = st.sidebar.selectbox("Translate To", ['French', 'Italian', 'Spanish', 'German', 'Portuguese'])
+else:
+    to_language = st.sidebar.selectbox("Translate To", ['English'])
+
+selected_model = f'opus_mt_{language_mapping[from_language]}_{language_mapping[to_language]}'
+st.subheader(model_mappings[selected_model])
+
+# Reference Notebook Link in Sidebar
+link= """Open In Colab"""
+st.sidebar.title('')
+st.sidebar.markdown('Reference notebook:')
+st.sidebar.markdown(link, unsafe_allow_html=True)
+
+# Load Sample Text Files
+folder_path = f"inputs/{selected_model}"
+examples = load_examples(folder_path)
+
+selected_text = st.selectbox("Select a Sample Text", examples)
+custom_input = st.text_input("Try it for yourself!")
+
+if custom_input:
+    selected_text = custom_input
+
+# Nothing to translate when no sample exists and no custom text was typed
+if not selected_text:
+    st.info("Enter some text to translate.")
+    st.stop()
+
+# Display the Selected or Entered Text
+st.subheader('Selected Text')
+st.write(selected_text)
+
+# Perform Translation and Display the Result
+st.subheader("Translation Result")
+
+spark = init_spark()
+pipeline = create_pipeline(selected_model)
+output = fit_data(pipeline, selected_text)
+
+# One string per detected sentence; join with a space so sentences do not run together
+res = " ".join(output[0][0])
+# Escape before embedding: the wrapper is rendered with unsafe_allow_html=True and the text derives from user input
+HTML_WRAPPER = """
+{}
+"""
+st.markdown(HTML_WRAPPER.format(html.escape(res)), unsafe_allow_html=True)
+
+
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..7f58cb8a7daf756f87a7dc905c06ec50e8a410d4
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,72 @@
+# Download base image ubuntu 18.04
+FROM ubuntu:18.04
+
+# Set environment variables
+ENV NB_USER jovyan
+ENV NB_UID 1000
+ENV HOME /home/${NB_USER}
+ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/
+
+# Install required packages
+RUN apt-get update && apt-get install -y \
+    tar \
+    wget \
+    bash \
+    rsync \
+    gcc \
+    libfreetype6-dev \
+    libhdf5-serial-dev \
+    libpng-dev \
+    libzmq3-dev \
+    python3 \
+    python3-dev \
+    python3-pip \
+    unzip \
+    pkg-config \
+    software-properties-common \
+    graphviz \
+    openjdk-8-jdk \
+    ant \
+    ca-certificates-java \
+    && apt-get clean \
+    && update-ca-certificates -f
+
+# Install Python 3.8 and pip
+RUN add-apt-repository ppa:deadsnakes/ppa \
+    && apt-get update \
+    && apt-get install -y python3.8 python3-pip \
+    && apt-get clean
+
+# Set up JAVA_HOME
+RUN echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/" >> /etc/profile \
+    && echo "export PATH=\$JAVA_HOME/bin:\$PATH" >> /etc/profile
+# Create a new user named "jovyan" with user ID 1000
+RUN useradd -m -u ${NB_UID} ${NB_USER}
+
+# Switch to the "jovyan" user
+USER ${NB_USER}
+
+# Set home and path variables for the user
+ENV HOME=/home/${NB_USER} \
+    PATH=/home/${NB_USER}/.local/bin:$PATH
+
+# Set up PySpark to use Python 3.8 for both driver and workers
+ENV PYSPARK_PYTHON=/usr/bin/python3.8
+ENV PYSPARK_DRIVER_PYTHON=/usr/bin/python3.8
+
+# Set the working directory to the user's home directory
+WORKDIR ${HOME}
+
+# Upgrade pip and install Python dependencies
+RUN python3.8 -m pip install --upgrade pip
+COPY requirements.txt /tmp/requirements.txt
+RUN python3.8 -m pip install -r /tmp/requirements.txt
+
+# Copy the application code into the container at /home/jovyan
+COPY --chown=${NB_USER}:${NB_USER} . ${HOME}
+
+# Expose port for Streamlit
+EXPOSE 7860
+
+# Define the entry point for the container
+ENTRYPOINT ["streamlit", "run", "Demo.py", "--server.port=7860", "--server.address=0.0.0.0"]
\ No newline at end of file
diff --git a/inputs/opus_mt_cpp_en/Example1.txt b/inputs/opus_mt_cpp_en/Example1.txt
new file mode 100644
index 0000000000000000000000000000000000000000..79118a5ae2b1e965d4bc8bd6cacf82663d268264
--- /dev/null
+++ b/inputs/opus_mt_cpp_en/Example1.txt
@@ -0,0 +1,2 @@
+Example1.txt
+Além de ser o rei do norte, John Snow é um médico inglês e líder no desenvolvimento de anestesia e higiene médica. Ele é considerado o primeiro a usar dados para curar surto de cólera em 1854.
\ No newline at end of file
diff --git a/inputs/opus_mt_cpp_en/Example2.txt b/inputs/opus_mt_cpp_en/Example2.txt
new file mode 100644
index 0000000000000000000000000000000000000000..40a0c706acbeb0822942d9004780054aa57610fe
--- /dev/null
+++ b/inputs/opus_mt_cpp_en/Example2.txt
@@ -0,0 +1,2 @@
+Example2.txt
+Titanic é um romance épico americano de 1997, um filme de desastre dirigido, escrito, co-produzido e co-editado por James Cameron. Incorporando aspectos históricos e ficcionalizados, é baseado em relatos do naufrágio do RMS Titanic. É estrelado por Leonardo DiCaprio e Kate Winslet como membros de diferentes classes sociais que se apaixonam a bordo do navio durante sua viagem inaugural malfadada.
\ No newline at end of file
diff --git a/inputs/opus_mt_cpp_en/Example3.txt b/inputs/opus_mt_cpp_en/Example3.txt
new file mode 100644
index 0000000000000000000000000000000000000000..bbe3cf5696d456f1a8f1bffe4c805c67d2f5843c
--- /dev/null
+++ b/inputs/opus_mt_cpp_en/Example3.txt
@@ -0,0 +1,2 @@
+Example3.txt
+William Henry Gates III (nascido em 28 de outubro de 1955) é um magnata dos negócios, desenvolvedor de software, investidor e filantropo americano. Ele é mais conhecido como o co-fundador da Microsoft Corporation.
Durante sua carreira na Microsoft, Gates ocupou os cargos de presidente do conselho, diretor executivo (CEO), presidente e arquiteto-chefe de software. Ele também foi o maior acionista individual até maio de 2014. É um dos mais conhecidos empreendedores e pioneiros da revolução do microcomputador das décadas de 1970 e 1980. \ No newline at end of file diff --git a/inputs/opus_mt_cpp_en/Example4.txt b/inputs/opus_mt_cpp_en/Example4.txt new file mode 100644 index 0000000000000000000000000000000000000000..a0203a9453a094e438ea1669faf25ede1ba9627e --- /dev/null +++ b/inputs/opus_mt_cpp_en/Example4.txt @@ -0,0 +1,2 @@ +Example4.txt +A Mona Lisa é uma pintura a óleo do século 16 criada por Leonardo. É realizado no Louvre, em Paris. \ No newline at end of file diff --git a/inputs/opus_mt_cpp_en/Example5.txt b/inputs/opus_mt_cpp_en/Example5.txt new file mode 100644 index 0000000000000000000000000000000000000000..e7279b088170e679c94646325169699d4231d141 --- /dev/null +++ b/inputs/opus_mt_cpp_en/Example5.txt @@ -0,0 +1,2 @@ +Example5.txt +O Facebook é um serviço de rede social lançado como TheFacebook em 4 de fevereiro de 2004. Foi fundado por Mark Zuckerberg com seus colegas de faculdade e colegas da Universidade de Harvard Eduardo Saverin, Andrew McCollum, Dustin Moskovitz e Chris Hughes. A associação do site foi inicialmente limitada pelos fundadores aos alunos de Harvard, mas foi expandida para outras faculdades na área de Boston, a Ivy League e, gradualmente, a maioria das universidades nos Estados Unidos e Canadá. \ No newline at end of file diff --git a/inputs/opus_mt_cpp_en/Example6.txt b/inputs/opus_mt_cpp_en/Example6.txt new file mode 100644 index 0000000000000000000000000000000000000000..b74feb86dc200d34f44986bdf028aa2a02c38d24 --- /dev/null +++ b/inputs/opus_mt_cpp_en/Example6.txt @@ -0,0 +1,2 @@ +Example6.txt +Geoffrey Everest Hinton é um psicólogo cognitivo e cientista da computação canadense inglês, mais conhecido por seu trabalho em redes neurais artificiais. 
Desde 2013, ele divide seu tempo trabalhando para o Google e a Universidade de Toronto. Em 2017, ele foi cofundador e se tornou o assessor científico-chefe do Vector Institute em Toronto. \ No newline at end of file diff --git a/inputs/opus_mt_cpp_en/Example7.txt b/inputs/opus_mt_cpp_en/Example7.txt new file mode 100644 index 0000000000000000000000000000000000000000..952ec6075829fdde342bf0a6a03976ac991813f0 --- /dev/null +++ b/inputs/opus_mt_cpp_en/Example7.txt @@ -0,0 +1,2 @@ +Example7.txt +Quando eu disse a John que queria me mudar para o Alasca, ele me avisou que teria problemas para encontrar um Starbucks lá. \ No newline at end of file diff --git a/inputs/opus_mt_de_en/Example1.txt b/inputs/opus_mt_de_en/Example1.txt new file mode 100644 index 0000000000000000000000000000000000000000..2b7ce781298e760a1023a7d36986b567f0a7438a --- /dev/null +++ b/inputs/opus_mt_de_en/Example1.txt @@ -0,0 +1,2 @@ +Example1.txt +John Snow ist ein englischer Arzt und ein Führer in der Entwicklung der Anästhesie des Nordens.. Er ist der erste, der Daten verwendet, um Choleraausbruch 1854 zu heilen. diff --git a/inputs/opus_mt_de_en/Example2.txt b/inputs/opus_mt_de_en/Example2.txt new file mode 100644 index 0000000000000000000000000000000000000000..5d7a140b658696866a51ee476c3554238e12d51a --- /dev/null +++ b/inputs/opus_mt_de_en/Example2.txt @@ -0,0 +1,2 @@ +Example2.txt +Titanic ist ein US-amerikanischer Film von James Cameron.. Sie umfasst sowohl historische als auch fiktionalisierte Aspekte, basiert auf Konten der Versenken der RMS Titanic.. Es spielt Leonardo DiCaprio und Kate Winslet als Mitglieder der verschiedenen sozialen Klassen. Sie verliebt sich in das Schiff während seiner unglückselige Mädchen. 
diff --git a/inputs/opus_mt_de_en/Example3.txt b/inputs/opus_mt_de_en/Example3.txt new file mode 100644 index 0000000000000000000000000000000000000000..43b01e99dfb057a1107a958276cf2fb545d97753 --- /dev/null +++ b/inputs/opus_mt_de_en/Example3.txt @@ -0,0 +1,2 @@ +Example3.txt +Oktober 1955 in New York City) ist ein US-amerikanischer Software-Entwickler, Investor und Philanthrop.. Während seiner Karriere bei Microsoft, Gates war die Positionen von Vorsitzender, Chief Executive Officer (CEO), Präsident und Chef Software Architekt.. Bis Mai 2014 war er der größte Einzelaktionär.. Er ist einer der bekanntesten Unternehmer und Pioniere der Mikrocomputerrevolution der 1970er und 1980er Jahre. diff --git a/inputs/opus_mt_de_en/Example4.txt b/inputs/opus_mt_de_en/Example4.txt new file mode 100644 index 0000000000000000000000000000000000000000..8d1400498f5426a8f8cf8638b13a4e1a3a39f1ac --- /dev/null +++ b/inputs/opus_mt_de_en/Example4.txt @@ -0,0 +1,2 @@ +Example4.txt +Die Mona Lisa ist ein Ölgemälde aus dem 16.. Es wird im Louvre in Paris gehalten. diff --git a/inputs/opus_mt_de_en/Example5.txt b/inputs/opus_mt_de_en/Example5.txt new file mode 100644 index 0000000000000000000000000000000000000000..fe8697100618f75a1df784237add883d3b258a97 --- /dev/null +++ b/inputs/opus_mt_de_en/Example5.txt @@ -0,0 +1,2 @@ +Example5.txt +Facebook ist ein Social Network Service am 4. Februar 2004.. Es wurde von Mark Zuckerberg mit seinen College-Mitbewohnern und anderen Harvard-Universitätsstudenten Eduardo Saverin, Andrew McCollum, Dustin Moskovitz gegründet.. 
Die Mitgliedschaft der Website wurde zunächst von Harvard Studenten, aber auch auf andere Hochschulen in der Boston, die Ivy League und allmählich die meisten Universitäten in den Vereinigten Staaten diff --git a/inputs/opus_mt_de_en/Example6.txt b/inputs/opus_mt_de_en/Example6.txt new file mode 100644 index 0000000000000000000000000000000000000000..ff9ece7e4d54c93b00f5999ee87c2e8bee295ff2 --- /dev/null +++ b/inputs/opus_mt_de_en/Example6.txt @@ -0,0 +1,2 @@ +Example6.txt +Geoffrey Everest Hinton ist ein kanadischer Psychologe und Computer.. Seit 2013 arbeitet er für Google und die University of Toronto.. 2017 wurde er wissenschaftlicher Berater des Vector Institute in Toronto. diff --git a/inputs/opus_mt_de_en/Example7.txt b/inputs/opus_mt_de_en/Example7.txt new file mode 100644 index 0000000000000000000000000000000000000000..72e72dabf21e56f9ecf33e095631cd9edd0631bc --- /dev/null +++ b/inputs/opus_mt_de_en/Example7.txt @@ -0,0 +1,2 @@ +Example7.txt +Als ich John sagte, dass ich nach Alaska ziehen wollte, dass ich Probleme hatte, dass ich ein Starbucks dort haben würde, er mich, dass ich hatte, dass ich hatte, dass ich diff --git a/inputs/opus_mt_en_cpp/Example1.txt b/inputs/opus_mt_en_cpp/Example1.txt new file mode 100644 index 0000000000000000000000000000000000000000..1c757ff5fd516a360fecb7a8245b80bfba9bc47c --- /dev/null +++ b/inputs/opus_mt_en_cpp/Example1.txt @@ -0,0 +1,2 @@ +Example1.txt +Other than being the king of the north, John Snow is an English physician and a leader in the development of anaesthesia and medical hygiene. He is considered for being the first one using data to cure cholera outbreak in 1854. 
diff --git a/inputs/opus_mt_en_cpp/Example2.txt b/inputs/opus_mt_en_cpp/Example2.txt new file mode 100644 index 0000000000000000000000000000000000000000..c8021d29f86182ef384388804a597c0e9e835b88 --- /dev/null +++ b/inputs/opus_mt_en_cpp/Example2.txt @@ -0,0 +1,2 @@ +Example2.txt +Titanic is a 1997 American epic romance and disaster film directed, written, co-produced, and co-edited by James Cameron. Incorporating both historical and fictionalized aspects, it is based on accounts of the sinking of the RMS Titanic. It stars Leonardo DiCaprio and Kate Winslet as members of different social classes who fall in love aboard the ship during its ill-fated maiden voyage. diff --git a/inputs/opus_mt_en_cpp/Example3.txt b/inputs/opus_mt_en_cpp/Example3.txt new file mode 100644 index 0000000000000000000000000000000000000000..60b65fd29f16e3012087938d84401d620401819c --- /dev/null +++ b/inputs/opus_mt_en_cpp/Example3.txt @@ -0,0 +1,2 @@ +Example3.txt +William Henry Gates III (born October 28, 1955) is an American business magnate, software developer, investor, and philanthropist. He is best known as the co-founder of Microsoft Corporation. During his career at Microsoft, Gates held the positions of chairman, chief executive officer (CEO), president and chief software architect. He was also being the largest individual shareholder until May 2014. He is one of the best-known entrepreneurs and pioneers of the microcomputer revolution of the 1970s and 1980s. diff --git a/inputs/opus_mt_en_cpp/Example4.txt b/inputs/opus_mt_en_cpp/Example4.txt new file mode 100644 index 0000000000000000000000000000000000000000..fed060c5ad7b447dbc0c020af38973d598c2db28 --- /dev/null +++ b/inputs/opus_mt_en_cpp/Example4.txt @@ -0,0 +1,2 @@ +Example4.txt +The Mona Lisa is a 16th century oil painting created by Leonardo. It's held at the Louvre in Paris. 
diff --git a/inputs/opus_mt_en_cpp/Example5.txt b/inputs/opus_mt_en_cpp/Example5.txt new file mode 100644 index 0000000000000000000000000000000000000000..6c7cae0ae4091c708c1044ebbb1e198f02afa5b9 --- /dev/null +++ b/inputs/opus_mt_en_cpp/Example5.txt @@ -0,0 +1,2 @@ +Example5.txt +Facebook is a social networking service launched as TheFacebook on February 4, 2004. It was founded by Mark Zuckerberg with his college roommates and fellow Harvard University students Eduardo Saverin, Andrew McCollum, Dustin Moskovitz and Chris Hughes. The website's membership was initially limited by the founders to Harvard students, but was expanded to other colleges in the Boston area, the Ivy League, and gradually most universities in the United States and Canada. diff --git a/inputs/opus_mt_en_cpp/Example6.txt b/inputs/opus_mt_en_cpp/Example6.txt new file mode 100644 index 0000000000000000000000000000000000000000..3ae368a23e00f0925178d5f9cb734435e62defe8 --- /dev/null +++ b/inputs/opus_mt_en_cpp/Example6.txt @@ -0,0 +1,2 @@ +Example6.txt +Geoffrey Everest Hinton is an English Canadian cognitive psychologist and computer scientist, most noted for his work on artificial neural networks. Since 2013 he divides his time working for Google and the University of Toronto. In 2017, he cofounded and became the Chief Scientific Advisor of the Vector Institute in Toronto. diff --git a/inputs/opus_mt_en_cpp/Example7.txt b/inputs/opus_mt_en_cpp/Example7.txt new file mode 100644 index 0000000000000000000000000000000000000000..bb44cafaf668a8806a55c56504736a3807a1e59c --- /dev/null +++ b/inputs/opus_mt_en_cpp/Example7.txt @@ -0,0 +1,2 @@ +Example7.txt +When I told John that I wanted to move to Alaska, he warned me that I'd have trouble finding a Starbucks there. 
diff --git a/inputs/opus_mt_en_de/Example1.txt b/inputs/opus_mt_en_de/Example1.txt new file mode 100644 index 0000000000000000000000000000000000000000..1c757ff5fd516a360fecb7a8245b80bfba9bc47c --- /dev/null +++ b/inputs/opus_mt_en_de/Example1.txt @@ -0,0 +1,2 @@ +Example1.txt +Other than being the king of the north, John Snow is an English physician and a leader in the development of anaesthesia and medical hygiene. He is considered for being the first one using data to cure cholera outbreak in 1854. diff --git a/inputs/opus_mt_en_de/Example2.txt b/inputs/opus_mt_en_de/Example2.txt new file mode 100644 index 0000000000000000000000000000000000000000..c8021d29f86182ef384388804a597c0e9e835b88 --- /dev/null +++ b/inputs/opus_mt_en_de/Example2.txt @@ -0,0 +1,2 @@ +Example2.txt +Titanic is a 1997 American epic romance and disaster film directed, written, co-produced, and co-edited by James Cameron. Incorporating both historical and fictionalized aspects, it is based on accounts of the sinking of the RMS Titanic. It stars Leonardo DiCaprio and Kate Winslet as members of different social classes who fall in love aboard the ship during its ill-fated maiden voyage. diff --git a/inputs/opus_mt_en_de/Example3.txt b/inputs/opus_mt_en_de/Example3.txt new file mode 100644 index 0000000000000000000000000000000000000000..60b65fd29f16e3012087938d84401d620401819c --- /dev/null +++ b/inputs/opus_mt_en_de/Example3.txt @@ -0,0 +1,2 @@ +Example3.txt +William Henry Gates III (born October 28, 1955) is an American business magnate, software developer, investor, and philanthropist. He is best known as the co-founder of Microsoft Corporation. During his career at Microsoft, Gates held the positions of chairman, chief executive officer (CEO), president and chief software architect. He was also being the largest individual shareholder until May 2014. He is one of the best-known entrepreneurs and pioneers of the microcomputer revolution of the 1970s and 1980s. 
diff --git a/inputs/opus_mt_en_de/Example4.txt b/inputs/opus_mt_en_de/Example4.txt new file mode 100644 index 0000000000000000000000000000000000000000..fed060c5ad7b447dbc0c020af38973d598c2db28 --- /dev/null +++ b/inputs/opus_mt_en_de/Example4.txt @@ -0,0 +1,2 @@ +Example4.txt +The Mona Lisa is a 16th century oil painting created by Leonardo. It's held at the Louvre in Paris. diff --git a/inputs/opus_mt_en_de/Example5.txt b/inputs/opus_mt_en_de/Example5.txt new file mode 100644 index 0000000000000000000000000000000000000000..6c7cae0ae4091c708c1044ebbb1e198f02afa5b9 --- /dev/null +++ b/inputs/opus_mt_en_de/Example5.txt @@ -0,0 +1,2 @@ +Example5.txt +Facebook is a social networking service launched as TheFacebook on February 4, 2004. It was founded by Mark Zuckerberg with his college roommates and fellow Harvard University students Eduardo Saverin, Andrew McCollum, Dustin Moskovitz and Chris Hughes. The website's membership was initially limited by the founders to Harvard students, but was expanded to other colleges in the Boston area, the Ivy League, and gradually most universities in the United States and Canada. diff --git a/inputs/opus_mt_en_de/Example6.txt b/inputs/opus_mt_en_de/Example6.txt new file mode 100644 index 0000000000000000000000000000000000000000..3ae368a23e00f0925178d5f9cb734435e62defe8 --- /dev/null +++ b/inputs/opus_mt_en_de/Example6.txt @@ -0,0 +1,2 @@ +Example6.txt +Geoffrey Everest Hinton is an English Canadian cognitive psychologist and computer scientist, most noted for his work on artificial neural networks. Since 2013 he divides his time working for Google and the University of Toronto. In 2017, he cofounded and became the Chief Scientific Advisor of the Vector Institute in Toronto. 
diff --git a/inputs/opus_mt_en_de/Example7.txt b/inputs/opus_mt_en_de/Example7.txt new file mode 100644 index 0000000000000000000000000000000000000000..bb44cafaf668a8806a55c56504736a3807a1e59c --- /dev/null +++ b/inputs/opus_mt_en_de/Example7.txt @@ -0,0 +1,2 @@ +Example7.txt +When I told John that I wanted to move to Alaska, he warned me that I'd have trouble finding a Starbucks there. diff --git a/inputs/opus_mt_en_es/Example1.txt b/inputs/opus_mt_en_es/Example1.txt new file mode 100644 index 0000000000000000000000000000000000000000..1c757ff5fd516a360fecb7a8245b80bfba9bc47c --- /dev/null +++ b/inputs/opus_mt_en_es/Example1.txt @@ -0,0 +1,2 @@ +Example1.txt +Other than being the king of the north, John Snow is an English physician and a leader in the development of anaesthesia and medical hygiene. He is considered for being the first one using data to cure cholera outbreak in 1854. diff --git a/inputs/opus_mt_en_es/Example2.txt b/inputs/opus_mt_en_es/Example2.txt new file mode 100644 index 0000000000000000000000000000000000000000..c8021d29f86182ef384388804a597c0e9e835b88 --- /dev/null +++ b/inputs/opus_mt_en_es/Example2.txt @@ -0,0 +1,2 @@ +Example2.txt +Titanic is a 1997 American epic romance and disaster film directed, written, co-produced, and co-edited by James Cameron. Incorporating both historical and fictionalized aspects, it is based on accounts of the sinking of the RMS Titanic. It stars Leonardo DiCaprio and Kate Winslet as members of different social classes who fall in love aboard the ship during its ill-fated maiden voyage. diff --git a/inputs/opus_mt_en_es/Example3.txt b/inputs/opus_mt_en_es/Example3.txt new file mode 100644 index 0000000000000000000000000000000000000000..60b65fd29f16e3012087938d84401d620401819c --- /dev/null +++ b/inputs/opus_mt_en_es/Example3.txt @@ -0,0 +1,2 @@ +Example3.txt +William Henry Gates III (born October 28, 1955) is an American business magnate, software developer, investor, and philanthropist. 
He is best known as the co-founder of Microsoft Corporation. During his career at Microsoft, Gates held the positions of chairman, chief executive officer (CEO), president and chief software architect. He was also being the largest individual shareholder until May 2014. He is one of the best-known entrepreneurs and pioneers of the microcomputer revolution of the 1970s and 1980s. diff --git a/inputs/opus_mt_en_es/Example4.txt b/inputs/opus_mt_en_es/Example4.txt new file mode 100644 index 0000000000000000000000000000000000000000..fed060c5ad7b447dbc0c020af38973d598c2db28 --- /dev/null +++ b/inputs/opus_mt_en_es/Example4.txt @@ -0,0 +1,2 @@ +Example4.txt +The Mona Lisa is a 16th century oil painting created by Leonardo. It's held at the Louvre in Paris. diff --git a/inputs/opus_mt_en_es/Example5.txt b/inputs/opus_mt_en_es/Example5.txt new file mode 100644 index 0000000000000000000000000000000000000000..6c7cae0ae4091c708c1044ebbb1e198f02afa5b9 --- /dev/null +++ b/inputs/opus_mt_en_es/Example5.txt @@ -0,0 +1,2 @@ +Example5.txt +Facebook is a social networking service launched as TheFacebook on February 4, 2004. It was founded by Mark Zuckerberg with his college roommates and fellow Harvard University students Eduardo Saverin, Andrew McCollum, Dustin Moskovitz and Chris Hughes. The website's membership was initially limited by the founders to Harvard students, but was expanded to other colleges in the Boston area, the Ivy League, and gradually most universities in the United States and Canada. diff --git a/inputs/opus_mt_en_es/Example6.txt b/inputs/opus_mt_en_es/Example6.txt new file mode 100644 index 0000000000000000000000000000000000000000..3ae368a23e00f0925178d5f9cb734435e62defe8 --- /dev/null +++ b/inputs/opus_mt_en_es/Example6.txt @@ -0,0 +1,2 @@ +Example6.txt +Geoffrey Everest Hinton is an English Canadian cognitive psychologist and computer scientist, most noted for his work on artificial neural networks. 
Since 2013 he divides his time working for Google and the University of Toronto. In 2017, he cofounded and became the Chief Scientific Advisor of the Vector Institute in Toronto. diff --git a/inputs/opus_mt_en_es/Example7.txt b/inputs/opus_mt_en_es/Example7.txt new file mode 100644 index 0000000000000000000000000000000000000000..bb44cafaf668a8806a55c56504736a3807a1e59c --- /dev/null +++ b/inputs/opus_mt_en_es/Example7.txt @@ -0,0 +1,2 @@ +Example7.txt +When I told John that I wanted to move to Alaska, he warned me that I'd have trouble finding a Starbucks there. diff --git a/inputs/opus_mt_en_fr/Example1.txt b/inputs/opus_mt_en_fr/Example1.txt new file mode 100644 index 0000000000000000000000000000000000000000..1c757ff5fd516a360fecb7a8245b80bfba9bc47c --- /dev/null +++ b/inputs/opus_mt_en_fr/Example1.txt @@ -0,0 +1,2 @@ +Example1.txt +Other than being the king of the north, John Snow is an English physician and a leader in the development of anaesthesia and medical hygiene. He is considered for being the first one using data to cure cholera outbreak in 1854. diff --git a/inputs/opus_mt_en_fr/Example2.txt b/inputs/opus_mt_en_fr/Example2.txt new file mode 100644 index 0000000000000000000000000000000000000000..c8021d29f86182ef384388804a597c0e9e835b88 --- /dev/null +++ b/inputs/opus_mt_en_fr/Example2.txt @@ -0,0 +1,2 @@ +Example2.txt +Titanic is a 1997 American epic romance and disaster film directed, written, co-produced, and co-edited by James Cameron. Incorporating both historical and fictionalized aspects, it is based on accounts of the sinking of the RMS Titanic. It stars Leonardo DiCaprio and Kate Winslet as members of different social classes who fall in love aboard the ship during its ill-fated maiden voyage. 
diff --git a/inputs/opus_mt_en_fr/Example3.txt b/inputs/opus_mt_en_fr/Example3.txt new file mode 100644 index 0000000000000000000000000000000000000000..60b65fd29f16e3012087938d84401d620401819c --- /dev/null +++ b/inputs/opus_mt_en_fr/Example3.txt @@ -0,0 +1,2 @@ +Example3.txt +William Henry Gates III (born October 28, 1955) is an American business magnate, software developer, investor, and philanthropist. He is best known as the co-founder of Microsoft Corporation. During his career at Microsoft, Gates held the positions of chairman, chief executive officer (CEO), president and chief software architect. He was also being the largest individual shareholder until May 2014. He is one of the best-known entrepreneurs and pioneers of the microcomputer revolution of the 1970s and 1980s. diff --git a/inputs/opus_mt_en_fr/Example4.txt b/inputs/opus_mt_en_fr/Example4.txt new file mode 100644 index 0000000000000000000000000000000000000000..fed060c5ad7b447dbc0c020af38973d598c2db28 --- /dev/null +++ b/inputs/opus_mt_en_fr/Example4.txt @@ -0,0 +1,2 @@ +Example4.txt +The Mona Lisa is a 16th century oil painting created by Leonardo. It's held at the Louvre in Paris. diff --git a/inputs/opus_mt_en_fr/Example5.txt b/inputs/opus_mt_en_fr/Example5.txt new file mode 100644 index 0000000000000000000000000000000000000000..6c7cae0ae4091c708c1044ebbb1e198f02afa5b9 --- /dev/null +++ b/inputs/opus_mt_en_fr/Example5.txt @@ -0,0 +1,2 @@ +Example5.txt +Facebook is a social networking service launched as TheFacebook on February 4, 2004. It was founded by Mark Zuckerberg with his college roommates and fellow Harvard University students Eduardo Saverin, Andrew McCollum, Dustin Moskovitz and Chris Hughes. The website's membership was initially limited by the founders to Harvard students, but was expanded to other colleges in the Boston area, the Ivy League, and gradually most universities in the United States and Canada. 
diff --git a/inputs/opus_mt_en_fr/Example6.txt b/inputs/opus_mt_en_fr/Example6.txt new file mode 100644 index 0000000000000000000000000000000000000000..3ae368a23e00f0925178d5f9cb734435e62defe8 --- /dev/null +++ b/inputs/opus_mt_en_fr/Example6.txt @@ -0,0 +1,2 @@ +Example6.txt +Geoffrey Everest Hinton is an English Canadian cognitive psychologist and computer scientist, most noted for his work on artificial neural networks. Since 2013 he divides his time working for Google and the University of Toronto. In 2017, he cofounded and became the Chief Scientific Advisor of the Vector Institute in Toronto. diff --git a/inputs/opus_mt_en_fr/Example7.txt b/inputs/opus_mt_en_fr/Example7.txt new file mode 100644 index 0000000000000000000000000000000000000000..bb44cafaf668a8806a55c56504736a3807a1e59c --- /dev/null +++ b/inputs/opus_mt_en_fr/Example7.txt @@ -0,0 +1,2 @@ +Example7.txt +When I told John that I wanted to move to Alaska, he warned me that I'd have trouble finding a Starbucks there. diff --git a/inputs/opus_mt_en_it/Example1.txt b/inputs/opus_mt_en_it/Example1.txt new file mode 100644 index 0000000000000000000000000000000000000000..1c757ff5fd516a360fecb7a8245b80bfba9bc47c --- /dev/null +++ b/inputs/opus_mt_en_it/Example1.txt @@ -0,0 +1,2 @@ +Example1.txt +Other than being the king of the north, John Snow is an English physician and a leader in the development of anaesthesia and medical hygiene. He is considered for being the first one using data to cure cholera outbreak in 1854. diff --git a/inputs/opus_mt_en_it/Example2.txt b/inputs/opus_mt_en_it/Example2.txt new file mode 100644 index 0000000000000000000000000000000000000000..c8021d29f86182ef384388804a597c0e9e835b88 --- /dev/null +++ b/inputs/opus_mt_en_it/Example2.txt @@ -0,0 +1,2 @@ +Example2.txt +Titanic is a 1997 American epic romance and disaster film directed, written, co-produced, and co-edited by James Cameron. 
Incorporating both historical and fictionalized aspects, it is based on accounts of the sinking of the RMS Titanic. It stars Leonardo DiCaprio and Kate Winslet as members of different social classes who fall in love aboard the ship during its ill-fated maiden voyage. diff --git a/inputs/opus_mt_en_it/Example3.txt b/inputs/opus_mt_en_it/Example3.txt new file mode 100644 index 0000000000000000000000000000000000000000..60b65fd29f16e3012087938d84401d620401819c --- /dev/null +++ b/inputs/opus_mt_en_it/Example3.txt @@ -0,0 +1,2 @@ +Example3.txt +William Henry Gates III (born October 28, 1955) is an American business magnate, software developer, investor, and philanthropist. He is best known as the co-founder of Microsoft Corporation. During his career at Microsoft, Gates held the positions of chairman, chief executive officer (CEO), president and chief software architect. He was also the largest individual shareholder until May 2014. He is one of the best-known entrepreneurs and pioneers of the microcomputer revolution of the 1970s and 1980s. diff --git a/inputs/opus_mt_en_it/Example4.txt b/inputs/opus_mt_en_it/Example4.txt new file mode 100644 index 0000000000000000000000000000000000000000..fed060c5ad7b447dbc0c020af38973d598c2db28 --- /dev/null +++ b/inputs/opus_mt_en_it/Example4.txt @@ -0,0 +1,2 @@ +Example4.txt +The Mona Lisa is a 16th century oil painting created by Leonardo. It's held at the Louvre in Paris. diff --git a/inputs/opus_mt_en_it/Example5.txt b/inputs/opus_mt_en_it/Example5.txt new file mode 100644 index 0000000000000000000000000000000000000000..6c7cae0ae4091c708c1044ebbb1e198f02afa5b9 --- /dev/null +++ b/inputs/opus_mt_en_it/Example5.txt @@ -0,0 +1,2 @@ +Example5.txt +Facebook is a social networking service launched as TheFacebook on February 4, 2004. It was founded by Mark Zuckerberg with his college roommates and fellow Harvard University students Eduardo Saverin, Andrew McCollum, Dustin Moskovitz and Chris Hughes.
The website's membership was initially limited by the founders to Harvard students, but was expanded to other colleges in the Boston area, the Ivy League, and gradually most universities in the United States and Canada. diff --git a/inputs/opus_mt_en_it/Example6.txt b/inputs/opus_mt_en_it/Example6.txt new file mode 100644 index 0000000000000000000000000000000000000000..3ae368a23e00f0925178d5f9cb734435e62defe8 --- /dev/null +++ b/inputs/opus_mt_en_it/Example6.txt @@ -0,0 +1,2 @@ +Example6.txt +Geoffrey Everest Hinton is an English Canadian cognitive psychologist and computer scientist, most noted for his work on artificial neural networks. Since 2013 he divides his time working for Google and the University of Toronto. In 2017, he cofounded and became the Chief Scientific Advisor of the Vector Institute in Toronto. diff --git a/inputs/opus_mt_en_it/Example7.txt b/inputs/opus_mt_en_it/Example7.txt new file mode 100644 index 0000000000000000000000000000000000000000..bb44cafaf668a8806a55c56504736a3807a1e59c --- /dev/null +++ b/inputs/opus_mt_en_it/Example7.txt @@ -0,0 +1,2 @@ +Example7.txt +When I told John that I wanted to move to Alaska, he warned me that I'd have trouble finding a Starbucks there. diff --git a/inputs/opus_mt_es_en/Example1.txt b/inputs/opus_mt_es_en/Example1.txt new file mode 100644 index 0000000000000000000000000000000000000000..48b14528e68c6faa30940ec46f78c05926301963 --- /dev/null +++ b/inputs/opus_mt_es_en/Example1.txt @@ -0,0 +1,2 @@ +Example1.txt +Además de ser el rey del norte, John Snow es un médico inglés y líder en el desarrollo de la anestesia y la higiene médica. Se le considera el primero en utilizar datos para curar el brote de cólera en 1854. 
\ No newline at end of file diff --git a/inputs/opus_mt_es_en/Example2.txt b/inputs/opus_mt_es_en/Example2.txt new file mode 100644 index 0000000000000000000000000000000000000000..fe16d0ac736b0c7c9d567bcd7d30a962235c1fc3 --- /dev/null +++ b/inputs/opus_mt_es_en/Example2.txt @@ -0,0 +1,2 @@ +Example2.txt +Titanic es una película épica estadounidense de 1997 sobre desastres y romance dirigida, escrita, coproducida y coeditada por James Cameron. Incorpora aspectos históricos y ficticios, y se basa en relatos del hundimiento del RMS Titanic. Está protagonizada por Leonardo DiCaprio y Kate Winslet como miembros de diferentes clases sociales que se enamoran a bordo del barco durante su desafortunado viaje inaugural. \ No newline at end of file diff --git a/inputs/opus_mt_es_en/Example3.txt b/inputs/opus_mt_es_en/Example3.txt new file mode 100644 index 0000000000000000000000000000000000000000..d1cbb1421d2bf92dc648d92240a9a839f1985d6a --- /dev/null +++ b/inputs/opus_mt_es_en/Example3.txt @@ -0,0 +1,2 @@ +Example3.txt +William Henry Gates III (nacido el 28 de octubre de 1955) es un magnate empresarial, desarrollador de software, inversor y filántropo estadounidense. Es mejor conocido como cofundador de Microsoft Corporation. Durante su carrera en Microsoft, Gates ocupó los puestos de presidente, director ejecutivo (CEO), presidente y arquitecto jefe de software. También fue el mayor accionista individual hasta mayo de 2014. Es uno de los empresarios más reconocidos y pioneros de la revolución de las microcomputadoras de los años setenta y ochenta. \ No newline at end of file diff --git a/inputs/opus_mt_es_en/Example4.txt b/inputs/opus_mt_es_en/Example4.txt new file mode 100644 index 0000000000000000000000000000000000000000..b635293aba9c31eb6eb8e324e02256b2a482ff72 --- /dev/null +++ b/inputs/opus_mt_es_en/Example4.txt @@ -0,0 +1,2 @@ +Example4.txt +La Mona Lisa es una obra de Leonardo.. 
Se celebra en el Louvre en París diff --git a/inputs/opus_mt_es_en/Example5.txt b/inputs/opus_mt_es_en/Example5.txt new file mode 100644 index 0000000000000000000000000000000000000000..cdf325b7f80ad010b7963a6854a7bf237309c085 --- /dev/null +++ b/inputs/opus_mt_es_en/Example5.txt @@ -0,0 +1,2 @@ +Example5.txt +Facebook es un servicio de red social lanzado como TheFacebook el 4 de febrero de 2004. Fue fundado por Mark Zuckerberg con sus compañeros de cuarto y compañeros de la Universidad de Harvard Eduardo Saverin, Andrew McCollum, Dustin Moskovitz y Chris Hughes. La membresía del sitio web estaba inicialmente limitada por los fundadores a los estudiantes de Harvard, pero se expandió a otras universidades en el área de Boston, la Ivy League y gradualmente a la mayoría de las universidades de los Estados Unidos y Canadá. \ No newline at end of file diff --git a/inputs/opus_mt_es_en/Example6.txt b/inputs/opus_mt_es_en/Example6.txt new file mode 100644 index 0000000000000000000000000000000000000000..2929f12d0b1475b2f74b617c40942f9872560fdd --- /dev/null +++ b/inputs/opus_mt_es_en/Example6.txt @@ -0,0 +1,2 @@ +Example6.txt +Geoffrey Everest Hinton es un psicólogo cognitivo e informático canadiense inglés, más conocido por su trabajo en redes neuronales artificiales. Desde 2013 divide su tiempo trabajando para Google y la Universidad de Toronto. En 2017, fue cofundador y se convirtió en el Asesor Científico Jefe del Vector Institute en Toronto. \ No newline at end of file diff --git a/inputs/opus_mt_es_en/Example7.txt b/inputs/opus_mt_es_en/Example7.txt new file mode 100644 index 0000000000000000000000000000000000000000..e2cc88830e4f726f58ef8e4145877b8d3ec19405 --- /dev/null +++ b/inputs/opus_mt_es_en/Example7.txt @@ -0,0 +1,2 @@ +Example7.txt +Cuando le dije a John que quería mudarme a Alaska, me advirtió que tendría problemas para encontrar un Starbucks allí. 
\ No newline at end of file diff --git a/inputs/opus_mt_fr_en/Example1.txt b/inputs/opus_mt_fr_en/Example1.txt new file mode 100644 index 0000000000000000000000000000000000000000..0b1ca1bfed167a8f26fdd6e6cbbf62d4d602b82b --- /dev/null +++ b/inputs/opus_mt_fr_en/Example1.txt @@ -0,0 +1,2 @@ +Example1.txt +Outre le roi du nord, John Snow est un médecin anglais et un chef de file dans le développement de l'anesthésie et de l'hygiène.. Il est considéré comme le premier à utiliser les données pour guérir l'épidémie de choléra en 1854. diff --git a/inputs/opus_mt_fr_en/Example2.txt b/inputs/opus_mt_fr_en/Example2.txt new file mode 100644 index 0000000000000000000000000000000000000000..b35af375bd8c883e26be580c603b5b4529b22a6b --- /dev/null +++ b/inputs/opus_mt_fr_en/Example2.txt @@ -0,0 +1,2 @@ +Example2.txt +Titanic est un film américain de romance et de catastrophe réalisé par James Cameron.. En intégrant des aspects historiques et fictifs, il est basé sur les récits du naufrages.. Il met en vedette Leonardo DiCaprio et Kate Winslet Il est membre de différentes classes sociales qui tombent amoureux à bord du navire pendant son voyage de jeune fille. diff --git a/inputs/opus_mt_fr_en/Example3.txt b/inputs/opus_mt_fr_en/Example3.txt new file mode 100644 index 0000000000000000000000000000000000000000..d8a5f9aeac93de86318f96fa8ff5656a51b113ff --- /dev/null +++ b/inputs/opus_mt_fr_en/Example3.txt @@ -0,0 +1,2 @@ +Example3.txt +William Henry Gates III est un créateur de logiciels et un graphiste américain.. Au cours de sa carrière chez Microsoft, Gates a occupé les postes de président, chef de la direction et d'architecte en chef.. Il a également été le plus grand actionnaire individuel jusqu'en mai 2014.. Il est l'un des entrepreneurs les plus connus et pionniers de la révolution des micro-ordinateurs des années 1970-1980. 
diff --git a/inputs/opus_mt_fr_en/Example4.txt b/inputs/opus_mt_fr_en/Example4.txt new file mode 100644 index 0000000000000000000000000000000000000000..529d867941b59742116cb53d8833dad7a02f83a9 --- /dev/null +++ b/inputs/opus_mt_fr_en/Example4.txt @@ -0,0 +1,2 @@ +Example4.txt +La Joconde est une peinture à l'huile de la .. Il a lieu au Louvre. diff --git a/inputs/opus_mt_fr_en/Example5.txt b/inputs/opus_mt_fr_en/Example5.txt new file mode 100644 index 0000000000000000000000000000000000000000..089e0b75acfd3aba5f059f6b07b95534b3b3401c --- /dev/null +++ b/inputs/opus_mt_fr_en/Example5.txt @@ -0,0 +1,2 @@ +Example5.txt +Facebook est un service de réseautage social lancé le 4 février 2004.. Il a été fondé par Mark Zuckerberg avec ses colocataires et ses collègues Eduardo Saverin.. L'adhésion du site a été initialement limité par les fondateurs aux étudiants de Harvard, mais a été étendu à d'autres universités dans la région de Boston, la ligue Ivy, et la plupart diff --git a/inputs/opus_mt_fr_en/Example6.txt b/inputs/opus_mt_fr_en/Example6.txt new file mode 100644 index 0000000000000000000000000000000000000000..fbf679a9e746798f4c6f6d5cc56cfd9f8aeab09e --- /dev/null +++ b/inputs/opus_mt_fr_en/Example6.txt @@ -0,0 +1,2 @@ +Example6.txt +Geoffrey Everest Hinton est un psychologue cognitif et informaticien canadien, dont le travail sur les réseaux neurologique.. Depuis 2013, il partage son temps de travail pour Google et l'Université de Toronto.. En 2017, il cofonde et devient conseiller scientifique en chef du Vector Institute. diff --git a/inputs/opus_mt_fr_en/Example7.txt b/inputs/opus_mt_fr_en/Example7.txt new file mode 100644 index 0000000000000000000000000000000000000000..9867d830b18001e1ff555cdb87e3bbbc859f5a8f --- /dev/null +++ b/inputs/opus_mt_fr_en/Example7.txt @@ -0,0 +1,2 @@ +Example7.txt +Quand j'ai dit à John que je voulais déménager en Alaska, il m'avais dit que j'avais du mal à trouver un Starbucks. 
diff --git a/inputs/opus_mt_it_en/Example1.txt b/inputs/opus_mt_it_en/Example1.txt new file mode 100644 index 0000000000000000000000000000000000000000..555dbcbcf13ff75fccf18410a26dedc43632cd42 --- /dev/null +++ b/inputs/opus_mt_it_en/Example1.txt @@ -0,0 +1,2 @@ +Example1.txt +Oltre ad essere il re del nord, John Snow è un medico inglese e leader nello sviluppo dell'anestesia e dell'igiene medica.. È considerato il primo che usa i dati per curare l'epidemia di colera nel 1854. diff --git a/inputs/opus_mt_it_en/Example2.txt b/inputs/opus_mt_it_en/Example2.txt new file mode 100644 index 0000000000000000000000000000000000000000..9dfbc8520a69dc37533bca636ce60a5dae6f9c73 --- /dev/null +++ b/inputs/opus_mt_it_en/Example2.txt @@ -0,0 +1,2 @@ +Example2.txt +Titanic è un film del 1997 diretto, co-prodotto e co-edited di James Cameron.. Comprendente aspetti sia storici e fittizi, si basa sui racconti dell'affondamento del Titanic dell'annegamento dell'RMS.. Il suo ruolo è di Leonardo DiCaprio e Kate Winslet come membro di diverse classi sociali. diff --git a/inputs/opus_mt_it_en/Example3.txt b/inputs/opus_mt_it_en/Example3.txt new file mode 100644 index 0000000000000000000000000000000000000000..8b6a59d52ba788ae84321b535bf5330678758c10 --- /dev/null +++ b/inputs/opus_mt_it_en/Example3.txt @@ -0,0 +1,2 @@ +Example3.txt +William Henry Gates III (nato il 28 ottobre 1955) è un magnate americano business, sviluppatore di software, investitore e filantropo.. Durante la sua carriera a Microsoft, Gates ha ricoperto le cariche di presidente, direttore esecutivo e direttore esecutivo e direttore di Microsoft.. È stato anche il più grande azionista individuale fino al maggio 2014.. È uno dei più noti imprenditori e pionieri della rivoluzione dei microcomputer degli anni '70 e degli anni '80. 
diff --git a/inputs/opus_mt_it_en/Example4.txt b/inputs/opus_mt_it_en/Example4.txt new file mode 100644 index 0000000000000000000000000000000000000000..3fc331ff6e032de4bf652f58f216f0d44cf8a699 --- /dev/null +++ b/inputs/opus_mt_it_en/Example4.txt @@ -0,0 +1,2 @@ +Example4.txt +La Monna Lisa è un dipinto ad olio del XVI secolo creato da Leonardo.. Si tiene al Louvre di Parigi. diff --git a/inputs/opus_mt_it_en/Example5.txt b/inputs/opus_mt_it_en/Example5.txt new file mode 100644 index 0000000000000000000000000000000000000000..d524ba3c7ac32d644e4b5bf2f868b2d474a3c03d --- /dev/null +++ b/inputs/opus_mt_it_en/Example5.txt @@ -0,0 +1,2 @@ +Example5.txt +Facebook è un servizio di social networking lanciato come TheFacebook il 4 febbraio 2004.. È stata fondata da Mark Zuckerberg con i suoi compagni di stanza e gli studenti dell'università di Harvard.. L'adesione del Web site era inizialmente limitata dai fondatori degli studenti di Harvard, ma fu estesa ad altri college della Ivy League, e gradualmente, e la maggior parte diff --git a/inputs/opus_mt_it_en/Example6.txt b/inputs/opus_mt_it_en/Example6.txt new file mode 100644 index 0000000000000000000000000000000000000000..c5849964547e3ce2604b72b033553f8664fb5a5a --- /dev/null +++ b/inputs/opus_mt_it_en/Example6.txt @@ -0,0 +1,2 @@ +Example6.txt +Geoffrey Everest Hinton è uno psicologo cognitivo canadese e informatico più noto per il suo lavoro sulle reti neurale artificiale.. Dal 2013 divide il suo tempo lavorando per Google e l'Università di Toronto.. Nel 2017 ha fondato e divenne Consigliere Scientifico Capo dell'Istituto Vector Institute di Toronto. diff --git a/inputs/opus_mt_it_en/Example7.txt b/inputs/opus_mt_it_en/Example7.txt new file mode 100644 index 0000000000000000000000000000000000000000..9f1ea3362c2ec74eb1065038cf9a7503e0cdc08b --- /dev/null +++ b/inputs/opus_mt_it_en/Example7.txt @@ -0,0 +1,2 @@ +Example7.txt +Quando ho detto a John che volevo trasferirmi in Alaska che non avrei trovato uno Starbucks. 
diff --git a/pages/Workflow & Model Overview.py b/pages/Workflow & Model Overview.py new file mode 100644 index 0000000000000000000000000000000000000000..1bf3f2f522be939e3b8f400770734246a7a1b165 --- /dev/null +++ b/pages/Workflow & Model Overview.py @@ -0,0 +1,184 @@ +import streamlit as st +import pandas as pd + +# Custom CSS for Styling +st.markdown(""" + +""", unsafe_allow_html=True) + +# Main Title +st.markdown('
Multilingual Text Translation with Spark NLP and MarianMT
', unsafe_allow_html=True) + +# Overview Section +st.markdown(""" +
+

With the ever-growing need to bridge language barriers in today's globalized world, multilingual text translation has become more important than ever. The MarianMT model, a fast and efficient neural machine translation framework, is built on the Transformer architecture and supports over 1,000 translation directions. This guide will demonstrate how to utilize MarianMT within Spark NLP to perform high-quality translations across multiple languages.

+
+""", unsafe_allow_html=True) + +# Introduction to MarianMT and Spark NLP +st.markdown('
What is MarianMT?
', unsafe_allow_html=True) + +# What is MarianMT? +st.markdown(""" +
+

MarianMT is a family of neural machine translation models built on Marian, an efficient translation framework implemented in C++ and developed primarily by the Microsoft Translator team. It is capable of translating text between numerous languages with remarkable speed and accuracy, and is used in various industrial and research applications.

+
+""", unsafe_allow_html=True) + +# Pipeline and Results +st.markdown('
Pipeline and Results
', unsafe_allow_html=True) + +st.markdown(""" +
+

In this section, we will build a Spark NLP pipeline that uses the MarianMT model to translate English text into Chinese. We'll demonstrate the translation process from data preparation to the final output.

+
+""", unsafe_allow_html=True) + +# Step 1: Creating the Data +st.markdown(""" +
+

Step 1: Creating the Data

+

We'll begin by creating a Spark DataFrame containing the English text that we want to translate into Chinese.

+""", unsafe_allow_html=True) + +st.code(""" +data = [["Hello, how are you?"]] +df = spark.createDataFrame(data).toDF("text") +""", language="python") + +# Step 2: Assembling the Pipeline +st.markdown(""" +
+

Step 2: Assembling the Pipeline

+

We will now set up a Spark NLP pipeline that includes a document assembler, a sentence detector, and the MarianMT model for translation.

+""", unsafe_allow_html=True) + +st.code(""" +from sparknlp.base import * +from sparknlp.annotator import * +from pyspark.ml import Pipeline + +document_assembler = DocumentAssembler()\\ + .setInputCol("text")\\ + .setOutputCol("document") + +sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")\\ + .setInputCols(["document"])\\ + .setOutputCol("sentences") + +marian = MarianTransformer.pretrained("opus_mt_en_zh", "xx")\\ + .setInputCols(["sentences"])\\ + .setOutputCol("translation") + +pipeline = Pipeline(stages=[document_assembler, sentence_detector, marian]) +model = pipeline.fit(df) +result = model.transform(df) +""", language="python") + +# Step 3: Viewing the Results +st.markdown(""" +
+

Step 3: Viewing the Results

+

After processing the text, we can view the translations generated by the MarianMT model:

+""", unsafe_allow_html=True) + +st.code(""" +result.select("translation.result").show(truncate=False) +""", language="python") + +st.text(""" ++--------------+ +|result | ++--------------+ +|[你好,你好吗?] | ++--------------+ +""") + +# Model Information and Use Cases +st.markdown(""" +
+

Model Information and Use Cases

+

The MarianMT model is highly versatile, supporting numerous translation directions. Here’s a brief overview of the model used in this example:

+
    +
  • Model Name: opus_mt_en_zh
  • +
  • Input Language: English (en)
  • +
  • Output Language: Chinese (zh)
  • +
  • Best for: General text translation from English to Chinese.
  • +
  • Compatibility: Spark NLP 2.7.0+
  • +
+
+""", unsafe_allow_html=True) + +# Conclusion +st.markdown(""" +
+

Conclusion

+

By integrating MarianMT with Spark NLP, you can easily perform high-quality translations across various languages, leveraging the power of distributed computing. The example provided here demonstrates how to translate English text to Chinese using the opus_mt_en_zh model. Whether you’re working with small-scale text or massive datasets, this approach offers scalability and flexibility.

+
+""", unsafe_allow_html=True) + +# References +st.markdown(""" +
+

References

+ +
+""", unsafe_allow_html=True) + +# Community & Support +st.markdown('
Community & Support
', unsafe_allow_html=True) +st.markdown(""" +
+
    +
  • Official Website: Documentation and examples
  • +
  • Slack: Live discussion with the community and team
  • +
  • GitHub: Bug reports, feature requests, and contributions
  • +
  • Medium: Tutorials and articles
  • +
+
+""", unsafe_allow_html=True) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..5edb638b3ec10f97431ae105409c40d3c00d766f --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +streamlit +pandas +numpy +spark-nlp +pyspark \ No newline at end of file