# -*- coding: utf-8 -*-
"""KoQuillBot: a Streamlit app that rephrases Korean text by back-translation.

The text entered by the user is translated Korean -> English with one seq2seq
checkpoint and then English -> Korean with a second one; the resulting Korean
sentence is displayed as the paraphrase.
"""
import numpy as np
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Kept as the first Streamlit call in the script, as in the original.
st.set_page_config(
    page_title="KoQuillBot", layout="wide", initial_sidebar_state="expanded"
)

KO2EN_CHECKPOINT = "QuoQA-NLP/KE-T5-Ko2En-Base"
EN2KO_CHECKPOINT = "QuoQA-NLP/KE-T5-En2Ko-Base"

# Token budget used both for tokenizer truncation/padding and for generation.
MAX_TOKENS = 64

# Models and tokenizers are heavyweight, unhashable resources.  Use the
# dedicated resource cache when this Streamlit version provides it; otherwise
# fall back to the legacy cache with output hashing disabled so the returned
# object is not re-hashed on every rerun.
if hasattr(st, "cache_resource"):
    _cache_resource = st.cache_resource
else:
    _cache_resource = st.cache(allow_output_mutation=True)


@_cache_resource
def load_model(model_name):
    """Load a seq2seq checkpoint once and reuse it across script reruns.

    Args:
        model_name: Checkpoint identifier passed to
            ``AutoModelForSeq2SeqLM.from_pretrained``.

    Returns:
        The loaded model instance (shared between reruns via the cache).
    """
    return AutoModelForSeq2SeqLM.from_pretrained(model_name)


@_cache_resource
def _load_tokenizer(model_name):
    """Load the tokenizer for ``model_name`` once and reuse it across reruns."""
    return AutoTokenizer.from_pretrained(model_name)


# The Ko2En tokenizer is used for both directions, exactly as before.
# NOTE(review): presumably both checkpoints share one vocabulary - confirm.
tokenizer = _load_tokenizer(KO2EN_CHECKPOINT)
ko2en_model = load_model(KO2EN_CHECKPOINT)
en2ko_model = load_model(EN2KO_CHECKPOINT)


def _translate(model, text):
    """Run ``text`` through ``model`` and return the decoded top beam.

    The input is truncated/padded to ``MAX_TOKENS`` tokens and the output is
    generated with beam search limited to ``MAX_TOKENS`` tokens.

    Args:
        model: A seq2seq model exposing ``generate``.
        text: Source sentence to translate.

    Returns:
        The decoded first returned sequence, with special tokens removed.
    """
    encoded = tokenizer(
        text,
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=MAX_TOKENS,
    )
    generated = model.generate(
        **encoded,
        max_length=MAX_TOKENS,
        num_beams=5,
        repetition_penalty=1.3,
        no_repeat_ngram_size=3,
        num_return_sequences=1,
    )
    return tokenizer.decode(
        generated[0],
        clean_up_tokenization_spaces=True,
        skip_special_tokens=True,
    )


st.title("🤖 KoQuillBot")

# Sample paragraph shown in the text area on first load (167 characters,
# within the 200-character limit of the widget below).
default_value = (
    "프로젝트 가치가 미화 1백만 달러 이상인 공공 파트너가 시작한 PPP 프로젝트에 대해 "
    "2단계 입찰이 실시됩니다. 입찰을 전자 방식으로 진행하는 것이 허용됩니다. "
    "(즉, 신청서 및 입찰 제안의 전자 제출). COVID-19 전염병과 그에 따른 여행 제한으로 "
    "인해 오늘날에는 일반적인 관행이 되었습니다."
)

src_text = st.text_area(
    "바꾸고 싶은 문장을 입력하세요:",  # "Enter the sentence you want to rephrase:"
    default_value,
    height=300,
    max_chars=200,
)
print(src_text)

if not src_text.strip():
    # Nothing to translate: tell the user and skip the (expensive) model calls
    # instead of generating from an empty prompt.
    st.warning("Please **enter text** for translation")
else:
    # Translate into an English sentence.
    english_translation = _translate(ko2en_model, src_text)

    # Translate back to Korean.
    korean_translation = _translate(en2ko_model, english_translation)

    print(f"{src_text} -> {english_translation} -> {korean_translation}")
    st.write(korean_translation)
    print(korean_translation)