|
import streamlit as st |
|
from transformers import MarianMTModel, MarianTokenizer |
|
|
|
|
|
# Display name offered in the UI select boxes -> language code used to build
# the keys of the translation-model table.
languages = dict(
    English="en",
    Urdu="ur",
    French="fr",
    Spanish="es",
    German="de",
    Chinese="zh",
    Italian="it",
    Russian="ru",
    Japanese="ja",
    Arabic="ar",
    Hindi="hi",
)
|
|
|
|
|
# (source code, target code) -> Hugging Face model id.
# Every supported pair has English on one side, and each id follows the
# pattern "Helsinki-NLP/opus-mt-<src>-<tgt>", so the table is generated
# instead of being spelled out entry by entry. For each non-English code the
# en->code entry is inserted first, then code->en.
# NOTE(review): confirm that each generated id actually exists on the Hub.
language_pairs = {
    (src, tgt): f"Helsinki-NLP/opus-mt-{src}-{tgt}"
    for code in ("ur", "fr", "es", "de", "zh", "it", "ru", "ja", "ar", "hi")
    for src, tgt in (("en", code), (code, "en"))
}
|
|
|
@st.cache_resource
def load_model(src_lang, tgt_lang):
    """Load the MarianMT model and tokenizer for a direct language pair.

    Streamlit re-executes the whole script on every widget interaction, so
    an uncached loader rebuilds the model and tokenizer on every click of
    the "Translate" button. ``st.cache_resource`` keeps one instance per
    ``(src_lang, tgt_lang)`` argument pair and hands the same objects back
    on later calls.

    NOTE(review): the cache is unbounded; if memory becomes a concern,
    ``st.cache_resource(max_entries=...)`` can cap the number of models kept.

    Args:
        src_lang: Source language code (first element of a
            ``language_pairs`` key, e.g. ``"en"``).
        tgt_lang: Target language code (second element of the key).

    Returns:
        A ``(model, tokenizer)`` tuple for the pair.

    Raises:
        ValueError: If ``language_pairs`` has no model for the pair.
    """
    model_name = language_pairs.get((src_lang, tgt_lang))
    if not model_name:
        # Exceptions are not cached, so an unsupported pair raises each time.
        raise ValueError(f"No available model for {src_lang} to {tgt_lang}")

    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return model, tokenizer
|
|
|
def translate(text, src_lang, tgt_lang):
    """Translate ``text`` with the direct model for ``src_lang -> tgt_lang``.

    Args:
        text: The string to translate.
        src_lang: Source language code.
        tgt_lang: Target language code.

    Returns:
        The decoded translation with special tokens removed.

    Raises:
        ValueError: Propagated from ``load_model`` when no direct model is
            registered for the pair.
    """
    model, tokenizer = load_model(src_lang, tgt_lang)

    # Calling the tokenizer (instead of ``encode``) yields the attention mask
    # alongside the input ids, and ``truncation=True`` clips the input to the
    # tokenizer's maximum length so over-long text is shortened rather than
    # passed to the model unbounded. Text past that limit is not translated.
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    translated = model.generate(**inputs)

    # A single string was encoded, so the batch holds exactly one sequence.
    return tokenizer.decode(translated[0], skip_special_tokens=True)
|
|
|
def translate_chain(text, src_lang, tgt_lang):
    """Translate ``text`` by pivoting through English.

    The text is first translated from ``src_lang`` to English (skipped when
    the source already is English) and then from English to ``tgt_lang``
    (skipped when the target is English).

    Args:
        text: The string to translate.
        src_lang: Source language code.
        tgt_lang: Target language code.

    Returns:
        The translated string, or ``text`` unchanged when source and target
        are the same language.

    Raises:
        ValueError: Propagated from ``translate`` when one of the legs has
            no registered model.
    """
    # Identical source and target needs no work. Without this guard a
    # non-English pair such as fr -> fr would be round-tripped through
    # English, which loads two models and can alter the text.
    if src_lang == tgt_lang:
        return text

    if src_lang != 'en':
        text = translate(text, src_lang, 'en')
    if tgt_lang != 'en':
        text = translate(text, 'en', tgt_lang)
    return text
|
|
|
def translate_ui(text, source_language, target_language):
    """Translate ``text`` between two languages chosen by display name.

    Args:
        text: The string to translate.
        source_language: Display name of the source language; must be a key
            of ``languages``.
        target_language: Display name of the target language; must be a key
            of ``languages``.

    Returns:
        The translated string, or ``text`` unchanged when both names map to
        the same language code.

    Raises:
        KeyError: If either display name is not in ``languages``.
        ValueError: Propagated from the translation helpers when a required
            model is not registered.
    """
    src_lang = languages[source_language]
    tgt_lang = languages[target_language]

    # Nothing to translate; avoids loading any model (and avoids a lossy
    # round trip through English for non-English languages).
    if src_lang == tgt_lang:
        return text

    # Pick the route by consulting the table directly instead of catching
    # ValueError: an unrelated ValueError raised while tokenizing or
    # generating would otherwise be mistaken for "no direct model" and
    # trigger a second, redundant attempt through the pivot path.
    if language_pairs.get((src_lang, tgt_lang)):
        return translate(text, src_lang, tgt_lang)
    return translate_chain(text, src_lang, tgt_lang)
|
|
|
|
|
# ---- Page header ----
st.title("Multilingual Translator")
st.write("Translate text between various languages including Urdu, French, Spanish, and more.")

# ---- Inputs ----
text = st.text_area("Enter text to translate", height=100)

# Both select boxes offer the display names defined in `languages`.
source_language = st.selectbox("Select Source Language", list(languages))
target_language = st.selectbox("Select Target Language", list(languages))

# ---- Action ----
if st.button("Translate"):
    if not text.strip():
        # Whitespace-only input counts as empty.
        st.warning("Please enter text to translate.")
    else:
        translation = translate_ui(text, source_language, target_language)
        st.text_area("Translated Text", translation, height=100)

# ---- Sidebar ----
st.sidebar.title("About")
st.sidebar.info(
    """
    This app allows you to translate text between multiple languages using the MarianMT model from Hugging Face's Helsinki-NLP collection.
    """
)
|
|