Engr-Saeed's picture
Create app.py
8a60c72 verified
import streamlit as st
from transformers import MarianMTModel, MarianTokenizer
# Display name -> language code. The names are what the UI select boxes show
# (in this insertion order); the codes are the keys used in `language_pairs`.
languages = {
    "English": "en",
    "Urdu": "ur",
    "French": "fr",
    "Spanish": "es",
    "German": "de",
    "Chinese": "zh",
    "Italian": "it",
    "Russian": "ru",
    "Japanese": "ja",
    "Arabic": "ar",
    "Hindi": "hi",
}
# Model id for every directly supported (source, target) code pair.
# Each non-English language is paired with English in both directions, and
# every id follows the "Helsinki-NLP/opus-mt-<src>-<tgt>" naming pattern.
# NOTE(review): the ids are generated from the pattern and are not checked
# against the model hub here -- confirm each one actually exists.
language_pairs = {
    (src, tgt): f"Helsinki-NLP/opus-mt-{src}-{tgt}"
    # Add more codes here as models become available.
    for code in ("ur", "fr", "es", "de", "zh", "it", "ru", "ja", "ar", "hi")
    for src, tgt in (("en", code), (code, "en"))
}
@st.cache_resource
def load_model(src_lang, tgt_lang):
    """Return the MarianMT model and tokenizer for a language pair.

    The model id is looked up in ``language_pairs``. The result is cached
    with ``st.cache_resource``: Streamlit re-executes the whole script on
    every widget interaction, so without caching each click on "Translate"
    would load the model and tokenizer again from scratch.

    NOTE(review): cached resources are shared between user sessions; this
    assumes running inference on a shared model instance is safe -- confirm.

    Args:
        src_lang: Source language code, e.g. ``'en'``.
        tgt_lang: Target language code, e.g. ``'ur'``.

    Returns:
        A ``(model, tokenizer)`` tuple.

    Raises:
        ValueError: If ``language_pairs`` has no entry for
            ``(src_lang, tgt_lang)``. Callers rely on this to detect that a
            direct model is unavailable.
    """
    model_name = language_pairs.get((src_lang, tgt_lang))
    if not model_name:
        raise ValueError(f"No available model for {src_lang} to {tgt_lang}")
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return model, tokenizer
def translate(text, src_lang, tgt_lang):
    """Translate ``text`` with the direct model for ``src_lang`` -> ``tgt_lang``.

    Args:
        text: The text to translate.
        src_lang: Source language code.
        tgt_lang: Target language code.

    Returns:
        The decoded translation of the first generated sequence, with
        special tokens removed.

    Raises:
        ValueError: Propagated from ``load_model`` when no direct model is
            registered for the pair.

    Note:
        Input longer than the tokenizer's maximum length is truncated, so
        only the leading part of a very long text is translated.
    """
    model, tokenizer = load_model(src_lang, tgt_lang)
    # Calling the tokenizer (instead of .encode) yields the attention mask
    # together with the input ids. truncation=True keeps over-long input
    # within the length the model accepts instead of failing in generate().
    batch = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    generated = model.generate(**batch)
    return tokenizer.decode(generated[0], skip_special_tokens=True)
def translate_chain(text, src_lang, tgt_lang):
    """Translate ``text`` by pivoting through English.

    Used when no direct model exists for the pair: the text is first
    translated into English (unless it already is English) and then from
    English into the target language (unless the target is English).

    Args:
        text: The text to translate.
        src_lang: Source language code.
        tgt_lang: Target language code.

    Returns:
        The translated text, or ``text`` unchanged when source and target
        are the same language.

    Raises:
        ValueError: Propagated from ``translate`` when one of the English
            legs has no registered model.
    """
    # Same language on both sides: nothing to translate. Without this guard
    # a non-English pair such as ('ur', 'ur') would be round-tripped through
    # English, loading two models and degrading the text.
    if src_lang == tgt_lang:
        return text
    if src_lang != 'en':
        text = translate(text, src_lang, 'en')
    if tgt_lang != 'en':
        text = translate(text, 'en', tgt_lang)
    return text
def translate_ui(text, source_language, target_language):
    """Translate ``text`` between two languages given by display name.

    The display names are mapped to language codes through ``languages``.
    A direct translation is attempted first; if ``translate`` raises
    ``ValueError``, the English-pivot path in ``translate_chain`` is used
    instead.

    Args:
        text: The text to translate.
        source_language: Display name of the source language (a key of
            ``languages``).
        target_language: Display name of the target language (a key of
            ``languages``).

    Returns:
        The translated text.

    Raises:
        KeyError: If either display name is not a key of ``languages``.
    """
    src_code, tgt_code = languages[source_language], languages[target_language]
    try:
        direct_result = translate(text, src_code, tgt_code)
    except ValueError:
        # Direct translation was not possible; pivot through English.
        return translate_chain(text, src_code, tgt_code)
    else:
        return direct_result
# Streamlit App UI
st.title("Multilingual Translator")
st.write("Translate text between various languages including Urdu, French, Spanish, and more.")

# Input text
text = st.text_area("Enter text to translate", height=100)

# Source and target languages. Both boxes list the same names; the target
# defaults to the second entry (when there is one) so the initial selection
# is not a same-language pair.
language_names = list(languages)
source_language = st.selectbox("Select Source Language", language_names)
target_language = st.selectbox(
    "Select Target Language",
    language_names,
    index=min(1, len(language_names) - 1),
)

# Translate button
if st.button("Translate"):
    if text.strip():
        try:
            # Loading a model can take a while; show progress while waiting.
            with st.spinner("Translating..."):
                translation = translate_ui(text, source_language, target_language)
        except OSError as exc:
            # NOTE(review): assumes model download/load failures surface as
            # OSError from `from_pretrained` -- confirm against the installed
            # transformers version.
            st.error(f"Could not load the translation model: {exc}")
        else:
            st.text_area("Translated Text", translation, height=100)
    else:
        st.warning("Please enter text to translate.")

# About section
st.sidebar.title("About")
st.sidebar.info(
    """
This app allows you to translate text between multiple languages using the MarianMT model from Hugging Face's Helsinki-NLP collection.
"""
)