"""Streamlit app that translates English text with Helsinki-NLP OPUS-MT models."""

import streamlit as st
from transformers import MarianMTModel, MarianTokenizer

# Supported models for different language pairs
# (display name -> Hugging Face Hub checkpoint, English as the source).
# NOTE(review): confirm that every checkpoint listed here is actually
# published on the Hub; a checkpoint that cannot be loaded is reported to
# the user as an "Error: ..." message instead of crashing the app.
MODELS = {
    'French': "Helsinki-NLP/opus-mt-en-fr",
    'Spanish': "Helsinki-NLP/opus-mt-en-es",
    'German': "Helsinki-NLP/opus-mt-en-de",
    'Chinese': "Helsinki-NLP/opus-mt-en-zh",
    'Russian': "Helsinki-NLP/opus-mt-en-ru",
    'Japanese': "Helsinki-NLP/opus-mt-en-ja",
    'Arabic': "Helsinki-NLP/opus-mt-en-ar",
    'Urdu': "Helsinki-NLP/opus-mt-en-ur",
    'Hindi': "Helsinki-NLP/opus-mt-en-hi",
    'Bengali': "Helsinki-NLP/opus-mt-en-bn",
}

# Language codes for different models.
# Kept as a public constant for anything that imports it; translate_text no
# longer consults it because each checkpoint in MODELS already fixes the
# target language.
LANG_CODE_MAP = {
    'French': 'fr',
    'Spanish': 'es',
    'German': 'de',
    'Chinese': 'zh',
    'Russian': 'ru',
    'Japanese': 'ja',
    'Arabic': 'ar',
    'Urdu': 'ur',
    'Hindi': 'hi',
    'Bengali': 'bn',
}


@st.cache_resource
def _load_model(model_name):
    """Load and cache the Marian model and tokenizer for *model_name*.

    Streamlit re-executes the whole script on every interaction, so the
    loaded objects are kept in Streamlit's resource cache rather than being
    rebuilt on each button click.

    Returns:
        A ``(model, tokenizer)`` tuple.

    Raises:
        OSError: if the checkpoint cannot be found or downloaded.
    """
    model = MarianMTModel.from_pretrained(model_name)
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    return model, tokenizer


def translate_text(text: str, target_lang: str) -> str:
    """Translate English *text* into *target_lang*.

    Args:
        text: The English text to translate.
        target_lang: A display name that is a key of ``MODELS``
            (for example ``'French'``).

    Returns:
        The translated text, an empty string when *text* is empty or only
        whitespace, or a message starting with ``"Error:"`` when the
        language is unsupported or its model cannot be loaded.
    """
    model_name = MODELS.get(target_lang)
    if not model_name:
        return "Error: Language not supported."

    # Nothing to translate; skip loading and running the model.
    if not text or not text.strip():
        return ""

    try:
        model, tokenizer = _load_model(model_name)
    except OSError as exc:
        # from_pretrained raises OSError when a checkpoint is missing or
        # cannot be downloaded.
        return f"Error: Could not load model '{model_name}': {exc}"

    # Truncate to the model's maximum input length so over-long text does
    # not fail inside the model.
    encoded_text = tokenizer(text, return_tensors="pt", truncation=True)

    # No forced_bos_token_id here: that mechanism is for mBART/M2M100-style
    # models. Forcing the id of a bare code such as 'fr' makes a Marian
    # model emit an arbitrary first token and corrupts the translation.
    translated = model.generate(**encoded_text)

    return tokenizer.decode(translated[0], skip_special_tokens=True)


# Streamlit app
st.title('Language Translator')

# Input text and language selection
text_to_translate = st.text_area("Enter text to translate")
target_language = st.selectbox("Select target language", list(MODELS.keys()))

# Translate button
if st.button('Translate'):
    # Treat whitespace-only input the same as no input at all.
    if text_to_translate and text_to_translate.strip():
        translated_text = translate_text(text_to_translate, target_language)
        st.write(f"Translated text ({target_language}): {translated_text}")
    else:
        st.error("Please enter text to translate.")