File size: 2,231 Bytes
56b97c3
 
 
e1452a1
 
 
 
 
 
 
 
 
 
 
 
56b97c3
 
82debbb
cce785c
 
 
 
 
 
 
 
 
 
 
 
 
8a5cce1
e1452a1
 
 
 
 
 
 
 
82debbb
8a5cce1
cce785c
82debbb
cce785c
 
 
 
8a5cce1
82debbb
cce785c
8a5cce1
 
 
56b97c3
 
8a5cce1
56b97c3
8a5cce1
 
e1452a1
56b97c3
8a5cce1
 
56b97c3
e1452a1
 
56b97c3
8a5cce1
e1452a1
cce785c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import streamlit as st
from transformers import MarianMTModel, MarianTokenizer

# Target languages offered by the app, keyed by display name. The value is
# the language code that appears as the suffix of the checkpoint name.
LANG_CODE_MAP = dict(
    French='fr',
    Spanish='es',
    German='de',
    Chinese='zh',
    Russian='ru',
    Japanese='ja',
    Arabic='ar',
    Urdu='ur',
    Hindi='hi',
    Bengali='bn',
)

# English -> target checkpoint for every supported language, derived from
# the code table so the two mappings always share the same keys and order.
# NOTE(review): nothing here checks that each generated checkpoint ID is
# actually published on the Hugging Face Hub -- verify the less common pairs.
MODELS = {
    language: f"Helsinki-NLP/opus-mt-en-{code}"
    for language, code in LANG_CODE_MAP.items()
}

def translate_text(text, target_lang):
    """Translate English ``text`` into ``target_lang`` with a MarianMT model.

    Args:
        text: English source text to translate.
        target_lang: Display name of the target language; must be a key of
            ``MODELS`` (for example ``'French'``).

    Returns:
        The translated string. Returns ``""`` when ``text`` is empty or
        whitespace-only, and ``"Error: Language not supported."`` when
        ``target_lang`` has no entry in ``MODELS``.
    """
    # Nothing to translate: skip the expensive model load. Whitespace-only
    # input is truthy, so a plain ``if text:`` check in the caller lets it in.
    if not text or not text.strip():
        return ""

    model_name = MODELS.get(target_lang)
    if not model_name:
        return "Error: Language not supported."

    # NOTE(review): the model and tokenizer are re-created on every call.
    # Consider wrapping the load in Streamlit's resource cache so repeated
    # translations do not reload the weights each time.
    model = MarianMTModel.from_pretrained(model_name)
    tokenizer = MarianTokenizer.from_pretrained(model_name)

    # Truncate to the tokenizer's maximum length so very long input cannot
    # overflow the model's position limit. Text past the limit is dropped.
    encoded_text = tokenizer(text, return_tensors="pt", truncation=True)

    # MarianMT does not select its target language through
    # ``forced_bos_token_id`` (that is the mBART/M2M100 convention). Forcing
    # the id of a plain token such as 'fr' only injects a spurious or unknown
    # first token into the output, so generation runs without it.
    # NOTE(review): multi-target Marian checkpoints expect a ``>>id<<`` prefix
    # on the source text instead -- confirm on each model card whether any
    # checkpoint in MODELS needs one.
    translated = model.generate(**encoded_text)

    return tokenizer.decode(translated[0], skip_special_tokens=True)

# --- Streamlit UI ---------------------------------------------------------
st.title('Language Translator')

# Collect the source text and the target language (one option per MODELS key).
text_to_translate = st.text_area("Enter text to translate")
target_language = st.selectbox("Select target language", list(MODELS))

# Act only on the run in which the button was pressed: report missing input,
# otherwise translate and show the result.
translate_clicked = st.button('Translate')
if translate_clicked and not text_to_translate:
    st.error("Please enter text to translate.")
elif translate_clicked:
    translated_text = translate_text(text_to_translate, target_language)
    st.write(f"Translated text ({target_language}): {translated_text}")