"""Gradio app that translates text sentence by sentence with MarianMT models."""

import logging
from typing import Dict, Tuple

import gradio as gr
import nltk
import torch
from nltk.tokenize import sent_tokenize
from transformers import MarianMTModel, MarianTokenizer

logger = logging.getLogger(__name__)

# Download punkt resources for sentence tokenization
nltk.download('punkt')
nltk.download('punkt_tab')

# Cache for storing models and tokenizers, keyed by model name.
# NOTE(review): the cache is unbounded; every model a user selects stays
# resident (on the GPU when CUDA is available) for the life of the process.
models_cache: Dict[str, Tuple[MarianMTModel, MarianTokenizer]] = {}


def load_model(model_name: str) -> Tuple[MarianMTModel, MarianTokenizer]:
    """Load and cache the MarianMT model and tokenizer.

    Args:
        model_name: Identifier passed to ``from_pretrained`` for both the
            tokenizer and the model.

    Returns:
        The ``(model, tokenizer)`` pair for ``model_name``. The model is moved
        to CUDA when it is available. Repeated calls with the same name return
        the cached pair.
    """
    if model_name not in models_cache:
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        model = MarianMTModel.from_pretrained(model_name)
        if torch.cuda.is_available():
            model = model.to('cuda')
        models_cache[model_name] = (model, tokenizer)
    return models_cache[model_name]


def translate_text(model_name: str, text: str) -> str:
    """Translate input text sentence by sentence using the specified model.

    Args:
        model_name: Model identifier to load via ``load_model``.
        text: Text to translate; it is split into sentences with
            ``sent_tokenize`` and each sentence is translated on its own.

    Returns:
        The translated sentences joined by single spaces, a prompt message
        when the model or the text is missing/blank, or ``"Error: ..."`` when
        loading or translation raises.
    """
    # Whitespace-only text would otherwise tokenize to zero sentences and
    # silently produce an empty result.
    if not model_name or not text or not text.strip():
        return "Please select a model and provide text for translation."

    try:
        # Load the model and tokenizer
        model, tokenizer = load_model(model_name)

        translated_sentences = []
        for sentence in sent_tokenize(text):
            # Truncate to the tokenizer's maximum length: sent_tokenize uses
            # its default language here, so text it cannot split arrives as
            # one very long "sentence".
            tokens = tokenizer(
                sentence,
                return_tensors="pt",
                padding=True,
                truncation=True,
            ).to(model.device)  # follow whatever device the cached model is on

            # Generate translation for the sentence
            translated = model.generate(**tokens)
            translated_sentences.append(
                tokenizer.decode(translated[0], skip_special_tokens=True)
            )

        # Join translated sentences back into a single string
        return " ".join(translated_sentences)
    except Exception as e:
        # UI boundary: show the error to the user, but keep the traceback in
        # the logs instead of discarding it.
        logger.exception("Translation failed for model %s", model_name)
        return f"Error: {str(e)}"


# Model options as (display label, model name) pairs
model_options = [
    ("English to Turkish", "Helsinki-NLP/opus-mt-tc-big-en-tr"),
    ("Turkish to English", "Helsinki-NLP/opus-mt-tc-big-tr-en"),
    ("English to French", "Helsinki-NLP/opus-mt-tc-big-en-fr"),
    ("French to English", "Helsinki-NLP/opus-mt-tc-big-fr-en"),
    ("English to German", "Helsinki-NLP/opus-mt-en-de"),
    ("German to English", "Helsinki-NLP/opus-mt-de-en"),
    ("English to Spanish", "Helsinki-NLP/opus-mt-tc-big-en-es"),
    ("Spanish to English", "Helsinki-NLP/opus-mt-es-en"),
    ("English to Arabic", "Helsinki-NLP/opus-mt-tc-big-en-ar"),
    ("Arabic to English", "Helsinki-NLP/opus-mt-tc-big-ar-en"),
    ("English to Urdu", "Helsinki-NLP/opus-mt-en-ur"),
    ("Urdu to English", "Helsinki-NLP/opus-mt-ur-en"),
    ("English to Hindi", "Helsinki-NLP/opus-mt-en-hi"),
    ("Hindi to English", "Helsinki-NLP/opus-mt-hi-en"),
    ("English to Chinese", "Helsinki-NLP/opus-mt-en-zh"),
    ("Chinese to English", "Helsinki-NLP/opus-mt-zh-en"),
]

# Create Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# 🌍 Real-Time Sentence Translation")

    with gr.Row():
        # Pass (label, value) pairs so the user sees the readable label while
        # the callback still receives the model name.
        # NOTE(review): tuple choices need Gradio 4+ — confirm the pinned version.
        model_dropdown = gr.Dropdown(
            label="Select Translation Model",
            choices=model_options,
            type="value",
        )

    with gr.Row():
        input_text = gr.Textbox(
            label="Enter text (complete sentences)",
            lines=5,
            placeholder="Type here...",
        )

    with gr.Row():
        translate_button = gr.Button("Translate")
        clear_button = gr.Button("Clear")

    output_text = gr.Textbox(label="Translated Text", interactive=False)

    def clear_inputs():
        """Return empty strings for the input and output text boxes."""
        return "", ""

    translate_button.click(
        fn=translate_text,
        inputs=[model_dropdown, input_text],
        outputs=output_text,
    )
    clear_button.click(
        fn=clear_inputs,
        inputs=[],
        outputs=[input_text, output_text],
    )

# Run the Gradio app only when executed as a script, so importing this module
# (e.g. to reuse translate_text) does not start a server.
if __name__ == "__main__":
    demo.launch()