nllb / app.py
davanstrien's picture
davanstrien HF staff
Refactor code to improve performance and readability
3f23d73
raw
history blame
3.12 kB
import spaces
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from flores import code_mapping
import platform
import torch
# Checkpoint used for both the model and the tokenizer.
MODEL_NAME = "facebook/nllb-200-3.3B"

# Run on CPU on macOS (platform.system() reports "Darwin"); use CUDA otherwise.
if platform.system() == "Darwin":
    device = "cpu"
else:
    device = "cuda"

# Re-order the imported mapping by its values. The keys are what the UI lists
# in the language dropdowns; the values are what gets handed to the tokenizer
# as ``src_lang`` / ``tgt_lang``.
code_mapping = dict(sorted(code_mapping.items(), key=lambda pair: pair[1]))
flores_codes = [*code_mapping]
def load_model():
    """Load the ``MODEL_NAME`` seq2seq checkpoint and move it to ``device``."""
    seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
    return seq2seq_model.to(device)


# Loaded once at import time; ``translate`` reuses this instance on every call.
model = load_model()
def load_tokenizer(src_lang, tgt_lang):
    """Build a ``MODEL_NAME`` tokenizer configured for one language pair.

    Args:
        src_lang: Key of ``code_mapping`` identifying the source language.
        tgt_lang: Key of ``code_mapping`` identifying the target language.

    Returns:
        The tokenizer returned by ``AutoTokenizer.from_pretrained``.

    Raises:
        KeyError: If either argument is not a key of ``code_mapping``.
    """
    source_code = code_mapping[src_lang]
    target_code = code_mapping[tgt_lang]
    return AutoTokenizer.from_pretrained(
        MODEL_NAME,
        src_lang=source_code,
        tgt_lang=target_code,
    )
def _split_special_tokens(token_ids, special_mask):
    """Split ids into (prefix, content, suffix) around leading/trailing special tokens."""
    total = len(token_ids)
    n_prefix = 0
    while n_prefix < total and special_mask[n_prefix]:
        n_prefix += 1
    n_suffix = 0
    while n_suffix < total - n_prefix and special_mask[total - 1 - n_suffix]:
        n_suffix += 1
    content_end = total - n_suffix
    return (
        token_ids[:n_prefix],
        token_ids[n_prefix:content_end],
        token_ids[content_end:],
    )


def _window_starts(total, window_size, step):
    """Return window start offsets, stopping once a window reaches ``total``."""
    starts = []
    start = 0
    while True:
        starts.append(start)
        if start + window_size >= total:
            return starts
        start += step


@spaces.GPU
def translate(
    text: str,
    src_lang: str,
    tgt_lang: str,
    window_size: int = 800,
    overlap_size: int = 200,
) -> str:
    """Translate ``text`` with a sliding window over its tokens.

    The text is tokenized once, cut into overlapping windows, and each window
    is translated independently; the decoded pieces are joined with a single
    space. Because neighbouring windows share ``overlap_size`` tokens, the
    output can repeat content around window boundaries when the input needs
    more than one window.

    Args:
        text: Text to translate.
        src_lang: Key of ``code_mapping`` for the source language.
        tgt_lang: Key of ``code_mapping`` for the target language.
        window_size: Maximum number of tokens (special tokens included) sent
            to the model per call; also used as ``max_length`` for generation.
        overlap_size: Number of tokens shared by consecutive windows.

    Returns:
        The translated text, or ``""`` when ``text`` is empty or blank.

    Raises:
        gr.Error: If either language has not been selected or is unknown.
        ValueError: If ``overlap_size`` leaves no room for the window to advance.
    """
    # The dropdowns have no default, so an untouched one arrives as None.
    # Report that to the user instead of failing with a bare KeyError.
    if src_lang not in code_mapping or tgt_lang not in code_mapping:
        raise gr.Error("Please select both a source and a target language.")
    if not text or not text.strip():
        return ""

    tokenizer = load_tokenizer(src_lang, tgt_lang)
    encoded = tokenizer(text, return_special_tokens_mask=True)
    # Separate the special tokens the tokenizer put at the ends of the sequence
    # from the text tokens, so every window can carry them, not just the first
    # and last slice.
    prefix, content, suffix = _split_special_tokens(
        list(encoded["input_ids"]), list(encoded["special_tokens_mask"])
    )
    if not content:
        return ""

    # Keep each model input within window_size once the special tokens are
    # added back.
    content_size = window_size - len(prefix) - len(suffix)
    step = content_size - overlap_size
    if step <= 0:
        raise ValueError(
            "overlap_size must be smaller than window_size minus the "
            "tokenizer's special tokens"
        )

    # convert_tokens_to_ids is used here because the `lang_code_to_id`
    # attribute is deprecated on NLLB tokenizers.
    target_token_id = tokenizer.convert_tokens_to_ids(code_mapping[tgt_lang])

    translated_chunks = []
    for start in _window_starts(len(content), content_size, step):
        window = prefix + content[start : start + content_size] + suffix
        generated = model.generate(
            input_ids=torch.tensor([window], device=device),
            forced_bos_token_id=target_token_id,
            max_length=window_size,
            num_return_sequences=1,
        )
        translated_chunks.append(
            tokenizer.decode(generated[0], skip_special_tokens=True)
        )
    return " ".join(translated_chunks)
# Markdown shown under the page title: what NLLB is, plus caveats about
# translation quality and the sliding-window handling of long inputs.
description = """
No Language Left Behind (NLLB) is a series of open-source models aiming to provide high-quality translations between 200 languages.
This demo application allows you to use the NLLB model to translate text between a source and target language.
## Notes
- Whilst the model supports 200 languages, the quality of translations may vary between languages.
- "Low Resource" languages (languages which are less present on the internet and have a lower amount of investment) may have lower quality translations.
- The demo uses a sliding window approach to handle longer texts.
"""
# Markdown shown under the "## Instructions" heading: the three steps a user
# follows to get a translation.
instructions = """
1. Select the source and target language from the dropdown menus.
2. Enter the text you would like to translate.
3. Click the 'Translate text' button.
"""
# Single-page UI: header text, two language dropdowns, an input box, a
# translate button and an output box, each group on its own row.
with gr.Blocks() as demo:
    # Header sections, rendered top to bottom in this order.
    for _section in (
        "# No Language Left Behind (NLLB) Translation Demo",
        description,
        "## Instructions",
        instructions,
    ):
        gr.Markdown(_section)

    with gr.Row():
        src_lang = gr.Dropdown(choices=flores_codes, label="Source Language")
        target_lang = gr.Dropdown(choices=flores_codes, label="Target Language")

    with gr.Row():
        input_text = gr.Textbox(lines=6, label="Input Text")

    with gr.Row():
        btn = gr.Button("Translate text")

    with gr.Row():
        output = gr.Textbox(lines=6, label="Output Text")

    # Clicking the button runs `translate` on the text and the two selected
    # languages and writes the result into the output box.
    btn.click(
        fn=translate,
        inputs=[input_text, src_lang, target_lang],
        outputs=output,
    )

demo.launch()