santhosh's picture
Create app.py
3cdca2d verified
raw
history blame
2.19 kB
import ctranslate2
import gradio as gr
from huggingface_hub import snapshot_download
from sentencepiece import SentencePieceProcessor
# Heading shown at the top of the demo page.
title = "Mesolitica t5-base-standard-bahasa Translation Demo"

# HTML blurb rendered under the heading. It is assembled from adjacent string
# literals so each anchor tag sits on its own source line; the resulting text
# (including every newline) is what the page displays.
description = (
    "\n<p>\n"
    "Translator using "
    "<a href='https://huggingface.co/spaces/mesolitica/malaysian-translation/' target='_blank'>"
    "Mesolitica Malaysian Translation model</a>. This demo application uses\n"
    "CTranslate2 optimized version of it: "
    '<a href="https://huggingface.co/santhosh/mesolitica-t5-base-standard-bahasa-cased-ct2">'
    "santhosh/mesolitica-t5-base-standard-bahasa-cased-ct2</a>,\n"
    "</p>\n"
)
# Language code (as selected in the UI) -> language name inserted into the
# translation prompt.
map_lang = {
    "en": "Inggeris",
    "jv": "Jawa",
    "bjn": "Banjarese",
    "ms": "Melayu",
    "id": "Indonesia",
}

# Download the converted model repository from the Hugging Face Hub and keep
# the local directory it was stored in.
model_name = "santhosh/mesolitica-t5-base-standard-bahasa-cased-ct2"
model_path = snapshot_download(repo_id=model_name)

# The CTranslate2 translator is loaded from the downloaded directory.
translator = ctranslate2.Translator(model_path)

# The SentencePiece model file is read from the same directory.
tokenizer = SentencePieceProcessor()
tokenizer.load(f"{model_path}/sentencepiece.model")
def translate(input_text, target_language):
    """Translate ``input_text`` into the language selected by ``target_language``.

    The text is prefixed with a ``terjemah ke <language>:`` instruction,
    tokenized with the module-level SentencePiece ``tokenizer``, passed to the
    module-level CTranslate2 ``translator`` and the first hypothesis is
    decoded back to a string.

    Args:
        input_text: Text to translate.
        target_language: Language code; must be a key of ``map_lang``.

    Returns:
        The translated text, or an empty string when ``input_text`` is empty
        or contains only whitespace.

    Raises:
        KeyError: If ``input_text`` is non-blank and ``target_language`` is
            not a key of ``map_lang``.
    """
    # Nothing to translate: skip the model instead of sending it a bare prefix.
    if not input_text or not input_text.strip():
        return ""

    # The prompt must start directly with the instruction; the original
    # literal carried a stray leading "f'" that was sent to the model.
    prompt = f"terjemah ke {map_lang[target_language]}: {input_text}"
    input_tokens = tokenizer.encode(prompt, out_type=str)

    results = translator.translate_batch(
        [input_tokens],
        batch_type="tokens",
        max_input_length=6144,
        max_decoding_length=6144,
        max_batch_size=1024,
        beam_size=1,
    )

    # A single input was submitted, so the first result's first hypothesis
    # is the translation.
    return tokenizer.decode(results[0].hypotheses[0])
def translate_interface(input_text, target_language):
    """Callback handed to the Gradio interface; forwards both values to ``translate``.

    Args:
        input_text: Text to translate.
        target_language: Language code of the desired output language.

    Returns:
        Whatever ``translate`` returns for the same arguments.
    """
    return translate(input_text, target_language)
# Source text box, pre-filled with an example sentence.
input_text = gr.Textbox(
    value="Imagine a world in which every single person on the planet is given free access to the sum of all human knowledge.",
    label="Input Text",
)

# (label shown in the dropdown, language code passed to the callback) pairs.
languages = [
    ("English", "en"),
    ("Bahasa Melayu", "ms"),
    ("Indonesian", "id"),
    ("Banjarese", "bjn"),
    ("Jawa", "jv"),
]
target_language = gr.Dropdown(
    choices=languages,
    value="en",
    label="Target Language",
)

# Box that displays the callback's return value.
output_text = gr.Textbox(label="Translated Text")

# Assemble the interface from the pieces above and start serving it.
demo = gr.Interface(
    fn=translate_interface,
    inputs=[input_text, target_language],
    outputs=output_text,
    title=title,
    description=description,
)
demo.launch()