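# Spaces: Sleeping
# (The line above appears to be page-status text captured with the listing; it is not part of the program.)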
import subprocess
import sys

import gradio as gr

# Install the runtime dependencies before they are imported below.
# ``sys.executable -m pip`` is used instead of a bare ``pip`` so the packages
# land in the environment of the interpreter that is running this script; a
# ``pip`` found on PATH may belong to a different Python installation.
# check_call raises CalledProcessError if the installation fails.
subprocess.check_call(
    [sys.executable, "-m", "pip", "install", "transformers", "torch", "sentencepiece"]
)

# These imports must stay below the install step: transformers is only
# guaranteed to be importable once the command above has succeeded.
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast  # noqa: E402
from transformers import pipeline  # noqa: E402
# Checkpoint name shared by the translation model and its tokenizer.
_MBART_CHECKPOINT = "facebook/mbart-large-50-many-to-many-mmt"

# Summarisation pipeline backed by the facebook/bart-large-cnn checkpoint.
summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn")

# mBART-50 tokenizer and model, both loaded from the same checkpoint; they are
# used by summariser() to translate the English summary.
tokenizer = MBart50TokenizerFast.from_pretrained(_MBART_CHECKPOINT)
model = MBartForConditionalGeneration.from_pretrained(_MBART_CHECKPOINT)
# Display name of each supported target language -> mBART-50 language code.
# "Gujrati" is the historical spelling this function has always accepted;
# the correct spelling "Gujarati" is accepted as well.
_TARGET_LANG_CODES = {
    "Hindi": "hi_IN",
    "Gujrati": "gu_IN",
    "Gujarati": "gu_IN",
    "Bengali": "bn_IN",
    "Tamil": "ta_IN",
}


def summariser(ar_en, lang):
    """Summarise English text and translate the summary into an Indic language.

    Args:
        ar_en: The English text to summarise.
        lang: Display name of the target language; must be one of the keys of
            ``_TARGET_LANG_CODES``.

    Returns:
        The summary translated into the requested language, or an empty
        string when ``ar_en`` is empty or contains only whitespace.

    Raises:
        ValueError: If ``lang`` is ``None`` or not a supported language.
    """
    # Validate the language before doing any model work.  Previously an
    # unselected or unknown language left the language code unbound and the
    # call failed with UnboundLocalError only after summarisation had run.
    target_code = _TARGET_LANG_CODES.get(lang)
    if target_code is None:
        supported = ", ".join(sorted(_TARGET_LANG_CODES))
        raise ValueError(
            f"Unsupported target language {lang!r}; choose one of: {supported}"
        )

    # Nothing to summarise: skip both models instead of feeding them blank text.
    if not ar_en or not ar_en.strip():
        return ""

    # truncation=True asks the pipeline to cut over-long input down to the
    # summarisation model's maximum input length rather than pass it through
    # untruncated.
    summary_en = summarizer(
        ar_en,
        max_length=130,
        min_length=30,
        do_sample=False,
        truncation=True,
    )[0]["summary_text"]

    # The summary is English, so tell the tokenizer the source language.
    tokenizer.src_lang = "en_XX"
    encoded_summary = tokenizer(summary_en, return_tensors="pt")

    # Forcing the first generated token to the target language code selects
    # the output language of the translation model.
    generated_tokens = model.generate(
        **encoded_summary,
        forced_bos_token_id=tokenizer.lang_code_to_id[target_code],
    )
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
# Input widgets: free-text English paragraph and the target-language selector.
paragraph_box = gr.Textbox(
    label="Enter the paragraph in English",
    placeholder="Type here...",
)
language_radio = gr.Radio(
    ["Hindi", "Gujrati", "Bengali", "Tamil"],
    label="Language to be summarised in:",
)

# Output widget that shows the value returned by summariser().
summary_box = gr.Textbox(label="Summarised Text")

# Wire the widgets to summariser() and start the app.
iface = gr.Interface(
    fn=summariser,
    inputs=[paragraph_box, language_radio],
    outputs=summary_box,
    title="English to Indic Summariser",
)
iface.launch()