# Last change: "Update app.py" (commit 1928de9, verified) by balaramas.
import gradio as gr
import subprocess
import sys

# Install the runtime dependencies before they are imported further down.
# pip is invoked as "<this interpreter> -m pip" so the packages land in the
# environment that is actually running this script; a bare "pip" resolved from
# PATH may belong to a different Python installation.  A single invocation
# also lets pip resolve the three packages together.  check_call raises
# CalledProcessError if the installation fails.
subprocess.check_call(
    [sys.executable, "-m", "pip", "install", "transformers", "torch", "sentencepiece"]
)
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
from transformers import pipeline
# Checkpoint name shared by the translation model and its tokenizer.
_MBART_CHECKPOINT = "facebook/mbart-large-50-many-to-many-mmt"

# Summarization pipeline applied to the English input text.
summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn")

# Tokenizer and sequence-to-sequence model used to translate the summary.
tokenizer = MBart50TokenizerFast.from_pretrained(_MBART_CHECKPOINT)
model = MBartForConditionalGeneration.from_pretrained(_MBART_CHECKPOINT)
# UI display name -> mBART-50 target language code.  "Gujrati" is the spelling
# the radio button in this file sends; the correct spelling "Gujarati" is
# accepted as well so either form works.
_LANG_CODES = {
    "Hindi": "hi_IN",
    "Gujrati": "gu_IN",
    "Gujarati": "gu_IN",
    "Bengali": "bn_IN",
    "Tamil": "ta_IN",
}


def summariser(ar_en, lang):
    """Summarise an English paragraph and translate the summary.

    The text is first summarised with the module-level ``summarizer``
    pipeline, then the summary is translated from English with the
    module-level ``tokenizer`` / ``model`` pair.

    Args:
        ar_en: English text to summarise.
        lang: Display name of the target language; must be a key of
            ``_LANG_CODES`` ("Hindi", "Gujrati"/"Gujarati", "Bengali",
            "Tamil").

    Returns:
        The translated summary, or an empty string when ``ar_en`` is empty
        or contains only whitespace.

    Raises:
        ValueError: If ``lang`` is missing or not a supported language.
    """
    # Resolve the language before any model work so a bad or missing selection
    # fails immediately with a clear message instead of an unbound-variable
    # error after the summarisation has already run.
    try:
        target_code = _LANG_CODES[lang]
    except (KeyError, TypeError):
        supported = ", ".join(_LANG_CODES)
        raise ValueError(
            f"Unsupported target language {lang!r}; choose one of: {supported}"
        ) from None

    # Nothing to summarise: skip the models entirely.
    if not ar_en or not ar_en.strip():
        return ""

    # truncation=True asks the pipeline to cut input that is longer than the
    # summarisation model accepts rather than failing on it.
    summary = summarizer(
        ar_en,
        max_length=130,
        min_length=30,
        do_sample=False,
        truncation=True,
    )[0]["summary_text"]

    # The summary is English, so declare that as the source language.
    tokenizer.src_lang = "en_XX"
    encoded_summary = tokenizer(summary, return_tensors="pt")

    # Forcing the first generated token to the target language code selects
    # the output language.
    generated_tokens = model.generate(
        **encoded_summary,
        forced_bos_token_id=tokenizer.lang_code_to_id[target_code],
    )
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
# Input widgets: the English paragraph and the target-language selector.
paragraph_box = gr.Textbox(
    label="Enter the paragraph in English",
    placeholder="Type here...",
)
language_radio = gr.Radio(
    ["Hindi", "Gujrati", "Bengali", "Tamil"],
    label="Language to be summarised in:",
)

# Output widget that shows the value returned by summariser().
summary_box = gr.Textbox(label="Summarised Text")

# Wire the widgets to summariser() and start the app.
iface = gr.Interface(
    fn=summariser,
    inputs=[paragraph_box, language_radio],
    outputs=summary_box,
    title="English to Indic Summariser",
)
iface.launch()