Spaces:
Runtime error
Runtime error
# import gradio as gr | |
# Def_04: translate a .docx file and save the result as a new .docx file
from transformers import MarianMTModel, MarianTokenizer | |
import nltk | |
from nltk.tokenize import sent_tokenize | |
from nltk.tokenize import LineTokenizer | |
nltk.download('punkt') | |
import math | |
import torch | |
from docx import Document | |
from time import sleep | |
import docx | |
def getText(filename):
    """Return the text of a .docx document as one newline-separated string.

    Args:
        filename: Passed unchanged to ``docx.Document`` to open the document.

    Returns:
        The ``text`` of every paragraph in the document, in order, joined
        with newline characters.
    """
    document = docx.Document(filename)
    return '\n'.join(paragraph.text for paragraph in document.paragraphs)
# Def_01: draw a text progress bar for long-running functions
import sys | |
def print_progress_bar(index, total, label):
    """Redraw a single-line text progress bar on stdout.

    The line starts with a carriage return so that successive calls
    overwrite the previous bar instead of scrolling the terminal.

    Args:
        index: Number of items completed so far.
        total: Total number of items. A value of zero or less is treated as
            "nothing to do" and rendered as 100% instead of raising
            ``ZeroDivisionError``.
        label: Text printed after the percentage.
    """
    n_bar = 50  # Progress bar width
    if total > 0:
        # Clamp so an out-of-range index can neither overflow the fixed-width
        # bar nor produce a negative percentage.
        progress = min(max(index / total, 0.0), 1.0)
    else:
        progress = 1.0
    sys.stdout.write('\r')
    sys.stdout.write(f"[{'=' * int(n_bar * progress):{n_bar}s}] {int(100 * progress)}% {label}")
    sys.stdout.flush()
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
dev = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(dev)

# Load the pretrained Marian checkpoint and its tokenizer once at import time,
# and place the model on the selected device.
mname = "Helsinki-NLP/opus-mt-en-hi"
tokenizer = MarianTokenizer.from_pretrained(mname)
model = MarianMTModel.from_pretrained(mname).to(device)
def _translate_sentences(sentences, batch_size):
    """Translate a list of sentences in batches using the module-level model.

    Returns the decoded translations in the same order as ``sentences``.
    """
    translated = []
    for start in range(0, len(sentences), batch_size):
        sent_batch = sentences[start:start + batch_size]
        model_inputs = tokenizer(
            sent_batch,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=500,
        ).to(device)
        # Inference only, so gradient tracking is unnecessary.
        with torch.no_grad():
            generated = model.generate(**model_inputs)
        translated.extend(
            tokenizer.decode(t, skip_special_tokens=True) for t in generated
        )
    return translated


def btTranslator(docxfile):
    """Translate a .docx file paragraph by paragraph and save the result.

    The document text is read with ``getText``, split into non-blank lines
    (one per paragraph), each paragraph is split into sentences, and the
    sentences are translated in batches. Every translated paragraph is
    written to a new document saved in the current working directory as
    ``Translated_<original file name>``.

    Args:
        docxfile: Path of the source .docx file.

    Returns:
        A tuple ``(translated_paragraphs, output_path)``: the list of
        translated paragraph strings and the path of the saved document.
    """
    import os  # local import: only needed to derive the output file name

    # LineTokenizer drops blank lines by default, so the extracted text can
    # be tokenized directly without splitting and re-joining it first.
    paragraphs = LineTokenizer().tokenize(getText(docxfile))
    total = len(paragraphs)
    batch_size = 8

    files = Document()
    translated_paragraphs = []
    for index, paragraph in enumerate(paragraphs, start=1):
        sentences = sent_tokenize(paragraph)
        translated_text = " ".join(_translate_sentences(sentences, batch_size))
        translated_paragraphs.append(translated_text)
        # add_paragraph expects the paragraph text as a single string, not
        # the list of translated sentences.
        files.add_paragraph(translated_text)
        # Report after the paragraph is done so the bar finishes at 100%.
        print_progress_bar(index, total, "Percentage Bar")

    # Use the file's base name rather than slicing off a fixed-length
    # directory prefix, so any input location works.
    output_path = f"Translated_{os.path.basename(docxfile)}"
    # Document.save() returns None, so hand back the path for the file output.
    files.save(output_path)
    return translated_paragraphs, output_path
import gradio as gr | |
# Gradio UI: a single-line textbox supplies the .docx path to btTranslator;
# the two outputs are shown as text and as a file.
# (A multi-file gr.inputs.File input was previously sketched here but is not enabled.)
docx_path_input = gr.inputs.Textbox(lines=1)
interface = gr.Interface(
    fn=btTranslator,
    inputs=docx_path_input,
    outputs=['text', 'file'],
    show_progress=True,
)
interface.launch(debug=True)