LegalSummerizer / app.py
Quake24's picture
[ADD] Summerize urls fix error
b351ada
import gradio as gr
from transformers import pipeline,AutoTokenizer, AutoModelForSeq2SeqLM
from bs4 import BeautifulSoup
from bs4.element import Comment
from urllib.request import urlopen
import urllib.request
from bs4 import BeautifulSoup
# Hugging Face checkpoint used for every summary.
_EASYTERMS_CHECKPOINT = "EasyTerms/etsummerizer_v2"
# Filled on first use so the tokenizer and model are loaded once per process
# instead of once per call (URL mode calls easyterms() once per paragraph).
_EASYTERMS_CACHE = {}


def _load_easyterms():
    """Return the ``(tokenizer, model)`` pair, loading it on first use only."""
    if "pair" not in _EASYTERMS_CACHE:
        _EASYTERMS_CACHE["pair"] = (
            AutoTokenizer.from_pretrained(_EASYTERMS_CHECKPOINT),
            AutoModelForSeq2SeqLM.from_pretrained(_EASYTERMS_CHECKPOINT),
        )
    return _EASYTERMS_CACHE["pair"]


def easyterms(text: str) -> str:
    """Summarize *text* with the EasyTerms seq2seq checkpoint.

    The input is tokenized with truncation at 512 tokens, a summary is
    generated with 4-beam search and ``max_length=128``, and the first
    generated sequence is decoded with special tokens removed.

    Args:
        text: The text to summarize.

    Returns:
        The decoded summary string.
    """
    print("In summerizing function of easyterms")
    tokenizer, model = _load_easyterms()
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    summary_ids = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=128,
        num_beams=4,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
def get_paragaph(url: str, timeout: float = 30.0) -> list:
    """Fetch *url* and return the text of every ``<p>`` element on the page.

    Args:
        url: Address of the page to download. Surrounding whitespace is
            ignored. Only ``http://`` and ``https://`` URLs are accepted.
        timeout: Seconds to wait on the network before giving up.

    Returns:
        A list with one string per ``<p>`` element, in document order.
        Entries may be empty when the element has no text.

    Raises:
        ValueError: If *url* does not start with ``http://`` or ``https://``.
        urllib.error.URLError: If the page cannot be fetched.
    """
    url = url.strip()
    # urlopen() also understands schemes such as file:// and ftp://. The URL
    # is supplied by the caller (a web form in this app), so only plain web
    # addresses are let through.
    if not url.lower().startswith(("http://", "https://")):
        raise ValueError(f"unsupported url (expected http:// or https://): {url!r}")

    parser = "html.parser"  # or 'lxml' (preferred) or 'html5lib', if installed
    # The context manager closes the connection even if parsing fails.
    with urllib.request.urlopen(url, timeout=timeout) as response:
        charset = response.info().get_param("charset")
        # Parse while the response is still open.
        soup = BeautifulSoup(response, parser, from_encoding=charset)
    return [item.text for item in soup.find_all("p")]
def summerize(Option: str, Text: str) -> str:
    """Summarize either raw text or the paragraphs of a web page.

    Args:
        Option: ``"text"`` to summarize *Text* directly; any other value
            treats *Text* as a URL.
        Text: The text to summarize, or the URL of the page to summarize.

    Returns:
        For ``"text"``, the summary of *Text*. Otherwise, the summaries of the
        page's non-blank paragraphs joined with newlines (an empty string when
        the page has none).
    """
    print(Option)
    if Option == "text":
        return easyterms(Text)
    # Blank <p> elements carry nothing to summarize; sending them to the
    # model only costs a generation pass and adds noise lines to the output.
    paragraphs = [par for par in get_paragaph(Text) if par.strip()]
    return "\n".join(easyterms(par) for par in paragraphs)
# Introductory text shown above the input and output components.
# The heading's closing tag now matches its opening <center> tag.
INTRO_MARKDOWN = '''
<center><h1>A Legal document summerizer.</h1></center>
If you want to better understand legal text or document, this platform is for you. By choosing the url option you submit a url whose content will in turn be summerized for you.
Otherwhise you can choose the text option and submit your own text to be summerized.
'''

# Kept so the module still exposes `intro`. On its own this component is not
# attached to the interface below, which is why the text is also passed via
# `description=`.
intro = gr.Markdown(INTRO_MARKDOWN)

# Two inputs (mode selector + free text) map onto summerize(Option, Text).
interface = gr.Interface(
    fn=summerize,
    inputs=[gr.Radio(["url", "text"]), "text"],
    outputs=["text"],
    description=INTRO_MARKDOWN,
)
interface.launch()