Quake24 committed on
Commit
49dddf8
1 Parent(s): d787032

[ADD] Summerize urls

Browse files
Files changed (1) hide show
  1. app.py +29 -9
app.py CHANGED
@@ -1,22 +1,42 @@
1
  import gradio as gr
2
  from transformers import pipeline,AutoTokenizer, AutoModelForSeq2SeqLM
 
 
 
 
 
3
 
4
  def easyterms(text:str)->str:
5
- print("In summerizing function of easyterms")
6
- tokenizer = AutoTokenizer.from_pretrained("EasyTerms/etsummerizer_v2")
7
- model = AutoModelForSeq2SeqLM.from_pretrained("EasyTerms/etsummerizer_v2")
8
-
9
- inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
10
- summary_ids = model.generate(inputs['input_ids'], attention_mask=inputs['attention_mask'], max_length=128, num_beams=4)
11
- summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
12
- return summary
 
 
 
 
 
 
 
 
13
 
14
  def summerize(Option:str, Text:str)-> str:
15
  print(Option)
16
  if Option == "text":
17
  return easyterms(Text)
18
  else:
19
- return "Input is a URL string"
 
 
 
 
 
 
 
20
  intro = gr.Markdown(
21
  '''
22
  <center><h1>A Legal document summerizer.</h1></span>
 
1
  import gradio as gr
2
  from transformers import pipeline,AutoTokenizer, AutoModelForSeq2SeqLM
3
+ from bs4 import BeautifulSoup
4
+ from bs4.element import Comment
5
+ from urllib.request import urlopen
6
+ import urllib.request
7
+ from bs4 import BeautifulSoup
8
 
_EASYTERMS_CHECKPOINT = "EasyTerms/etsummerizer_v2"
# Holds the loaded (tokenizer, model) pair so the checkpoint is only read once
# per process instead of on every summarization call.
_easyterms_cache = {}


def _load_easyterms():
    """Return the (tokenizer, model) pair, loading it on the first call only."""
    if "pair" not in _easyterms_cache:
        _easyterms_cache["pair"] = (
            AutoTokenizer.from_pretrained(_EASYTERMS_CHECKPOINT),
            AutoModelForSeq2SeqLM.from_pretrained(_EASYTERMS_CHECKPOINT),
        )
    return _easyterms_cache["pair"]


def easyterms(text:str)->str:
    """Summarize *text* with the ``EasyTerms/etsummerizer_v2`` seq2seq model.

    The input is tokenized with truncation at 512 tokens, and the summary is
    generated with 4-beam search and a maximum length of 128 tokens.

    Args:
        text: The passage to summarize.

    Returns:
        The decoded summary with special tokens removed, or an empty string
        when *text* is empty or contains only whitespace.
    """
    print("In summerizing function of easyterms")
    # Nothing to summarize: skip the (expensive) model call entirely.
    if not text or not text.strip():
        return ""

    tokenizer, model = _load_easyterms()
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    summary_ids = model.generate(
        inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        max_length=128,
        num_beams=4,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
def get_paragaph(url:str, timeout:float=30.0)-> list:
    """Fetch *url* and return the text of every ``<p>`` element on the page.

    Args:
        url: Address of the page to download. Only ``http://`` and
            ``https://`` URLs are accepted, because the value comes from user
            input and ``urlopen`` would otherwise also open other schemes
            such as ``file://``.
        timeout: Seconds to wait on the network before giving up.

    Returns:
        A list with the text content of each ``<p>`` tag, in document order.

    Raises:
        ValueError: If *url* does not start with ``http://`` or ``https://``.
        urllib.error.URLError: If the page cannot be fetched.
    """
    url = url.strip()
    if not url.lower().startswith(("http://", "https://")):
        raise ValueError(f"Only http(s) URLs are supported, got: {url!r}")

    parser = 'html.parser'  # or 'lxml' (preferred) or 'html5lib', if installed
    # The context manager closes the HTTP response even if parsing fails.
    with urllib.request.urlopen(url, timeout=timeout) as response:
        # Pass the charset declared in the Content-Type header (may be None)
        # so BeautifulSoup decodes the bytes correctly.
        charset = response.info().get_param('charset')
        soup = BeautifulSoup(response, parser, from_encoding=charset)

    return [item.text for item in soup.find_all("p")]
26
 
def summerize(Option:str, Text:str)-> str:
    """Summarize either raw text or the paragraphs of a web page.

    Args:
        Option: ``"text"`` to summarize *Text* directly; any other value
            treats *Text* as a URL.
        Text: The passage to summarize, or the URL of the page to summarize.

    Returns:
        For ``"text"``, the summary of *Text*. Otherwise, the summaries of the
        page's paragraphs joined with newlines, one summary per paragraph.
    """
    print(Option)
    if Option == "text":
        return easyterms(Text)

    # URL mode: summarize each paragraph on its own; whitespace-only
    # paragraphs carry no content, so they are skipped.
    summaries = [easyterms(par) for par in get_paragaph(Text) if par.strip()]
    return '\n'.join(summaries)
38
+
39
+
40
  intro = gr.Markdown(
41
  '''
42
  <center><h1>A Legal document summerizer.</h1></span>