patent-summarizer / util /summarizer.py
amielle's picture
feat: Build class for summarizer pipeline
cb09dc9
raw
history blame
2.85 kB
import gradio as gr
from util import textproc
# Patent sections for which a summary can be requested.
summary_options = [
    "Abstract",
    "Background",
    "Claims",
]

# Identifiers of the summarization models; each one is handed unchanged to
# gr.Interface.load by init_models.
model_names = [
    "huggingface/google/bigbird-pegasus-large-bigpatent",
    "huggingface/cnicu/t5-small-booksum",
    "huggingface/sshleifer/distilbart-cnn-6-6",
    "huggingface/google/pegasus-xsum",
]
def init_models():
    """Load every model listed in ``model_names``.

    Returns:
        A dict that maps each entry of ``model_names`` to the object returned
        by ``gr.Interface.load`` for that entry, in the same order as
        ``model_names``.
    """
    return {model_id: gr.Interface.load(model_id) for model_id in model_names}
class PatentSummarizer():
    """Summarize the abstract, background and claims sections of a patent.

    Each requested section is passed to the model chosen for it and the model
    output is cleaned up with ``textproc.post_process``.
    """

    def __init__(self, model_collection):
        """Store the models available for summarization.

        Args:
            model_collection: Mapping from model name to a callable that is
                invoked with a text string and returns its summary.
        """
        self.model = model_collection
        # Not referenced by any method of this class; kept so existing
        # readers of the attribute keep working.
        self.max_word_input = 1000

    def _summarize_truncated(self, model_name, text, word_limit):
        """Cut *text* at the index given by ``textproc.get_word_index`` and
        return the raw output of ``self.model[model_name]`` for the result."""
        # NOTE(review): get_word_index presumably returns the character index
        # at which the first `word_limit` words end -- confirm in textproc.
        truncated = text[0: textproc.get_word_index(text, word_limit)]
        return self.model[model_name](truncated)

    def pipeline(self, patent_information, summaries_generated, abstract_model,
                 background_model, claims_model, collate_claims, word_limit):
        """Parse a patent and summarize the requested sections.

        Args:
            patent_information: Patent reference handed to
                ``textproc.retrieve_parsed_doc``.
            summaries_generated: Collection of section names to summarize;
                checked for "Abstract", "Background" and "Claims".
            abstract_model: Key into ``self.model`` used for the abstract.
            background_model: Key into ``self.model`` used for the background.
            claims_model: Key into ``self.model`` used for the claims.
            collate_claims: When truthy, the claims are joined with spaces,
                truncated and summarized in a single model call; otherwise
                every claim is summarized on its own (without truncation) and
                the outputs are concatenated.
            word_limit: Limit forwarded to ``textproc.get_word_index`` when
                truncating model input.

        Returns:
            A list of exactly three items in the order
            ``[abstract, background, claims]``. A section that was not
            requested or is unavailable is ``None``. On failure the first
            item is a message starting with "[ERROR]" and the other two are
            ``None``.
        """
        parsed_info = textproc.retrieve_parsed_doc(patent_information,
                                                   summaries_generated)
        if parsed_info is None:
            return ["[ERROR] Invalid Patent Information.", None, None]

        abstract, background, claims = parsed_info

        try:
            abstract_summary = None
            if "Abstract" in summaries_generated and abstract is not None:
                abstract_summary = textproc.post_process(
                    self._summarize_truncated(abstract_model, abstract,
                                              word_limit))

            background_summary = None
            if "Background" in summaries_generated and background is not None:
                background_summary = textproc.post_process(
                    self._summarize_truncated(background_model, background,
                                              word_limit))

            claims_summary = None
            if "Claims" in summaries_generated and claims is not None:
                if collate_claims:
                    raw_claims_summary = self._summarize_truncated(
                        claims_model, ' '.join(claims), word_limit)
                else:
                    summarize_claim = self.model[claims_model]
                    raw_claims_summary = ''.join(
                        summarize_claim(claim) for claim in claims)
                claims_summary = textproc.post_process(raw_claims_summary)

            return [abstract_summary, background_summary, claims_summary]
        except Exception as e:
            # Always report three slots, like every other return path, so the
            # result shape does not depend on how many sections were selected.
            return [f'[ERROR] {e}', None, None]