Spaces:
Build error
Build error
from textwrap import wrap | |
from sumy.parsers import DocumentParser | |
from sumy.parsers.html import HtmlParser | |
from sumy.parsers.plaintext import PlaintextParser | |
from sumy.nlp.tokenizers import Tokenizer | |
from sumy.nlp.stemmers import Stemmer | |
from sumy.summarizers.lsa import LsaSummarizer | |
from sumy.utils import get_stop_words | |
from transformers import Pipeline | |
class Summarizer:
    """Produce extractive (LSA) and abstractive (pipeline-based) summaries.

    Extractive summaries are computed with sumy's ``LsaSummarizer``;
    abstractive summaries are delegated to the callable ``pipeline`` that
    is supplied at construction time.
    """

    # Language name handed to the sumy stemmer, tokenizer and stop-word list.
    DEFAULT_LANGUAGE = "english"

    def __init__(self, pipeline: "Pipeline") -> None:
        """Store the abstractive pipeline and configure the LSA summarizer.

        Args:
            pipeline: Callable used by ``abstractive_summary``. It is called
                with a list of text chunks and must return one mapping per
                chunk, each containing a ``'summary_text'`` entry.
        """
        self.pipeline = pipeline
        stemmer = Stemmer(Summarizer.DEFAULT_LANGUAGE)
        self.lsa_summarizer = LsaSummarizer(stemmer)
        self.lsa_summarizer.stop_words = get_stop_words(
            language=Summarizer.DEFAULT_LANGUAGE
        )

    @staticmethod
    def sentence_list(summarized_sentences) -> list:
        """Return the text of every sentence object in the given iterable.

        Declared as a static method so it can be called both on the class
        (``Summarizer.sentence_list(...)``) and on an instance.

        Args:
            summarized_sentences: Iterable of objects exposing a ``_text``
                attribute.

        Returns:
            A list with the ``_text`` value of each item, in iteration order.
        """
        return [sentence._text for sentence in summarized_sentences]

    def __extractive_summary(self, parser: "DocumentParser", sentences_count):
        """Run the LSA summarizer over ``parser.document``.

        Args:
            parser: Parser whose ``document`` attribute is summarized.
            sentences_count: Passed to the LSA summarizer as its second
                argument.

        Returns:
            A ``(all_sentences, summary_sentences)`` pair of lists holding
            the text of every sentence in the document and the text of the
            sentences selected by the summarizer, respectively.
        """
        summarized_sentences = self.lsa_summarizer(parser.document, sentences_count)
        summarized_list = Summarizer.sentence_list(summarized_sentences)
        all_sentences_list = Summarizer.sentence_list(parser.document.sentences)
        return all_sentences_list, summarized_list

    def extractive_summary_from_text(
        self, text: str, sentences_count: int
    ) -> "tuple[list, list]":
        """Build an extractive summary of a plain-text string.

        Args:
            text: The text to summarize.
            sentences_count: Passed to the LSA summarizer as its second
                argument.

        Returns:
            A ``(all_sentences, summary_sentences)`` pair of lists.
        """
        parser = PlaintextParser.from_string(
            text, Tokenizer(Summarizer.DEFAULT_LANGUAGE)
        )
        return self.__extractive_summary(parser, sentences_count)

    def extractive_summary_from_url(
        self, url: str, sentences_count: int
    ) -> "tuple[list, list]":
        """Build an extractive summary of the HTML document at ``url``.

        Args:
            url: Location handed to ``HtmlParser.from_url``.
            sentences_count: Passed to the LSA summarizer as its second
                argument.

        Returns:
            A ``(all_sentences, summary_sentences)`` pair of lists.
        """
        parser = HtmlParser.from_url(url, Tokenizer(Summarizer.DEFAULT_LANGUAGE))
        return self.__extractive_summary(parser, sentences_count)

    def abstractive_summary(self, summary: str) -> str:
        """Summarize ``summary`` with the configured pipeline.

        The text is split by ``textwrap.wrap`` into chunks of at most 2048
        characters, all chunks are passed to the pipeline in a single call,
        and the resulting ``'summary_text'`` values are joined with spaces.

        Args:
            summary: The text to summarize.

        Returns:
            The joined chunk summaries, or ``""`` when the input yields no
            chunks (empty or whitespace-only text).
        """
        chunks = wrap(summary, 2048)
        if not chunks:
            # Nothing to summarize: skip the pipeline call entirely.
            return ""
        results = self.pipeline(chunks)
        return " ".join(result['summary_text'] for result in results)