# Spaces: Running  (appears to be status text from the hosting page, not part of the program)
from sumy.parsers.plaintext import PlaintextParser | |
from sumy.nlp.tokenizers import Tokenizer | |
from sumy.summarizers.luhn import LuhnSummarizer | |
from sumy.nlp.stemmers import Stemmer | |
from sumy.utils import get_stop_words | |
import gradio as gr | |
import nltk | |
import time | |
def luhn_summarizer(text_corpus, sentence_count=25):
    """Summarize English text with sumy's Luhn summarizer.

    Args:
        text_corpus: Plain text to summarize. ``None``, an empty string, or
            whitespace-only text short-circuits to an empty summary.
        sentence_count: Value passed to the summarizer as the number of
            sentences to select. Defaults to 25, the previously hard-coded
            value.

    Returns:
        The selected sentences joined by single spaces, or ``""`` when there
        is nothing to summarize.
    """
    # Guard first so blank or missing input (e.g. a None textbox value) never
    # reaches the parser.
    if not text_corpus or not text_corpus.strip():
        return ""

    start_time = time.time()
    parser = PlaintextParser.from_string(text_corpus, Tokenizer("english"))
    summarizer = LuhnSummarizer(Stemmer("english"))
    summarizer.stop_words = get_stop_words("english")
    sentences = summarizer(parser.document, sentence_count)
    # The previous loop appended "" after each sentence, which fused the
    # sentences together; a single space keeps them readable.
    summary = " ".join(str(sentence) for sentence in sentences)
    end_time = time.time()
    print(f"Time taken: {end_time - start_time:.2f} seconds")
    return summary
def clear_everything(text_corpus, summary):
    """Return a ``(None, None)`` pair, ignoring both arguments.

    Wired to the Clear button, whose outputs are the input and summary text
    areas; the two ``None`` values are what gets sent to those components.
    """
    cleared_corpus = cleared_summary = None
    return cleared_corpus, cleared_summary
# Soft theme with a purple / cyan / slate palette.
# NOTE(review): 'Poppins' is listed three times in the font list; the
# repeats look redundant but are preserved here — confirm before deduping.
theme = gr.themes.Soft(
    primary_hue="purple",
    secondary_hue="cyan",
    neutral_hue="slate",
    font=[
        gr.themes.GoogleFont(family)
        for family in ('Syne', 'Poppins', 'Poppins', 'Poppins')
    ],
)
# UI definition: header, input text area with Clear/Summarize buttons, and a
# read-only output text area, plus the click handlers for both buttons.
# NOTE(review): the source lost its indentation; the nesting below (summary
# box as a sibling of the input column) is a reconstruction — confirm
# against the intended layout.
with gr.Blocks(theme=theme, title="Luhn Summarizer", fill_height=True) as app:
    gr.HTML(
        value='''
        <h1 style="text-align: center;">Luhn Summarizer</h1>
        <p style="text-align: center;">This app uses a Luhn approach to summarize PDF documents based on CPU.</p>
        <p style="text-align: center;">The summarization process can take some time depending on the size of the text corpus and the complexity of the content.</p>
        ''')

    with gr.Row():
        with gr.Column():
            text_corpus = gr.TextArea(
                label="Text Corpus",
                placeholder="Paste the text corpus here",
                lines=5,
            )
            with gr.Row():
                clear_btn = gr.Button(value="Clear", variant='stop')
                summarize_btn = gr.Button(value="Summarize", variant='primary')
        summary = gr.TextArea(
            label="Raw Data",
            placeholder="The generated raw data will be displayed here",
            lines=7,
            interactive=False,
            show_copy_button=True,
        )

    # Summarize: feed the pasted text to luhn_summarizer and show the result.
    # Also exposed through the API under the name "luhn_summarizer".
    summarize_btn.click(
        luhn_summarizer,
        inputs=[text_corpus],
        outputs=[summary],
        concurrency_limit=25,
        scroll_to_output=True,
        show_api=True,
        api_name="luhn_summarizer",
        show_progress="full",
    )

    # Clear: reset both text areas; hidden from the API listing.
    clear_btn.click(
        clear_everything,
        inputs=[text_corpus, summary],
        outputs=[text_corpus, summary],
        show_api=False,
    )
# Fetch the NLTK resources quietly before serving requests (presumably needed
# by the tokenizer used in luhn_summarizer — confirm).
for _nltk_resource in ('punkt', 'punkt_tab'):
    nltk.download(_nltk_resource, quiet=True)

# Enable the request queue with a default concurrency limit of 25, then start
# the server.
_queued_app = app.queue(default_concurrency_limit=25)
_queued_app.launch(show_api=True, max_threads=500, ssr_mode=False)