# LuhnSummarizer / app.py
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.luhn import LuhnSummarizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words
import gradio as gr
import nltk
import time
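# Extractive summarization with sumy's Luhn summarizer: parse the raw text,
# apply English stemming and stop words, and keep the 25 highest-scoring sentences.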
def luhn_summarizer(text_corpus):
    start_time = time.time()
    parser = PlaintextParser.from_string(text_corpus, Tokenizer("english"))
    stemmer = Stemmer("english")
    summarizer = LuhnSummarizer(stemmer)
    summarizer.stop_words = get_stop_words("english")
    sentences = summarizer(parser.document, 25)
    # Join the selected sentences with spaces so they do not run together.
    summary = " ".join(str(sentence) for sentence in sentences)
    end_time = time.time()
    print(f"Time taken: {end_time - start_time:.2f} seconds")
    return summary
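# Clear-button callback: discard the current input and output values.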
def clear_everything(text_corpus, summary):
    return None, None
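# Custom Gradio Soft theme: purple/cyan palette rendered with Google Fonts.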
theme = gr.themes.Soft(
    primary_hue="purple",
    secondary_hue="cyan",
    neutral_hue="slate",
    font=[
        gr.themes.GoogleFont('Syne'),
        gr.themes.GoogleFont('Poppins'),
    ],
)
with gr.Blocks(theme=theme, title="Luhn Summarizer", fill_height=True) as app:
    gr.HTML(
        value='''
        <h1 style="text-align: center;">Luhn Summarizer</h1>
        <p style="text-align: center;">This app uses the Luhn algorithm to summarize text (for example, text extracted from PDF documents) and runs entirely on CPU.</p>
        <p style="text-align: center;">The summarization process can take some time depending on the size of the text corpus and the complexity of the content.</p>
        ''')
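    # Layout: an input column holding the text corpus and the action buttons,
    # followed by a read-only textbox for the generated summary.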
    with gr.Row():
        with gr.Column():
            text_corpus = gr.TextArea(label="Text Corpus", placeholder="Paste the text corpus here", lines=5)
            with gr.Row():
                clear_btn = gr.Button(value="Clear", variant='stop')
                summarize_btn = gr.Button(value="Summarize", variant='primary')
    summary = gr.TextArea(label="Summary", placeholder="The generated summary will be displayed here", lines=7, interactive=False, show_copy_button=True)
    summarize_btn.click(
        luhn_summarizer,
        inputs=[text_corpus],
        outputs=[summary],
        concurrency_limit=25,
        scroll_to_output=True,
        show_api=True,
        api_name="luhn_summarizer",
        show_progress="full",
    )
    clear_btn.click(clear_everything, inputs=[text_corpus, summary], outputs=[text_corpus, summary], show_api=False)
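# Download the NLTK 'punkt' tokenizer data that sumy's Tokenizer("english") relies on
# before the app starts serving requests.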
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)
app.queue(default_concurrency_limit=25).launch(show_api=True, max_threads=500, ssr_mode=False)
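# A minimal usage sketch (not part of the app): assuming the app is reachable at
# http://127.0.0.1:7860, the "luhn_summarizer" endpoint could be called from Python
# with gradio_client, e.g.:
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860")
#   result = client.predict("Paste a long text corpus here...", api_name="/luhn_summarizer")
#   print(result)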