raannakasturi committed on
Commit
9137ef8
·
1 Parent(s): 4fe226a

Add Luhn Summarizer app with Gradio interface and required dependencies

Browse files
Files changed (2) hide show
  1. app.py +68 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sumy.parsers.plaintext import PlaintextParser
2
+ from sumy.nlp.tokenizers import Tokenizer
3
+ from sumy.summarizers.luhn import LuhnSummarizer
4
+ from sumy.nlp.stemmers import Stemmer
5
+ from sumy.utils import get_stop_words
6
+ import gradio as gr
7
+ import nltk
8
+ import time
9
+
10
def luhn_summarizer(text_corpus, sentences_count=10):
    """Summarize English text with sumy's Luhn summarizer.

    Args:
        text_corpus: Plain text to summarize.
        sentences_count: Maximum number of sentences requested from the
            summarizer. Defaults to 10, the value previously hard-coded.

    Returns:
        The selected sentences joined by single spaces, or an empty string
        when ``text_corpus`` is empty, whitespace-only, or ``None``.
    """
    # Nothing to summarize: skip parser/stemmer construction entirely.
    if not text_corpus or not text_corpus.strip():
        return ""

    start_time = time.perf_counter()
    parser = PlaintextParser.from_string(text_corpus, Tokenizer("english"))
    summarizer = LuhnSummarizer(Stemmer("english"))
    summarizer.stop_words = get_stop_words("english")
    sentences = summarizer(parser.document, sentences_count)
    # Join with a space so adjacent sentences do not run together.
    summary = " ".join(str(sentence) for sentence in sentences)
    elapsed = time.perf_counter() - start_time
    print(f"Time taken: {elapsed:.2f} seconds")
    return summary
23
+
24
def clear_everything(text_corpus, summary):
    """Return a cleared value for each of the two text areas.

    Both arguments are accepted but ignored; the result is always a pair
    of ``None`` values, one per output component.
    """
    cleared_outputs = (None, None)
    return cleared_outputs
26
+
27
# Soft theme with a custom palette. Fonts are listed in fallback order:
# each Google font appears once, followed by generic families so the UI
# still renders sans-serif when the Google fonts cannot be loaded.
theme = gr.themes.Soft(
    primary_hue="purple",
    secondary_hue="cyan",
    neutral_hue="slate",
    font=[
        gr.themes.GoogleFont("Syne"),
        gr.themes.GoogleFont("Poppins"),
        "ui-sans-serif",
        "sans-serif",
    ],
)
38
+
39
# UI layout: a header, then one row holding the input column (text area plus
# Clear/Summarize buttons) next to the read-only summary box.
with gr.Blocks(theme=theme, title="Luhn Summarizer", fill_height=True) as app:
    gr.HTML(
        value='''
        <h1 style="text-align: center;">Luhn Summarizer</h1>
        <p style="text-align: center;">This app uses the Luhn algorithm to summarize text on the CPU.</p>
        <p style="text-align: center;">The summarization process can take some time depending on the size of the text corpus and the complexity of the content.</p>
        ''')
    with gr.Row():
        with gr.Column():
            text_corpus = gr.TextArea(
                label="Text Corpus",
                placeholder="Paste the text corpus here",
                lines=5,
            )
            with gr.Row():
                clear_btn = gr.Button(value="Clear", variant='stop')
                summarize_btn = gr.Button(value="Summarize", variant='primary')
        # Output box: labelled as the summary it actually displays.
        summary = gr.TextArea(
            label="Summary",
            placeholder="The generated summary will be displayed here",
            lines=7,
            interactive=False,
            show_copy_button=True,
        )

    # Summarize: exposed through the API under the name "luhn_summarizer".
    summarize_btn.click(
        luhn_summarizer,
        inputs=[text_corpus],
        outputs=[summary],
        concurrency_limit=25,
        scroll_to_output=True,
        show_api=True,
        api_name="luhn_summarizer",
        show_progress="full",
    )
    # Clear: resets both text areas; kept out of the public API listing.
    clear_btn.click(
        clear_everything,
        inputs=[text_corpus, summary],
        outputs=[text_corpus, summary],
        show_api=False,
    )
65
+
66
# Fetch NLTK tokenizer data up front (presumably required by the sumy
# Tokenizer used in luhn_summarizer -- confirm against the sumy docs).
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)

# Queue settings are applied at import time so they hold however the app is
# served; queue() configures the existing Blocks instance in place.
app.queue(default_concurrency_limit=25)

# Only start the blocking web server when run as a script, so importing this
# module (tests, tooling) does not launch it.
if __name__ == "__main__":
    app.launch(show_api=True, max_threads=500, ssr_mode=False)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio==5.8.0
2
+ sumy==0.11.0
3
+ nltk==3.9.1