pszemraj committed on
Commit
9bc2923
•
1 Parent(s): 93b2cca

💄 reorg UI

Browse files

Signed-off-by: peter szemraj <peterszemraj@gmail.com>

Files changed (1) hide show
  1. app.py +61 -52
app.py CHANGED
@@ -1,4 +1,6 @@
1
  import logging
 
 
2
  import time
3
  from pathlib import Path
4
 
@@ -64,7 +66,14 @@ def proc_submission(
64
 
65
  if processed["was_truncated"]:
66
  tr_in = processed["truncated_text"]
67
- msg = f"Input text was truncated to {max_input_length} words (based on whitespace)"
 
 
 
 
 
 
 
68
  logging.warning(msg)
69
  history["WARNING"] = msg
70
  else:
@@ -92,7 +101,7 @@ def proc_submission(
92
  html = ""
93
  html += f"<p>Runtime: {rt} minutes on CPU</p>"
94
  if msg is not None:
95
- html += f"<h2>WARNING:</h2><hr><b>{msg}</b><br><br>"
96
 
97
  html += ""
98
 
@@ -152,7 +161,7 @@ if __name__ == "__main__":
152
  name_to_path = load_example_filenames(_here / "examples")
153
  logging.info(f"Loaded {len(name_to_path)} examples")
154
  demo = gr.Blocks()
155
-
156
  with demo:
157
 
158
  gr.Markdown("# Long-Form Summarization: LED & BookSum")
@@ -167,66 +176,37 @@ if __name__ == "__main__":
167
  )
168
  with gr.Row():
169
  model_size = gr.Radio(
170
- choices=["base", "large"], label="Model Variant", value="large"
171
  )
172
  num_beams = gr.Radio(
173
  choices=[2, 3, 4],
174
  label="Beam Search: # of Beams",
175
  value=2,
176
  )
177
- gr.Markdown(
178
- "_The base model is less performant than the large model, but is faster and will accept up to 2048 words per input (Large model accepts up to 768)._"
179
- )
180
- with gr.Row():
181
- length_penalty = gr.inputs.Slider(
182
- minimum=0.5,
183
- maximum=1.0,
184
- label="length penalty",
185
- default=0.7,
186
- step=0.05,
187
- )
188
- token_batch_length = gr.Radio(
189
- choices=[512, 768, 1024],
190
- label="token batch length",
191
- value=512,
192
- )
193
-
194
- with gr.Row():
195
- repetition_penalty = gr.inputs.Slider(
196
- minimum=1.0,
197
- maximum=5.0,
198
- label="repetition penalty",
199
- default=3.5,
200
- step=0.1,
201
- )
202
- no_repeat_ngram_size = gr.Radio(
203
- choices=[2, 3, 4],
204
- label="no repeat ngram size",
205
- value=3,
206
- )
207
  with gr.Row():
208
  example_name = gr.Dropdown(
209
- list(name_to_path.keys()),
210
- label="Choose an Example",
 
211
  )
212
- load_examples_button = gr.Button(
213
- "Load Example",
214
- )
215
- input_text = gr.Textbox(
216
- lines=6,
217
- label="Input Text (for summarization)",
218
- placeholder="Enter text to summarize, the text will be cleaned and truncated on Spaces. Narrative, academic (both papers and lecture transcription), and article text work well. May take a bit to generate depending on the input text :)",
219
- )
220
- gr.Markdown("Upload your own file:")
221
- with gr.Row():
222
  uploaded_file = gr.File(
223
- label="Upload a text file",
224
  file_count="single",
225
  type="file",
226
  )
227
- load_file_button = gr.Button("Load Uploaded File")
228
-
229
- gr.Markdown("---")
 
 
 
 
 
 
 
 
 
230
 
231
  with gr.Column():
232
  gr.Markdown("## Generate Summary")
@@ -250,10 +230,39 @@ if __name__ == "__main__":
250
  label="Summary Scores", placeholder="Summary scores will appear here"
251
  )
252
 
253
- gr.Markdown("---")
254
 
255
  with gr.Column():
256
- gr.Markdown("## About the Model")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
  gr.Markdown(
258
  "- [This model](https://huggingface.co/pszemraj/led-large-book-summary) is a fine-tuned checkpoint of [allenai/led-large-16384](https://huggingface.co/allenai/led-large-16384) on the [BookSum dataset](https://arxiv.org/abs/2105.08209).The goal was to create a model that can generalize well and is useful in summarizing lots of text in academic and daily usage."
259
  )
 
1
  import logging
2
+ import random
3
+ import re
4
  import time
5
  from pathlib import Path
6
 
 
66
 
67
  if processed["was_truncated"]:
68
  tr_in = processed["truncated_text"]
69
+ # create elaborate HTML warning
70
+ input_wc = re.split(r"\s+", input_text)
71
+ msg = f"""
72
+ <div style="background-color: #FFA500; color: white; padding: 20px;">
73
+ <h3>Warning</h3>
74
+ <p>Input text was truncated to {max_input_length} words. That's about {100*max_input_length/len(input_wc):.2f}% of the submission.</p>
75
+ </div>
76
+ """
77
  logging.warning(msg)
78
  history["WARNING"] = msg
79
  else:
 
101
  html = ""
102
  html += f"<p>Runtime: {rt} minutes on CPU</p>"
103
  if msg is not None:
104
+ html += msg
105
 
106
  html += ""
107
 
 
161
  name_to_path = load_example_filenames(_here / "examples")
162
  logging.info(f"Loaded {len(name_to_path)} examples")
163
  demo = gr.Blocks()
164
+ _examples = list(name_to_path.keys())
165
  with demo:
166
 
167
  gr.Markdown("# Long-Form Summarization: LED & BookSum")
 
176
  )
177
  with gr.Row():
178
  model_size = gr.Radio(
179
+ choices=["base", "large"], label="Model Variant", value="base"
180
  )
181
  num_beams = gr.Radio(
182
  choices=[2, 3, 4],
183
  label="Beam Search: # of Beams",
184
  value=2,
185
  )
186
+ gr.Markdown("Select an example, or upload a `.txt` file")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  with gr.Row():
188
  example_name = gr.Dropdown(
189
+ _examples,
190
+ label="Examples",
191
+ value=random.choice(_examples),
192
  )
 
 
 
 
 
 
 
 
 
 
193
  uploaded_file = gr.File(
194
+ label="File Upload",
195
  file_count="single",
196
  type="file",
197
  )
198
+ with gr.Row():
199
+ input_text = gr.Textbox(
200
+ lines=4,
201
+ label="Input Text (for summarization)",
202
+ placeholder="Enter text to summarize, the text will be cleaned and truncated on Spaces. Narrative, academic (both papers and lecture transcription), and article text work well. May take a bit to generate depending on the input text :)",
203
+ )
204
+ with gr.Column(min_width=100, scale=0.5):
205
+ load_examples_button = gr.Button(
206
+ "Load Example",
207
+ )
208
+ load_file_button = gr.Button("Upload File")
209
+ gr.Markdown("---")
210
 
211
  with gr.Column():
212
  gr.Markdown("## Generate Summary")
 
230
  label="Summary Scores", placeholder="Summary scores will appear here"
231
  )
232
 
233
+ gr.Markdown("---")
234
 
235
  with gr.Column():
236
+ gr.Markdown("### Advanced Settings")
237
+ with gr.Row():
238
+ length_penalty = gr.inputs.Slider(
239
+ minimum=0.5,
240
+ maximum=1.0,
241
+ label="length penalty",
242
+ default=0.7,
243
+ step=0.05,
244
+ )
245
+ token_batch_length = gr.Radio(
246
+ choices=[512, 768, 1024, 1536],
247
+ label="token batch length",
248
+ value=1024,
249
+ )
250
+
251
+ with gr.Row():
252
+ repetition_penalty = gr.inputs.Slider(
253
+ minimum=1.0,
254
+ maximum=5.0,
255
+ label="repetition penalty",
256
+ default=3.5,
257
+ step=0.1,
258
+ )
259
+ no_repeat_ngram_size = gr.Radio(
260
+ choices=[2, 3, 4],
261
+ label="no repeat ngram size",
262
+ value=3,
263
+ )
264
+ with gr.Column():
265
+ gr.Markdown("### About the Model")
266
  gr.Markdown(
267
  "- [This model](https://huggingface.co/pszemraj/led-large-book-summary) is a fine-tuned checkpoint of [allenai/led-large-16384](https://huggingface.co/allenai/led-large-16384) on the [BookSum dataset](https://arxiv.org/abs/2105.08209).The goal was to create a model that can generalize well and is useful in summarizing lots of text in academic and daily usage."
268
  )