Peter committed on
Commit
8dbbc84
·
1 Parent(s): f4f4797
Files changed (1) hide show
  1. app.py +6 -7
app.py CHANGED
@@ -1,10 +1,9 @@
1
  import logging
2
- from pathlib import Path
3
- import os
4
  import re
 
 
5
  import gradio as gr
6
  import nltk
7
- import torch
8
  from cleantext import clean
9
 
10
  from summarize import load_model_and_tokenizer, summarize_via_tokenbatches
@@ -78,8 +77,7 @@ def proc_submission(
78
  processed = truncate_word_count(clean_text, max_input_length)
79
  if processed["was_truncated"]:
80
  tr_in = processed["truncated_text"]
81
- history["was_truncated"] = True
82
- msg = f"Input text was truncated to {max_input_length} characters."
83
  logging.warning(msg)
84
  history["WARNING"] = msg
85
  else:
@@ -129,9 +127,9 @@ def load_examples(examples_dir="examples"):
129
  if __name__ == "__main__":
130
 
131
  model, tokenizer = load_model_and_tokenizer("pszemraj/led-large-book-summary")
132
- title = "Long-form Summarization: LED & BookSum"
133
  description = (
134
- "This is a simple example of using the LED model to summarize a long-form text. This model is a fine-tuned version of [allenai/led-large-16384](https://huggingface.co/allenai/led-large-16384) on the booksum dataset. the goal was to create a model that can generalize well and is useful in summarizing lots of text in academic and daily usage."
135
  )
136
 
137
  gr.Interface(
@@ -162,6 +160,7 @@ if __name__ == "__main__":
162
  examples_per_page=4,
163
  title=title,
164
  description=description,
 
165
  examples=load_examples(),
166
  cache_examples=False,
167
  ).launch(enable_queue=True, )
 
1
  import logging
 
 
2
  import re
3
+ from pathlib import Path
4
+
5
  import gradio as gr
6
  import nltk
 
7
  from cleantext import clean
8
 
9
  from summarize import load_model_and_tokenizer, summarize_via_tokenbatches
 
77
  processed = truncate_word_count(clean_text, max_input_length)
78
  if processed["was_truncated"]:
79
  tr_in = processed["truncated_text"]
80
+ msg = f"Input text was truncated to {max_input_length} words (based on whitespace)"
 
81
  logging.warning(msg)
82
  history["WARNING"] = msg
83
  else:
 
127
  if __name__ == "__main__":
128
 
129
  model, tokenizer = load_model_and_tokenizer("pszemraj/led-large-book-summary")
130
+ title = "Long-Form Summarization: LED & BookSum"
131
  description = (
132
+ "A simple demo of how to use a fine-tuned LED model to summarize long-form text. [This model](https://huggingface.co/pszemraj/led-large-book-summary) is a fine-tuned version of [allenai/led-large-16384](https://huggingface.co/allenai/led-large-16384) on the [BookSum dataset](https://arxiv.org/abs/2105.08209). The goal was to create a model that can generalize well and is useful in summarizing lots of text in academic and daily usage."
133
  )
134
 
135
  gr.Interface(
 
160
  examples_per_page=4,
161
  title=title,
162
  description=description,
163
+ article="The model can be used with tag [pszemraj/led-large-book-summary](https://huggingface.co/pszemraj/led-large-book-summary). See the model card for details on usage & a notebook for a tutorial.",
164
  examples=load_examples(),
165
  cache_examples=False,
166
  ).launch(enable_queue=True, )