Spaces:
Sleeping
Sleeping
Peter
committed on
Commit
·
98a3ea7
1
Parent(s):
8281a66
add base model for faster rt
Browse files
app.py
CHANGED
@@ -43,6 +43,7 @@ def truncate_word_count(text, max_words=512):
|
|
43 |
|
44 |
def proc_submission(
|
45 |
input_text: str,
|
|
|
46 |
num_beams,
|
47 |
token_batch_length,
|
48 |
length_penalty,
|
@@ -74,6 +75,7 @@ def proc_submission(
|
|
74 |
|
75 |
history = {}
|
76 |
clean_text = clean(input_text, lower=False)
|
|
|
77 |
processed = truncate_word_count(clean_text, max_input_length)
|
78 |
if processed["was_truncated"]:
|
79 |
tr_in = processed["truncated_text"]
|
@@ -86,8 +88,8 @@ def proc_submission(
|
|
86 |
|
87 |
_summaries = summarize_via_tokenbatches(
|
88 |
tr_in,
|
89 |
-
model,
|
90 |
-
tokenizer,
|
91 |
batch_length=token_batch_length,
|
92 |
**settings,
|
93 |
)
|
@@ -128,6 +130,7 @@ def load_examples(examples_dir="examples"):
|
|
128 |
if __name__ == "__main__":
|
129 |
|
130 |
model, tokenizer = load_model_and_tokenizer("pszemraj/led-large-book-summary")
|
|
|
131 |
title = "Long-Form Summarization: LED & BookSum"
|
132 |
description = "A simple demo of how to use a fine-tuned LED model to summarize long-form text. [This model](https://huggingface.co/pszemraj/led-large-book-summary) is a fine-tuned version of [allenai/led-large-16384](https://huggingface.co/allenai/led-large-16384) on the [BookSum dataset](https://arxiv.org/abs/2105.08209). The goal was to create a model that can generalize well and is useful in summarizing lots of text in academic and daily usage."
|
133 |
|
@@ -137,8 +140,9 @@ if __name__ == "__main__":
|
|
137 |
gr.inputs.Textbox(
|
138 |
lines=10,
|
139 |
label="input text",
|
140 |
-
placeholder="Enter text to summarize, the text will be cleaned and truncated
|
141 |
),
|
|
|
142 |
gr.inputs.Slider(
|
143 |
minimum=1, maximum=6, label="num_beams", default=4, step=1
|
144 |
),
|
|
|
43 |
|
44 |
def proc_submission(
|
45 |
input_text: str,
|
46 |
+
model_size: str,
|
47 |
num_beams,
|
48 |
token_batch_length,
|
49 |
length_penalty,
|
|
|
75 |
|
76 |
history = {}
|
77 |
clean_text = clean(input_text, lower=False)
|
78 |
+
max_input_length = 1024 if model_size == "base" else max_input_length
|
79 |
processed = truncate_word_count(clean_text, max_input_length)
|
80 |
if processed["was_truncated"]:
|
81 |
tr_in = processed["truncated_text"]
|
|
|
88 |
|
89 |
_summaries = summarize_via_tokenbatches(
|
90 |
tr_in,
|
91 |
+
model_sm if model_size == "base" else model,
|
92 |
+
tokenizer_sm if model_size == "base" else tokenizer,
|
93 |
batch_length=token_batch_length,
|
94 |
**settings,
|
95 |
)
|
|
|
130 |
if __name__ == "__main__":
|
131 |
|
132 |
model, tokenizer = load_model_and_tokenizer("pszemraj/led-large-book-summary")
|
133 |
+
model_sm, tokenizer_sm = load_model_and_tokenizer("pszemraj/led-base-book-summary")
|
134 |
title = "Long-Form Summarization: LED & BookSum"
|
135 |
description = "A simple demo of how to use a fine-tuned LED model to summarize long-form text. [This model](https://huggingface.co/pszemraj/led-large-book-summary) is a fine-tuned version of [allenai/led-large-16384](https://huggingface.co/allenai/led-large-16384) on the [BookSum dataset](https://arxiv.org/abs/2105.08209). The goal was to create a model that can generalize well and is useful in summarizing lots of text in academic and daily usage."
|
136 |
|
|
|
140 |
gr.inputs.Textbox(
|
141 |
lines=10,
|
142 |
label="input text",
|
143 |
+
placeholder="Enter text to summarize, the text will be cleaned and truncated on Spaces. Narrative, academic (both papers and lecture transcription), and article text work well. May take a bit to generate depending on the input text :)",
|
144 |
),
|
145 |
+
gr.inputs.radio(choices=['base', 'large'], label='model size', default='base'),
|
146 |
gr.inputs.Slider(
|
147 |
minimum=1, maximum=6, label="num_beams", default=4, step=1
|
148 |
),
|