Change some text and set truncation for summarization model
app.py CHANGED
@@ -51,7 +51,7 @@ def get_summarizer_model():
 
 # Page setup
 st.set_page_config(
-    page_title="Post-processing summarization fact checker",
+    page_title="📜 Post-processing summarization fact checker 📜",
     page_icon="",
     layout="centered",
     initial_sidebar_state="auto",
@@ -68,7 +68,7 @@ def list_all_article_names() -> list:
     for file in sorted(os.listdir('./sample-articles/')):
         if file.endswith('.txt'):
             filenames.append(file.replace('.txt', ''))
-
+    # Append free use possibility:
     filenames.append("Provide your own input")
     return filenames
 
@@ -101,7 +101,6 @@ def fetch_dependency_specific_contents(filename: str) -> AnyStr:
 
 def fetch_dependency_svg(filename: str) -> AnyStr:
     with open(f'./dependency-images/{filename.lower()}.txt', 'r') as f:
-        # data = f.read()
        lines = [line.rstrip() for line in f]
        return lines
 
@@ -113,9 +112,6 @@ def display_summary(summary_content: str):
 
 
 def get_all_entities_per_sentence(text):
-    # load all NER models
-    # nlp = get_spacy()
-    # tagger = get_flair_tagger()
     doc = nlp(text)
 
     sentences = list(doc.sents)
@@ -128,7 +124,7 @@ def get_all_entities_per_sentence(text):
         for entity in sentence.ents:
             entities_this_sentence.append(str(entity))
 
-        # FLAIR ENTITIES
+        # FLAIR ENTITIES (CURRENTLY NOT USED)
         # sentence_entities = Sentence(str(sentence))
         # tagger.predict(sentence_entities)
         # for entity in sentence_entities.get_spans('ner'):
@@ -150,22 +146,17 @@ def get_all_entities(text):
 
 
 def get_and_compare_entities():
-    # article_content = fetch_article_contents(article_name)
     article_content = st.session_state.article_text
     all_entities_per_sentence = get_all_entities_per_sentence(article_content)
-    # st.session_state.entities_per_sentence_article = all_entities_per_sentence
     entities_article = list(itertools.chain.from_iterable(all_entities_per_sentence))
 
-    # summary_content = fetch_summary_contents(article_name)
     summary_content = st.session_state.summary_output
     all_entities_per_sentence = get_all_entities_per_sentence(summary_content)
-    # st.session_state.entities_per_sentence_summary = all_entities_per_sentence
     entities_summary = list(itertools.chain.from_iterable(all_entities_per_sentence))
 
     matched_entities = []
     unmatched_entities = []
     for entity in entities_summary:
-        # TODO: currently substring matching but probably should do embedding method or idk?
         if any(entity.lower() in substring_entity.lower() for substring_entity in entities_article):
             matched_entities.append(entity)
         elif any(
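The matching kept above is plain case-insensitive substring containment between summary and article entities. A minimal standalone sketch of the idea (made-up example texts, and it assumes spaCy's en_core_web_sm; app.py itself loads its pipeline through the cached get_spacy()):

```python
import itertools
import spacy

nlp = spacy.load("en_core_web_sm")

def entities_per_sentence(text):
    # One list of entity strings per sentence, mirroring get_all_entities_per_sentence().
    return [[str(ent) for ent in sent.ents] for sent in nlp(text).sents]

article = "President Joe Biden visited Kyiv on Monday."
summary = "Biden visited Moscow."

entities_article = list(itertools.chain.from_iterable(entities_per_sentence(article)))
for entity in itertools.chain.from_iterable(entities_per_sentence(summary)):
    matched = any(entity.lower() in candidate.lower() for candidate in entities_article)
    print(entity, "->", "matched" if matched else "unmatched")
# "Biden" is a substring of "Joe Biden" and matches; "Moscow" gets flagged.
```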
@@ -179,7 +170,6 @@ def get_and_compare_entities():
 
 
 def highlight_entities():
-    # summary_content = fetch_summary_contents(article_name)
     summary_content = st.session_state.summary_output
     markdown_start_red = "<mark class=\"entity\" style=\"background: rgb(238, 135, 135);\">"
     markdown_start_green = "<mark class=\"entity\" style=\"background: rgb(121, 236, 121);\">"
@@ -206,11 +196,9 @@ def check_dependency(article: bool):
     if article:
         text = st.session_state.article_text
         all_entities = get_all_entities_per_sentence(text)
-        # all_entities = st.session_state.entities_per_sentence_article
     else:
         text = st.session_state.summary_output
         all_entities = get_all_entities_per_sentence(text)
-        # all_entities = st.session_state.entities_per_sentence_summary
     doc = nlp(text)
     tok_l = doc.to_json()['tokens']
     test_list_dict_output = []
@@ -230,7 +218,6 @@ def check_dependency(article: bool):
                 continue
             # ONE NEEDS TO BE ENTITY
             if object_here in all_entities[i]:
-                # all_deps = all_deps.join(str(sentence))
                 identifier = object_here + t['dep'] + object_target
                 test_list_dict_output.append({"dep": t['dep'], "cur_word_index": (t['id'] - sentence.start),
                                               "target_word_index": (t['head'] - sentence.start),
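check_dependency() keys each dependency on an identifier built from child word + dep label + head word, using the token records from spaCy's Doc.to_json() as shown above; identifiers present in the summary but absent from the article flag suspect rewrites. A rough sketch of that comparison (hypothetical sentences, en_core_web_sm assumed, and without the entity filter and per-sentence bookkeeping of the real function):

```python
import spacy

nlp = spacy.load("en_core_web_sm")

def dependency_identifiers(text):
    doc = nlp(text)
    tokens = doc.to_json()['tokens']  # dicts with 'id', 'head', 'dep', ...
    words = [doc[t['id']].text for t in tokens]
    # Same identifier scheme as above: child word + dep label + head word.
    return {words[t['id']] + t['dep'] + words[t['head']] for t in tokens}

article_ids = dependency_identifiers("Jan's wife is a doctor.")
summary_ids = dependency_identifiers("Jan's husband is a doctor.")
print(summary_ids - article_ids)  # includes the new "Jan" -poss-> "husband" edge
```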
@@ -261,23 +248,24 @@ def generate_abstractive_summary(text, type, min_len=120, max_len=512, **kwargs)
     if type == "top_p":
         text = summarization_model(text, min_length=min_len,
                                    max_length=max_len,
-                                   top_k=50, top_p=0.95, clean_up_tokenization_spaces=True)
+                                   top_k=50, top_p=0.95, clean_up_tokenization_spaces=True, truncation=True, **kwargs)
     elif type == "greedy":
         text = summarization_model(text, min_length=min_len,
-                                   max_length=max_len, clean_up_tokenization_spaces=True)
+                                   max_length=max_len, clean_up_tokenization_spaces=True, truncation=True, **kwargs)
     elif type == "top_k":
         text = summarization_model(text, min_length=min_len, max_length=max_len, top_k=50,
-                                   clean_up_tokenization_spaces=True)
+                                   clean_up_tokenization_spaces=True, truncation=True, **kwargs)
     elif type == "beam":
         text = summarization_model(text, min_length=min_len,
                                    max_length=max_len,
-                                   clean_up_tokenization_spaces=True, **kwargs)
+                                   clean_up_tokenization_spaces=True, truncation=True, **kwargs)
     summary = text[0]['summary_text'].replace("<n>", " ")
     return summary
 
 
+
 # Page
-st.title('Summarization fact checker')
+st.title('📜 Summarization fact checker 📜')
 
 # INTRODUCTION
 st.header("Introduction")
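The substantive change in this hunk is truncation=True: the transformers pipeline now tells its tokenizer to clip inputs at the model's maximum source length (1,024 tokens for pegasus-cnn_dailymail) instead of passing over-long sequences to the model. A standalone sketch of the behaviour (loading the checkpoint directly rather than through the app's cached get_summarizer_model()):

```python
from transformers import pipeline

# Same checkpoint the app's introduction links to.
summarizer = pipeline("summarization", model="google/pegasus-cnn_dailymail")

# An input comfortably longer than the model's 1,024-token window.
long_article = " ".join(["The quick brown fox jumps over the lazy dog."] * 400)

# With truncation=True the tokenizer clips the input; without it, over-long
# inputs can error out or degrade the generated summary.
result = summarizer(long_article, min_length=120, max_length=512,
                    truncation=True, clean_up_tokenization_spaces=True)
print(result[0]['summary_text'].replace("<n>", " "))
```

The reworded UI text later in this diff, warning that too long articles will be truncated and might lose information, reflects the same behaviour.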
@@ -286,16 +274,14 @@ several different downstream NLP tasks. One such task is that of text summarizat
 is to generate concise and accurate summaries from input document(s). There are 2 types of summarization: extractive
 and abstractive. **Extractive summarization** merely copies informative fragments from the input,
 whereas **abstractive summarization** may generate novel words. A good abstractive summary should cover principal
-information in the input and has to be linguistically fluent. This blogpost will focus on this more difficult task of
+information in the input and has to be linguistically fluent. This interactive blogpost will focus on this more difficult task of
 abstractive summary generation.""")
 
 st.markdown("""To generate summaries we will use the [PEGASUS] (https://huggingface.co/google/pegasus-cnn_dailymail)
 model, producing abstractive summaries from large articles. These summaries often contain sentences with different
-kinds of errors. Rather than improving the core model, we will look into possible post-processing steps to
-the generated summaries.
-
-results for some methods on specific examples. These text blocks will be indicated and they change according to the
-currently selected article.""")
+kinds of errors. Rather than improving the core model, we will look into possible post-processing steps to detect errors
+from the generated summaries. Throughout this blog, we will also explain the results for some methods on specific
+examples. These text blocks will be indicated and they change according to the currently selected article.""")
 
 # Load all different models (cached) at start time of the hugginface space
 sentence_embedding_model = get_sentence_embedding_model()
@@ -304,10 +290,11 @@ nlp = get_spacy()
 summarization_model = get_summarizer_model()
 
 # GENERATING SUMMARIES PART
-st.header("Generating summaries")
+st.header("🪶 Generating summaries")
 st.markdown("Let’s start by selecting an article text for which we want to generate a summary, or you can provide "
-            "text yourself. Note that it’s suggested to provide a sufficiently large
-            "generated from it might not be optimal, leading to suboptimal performance of the post-processing
+            "text yourself. Note that it’s suggested to provide a sufficiently large article, as otherwise the "
+            "summary generated from it might not be optimal, leading to suboptimal performance of the post-processing "
+            "steps. However, too long articles will be truncated and might miss information in the summary.")
 
 selected_article = st.selectbox('Select an article or provide your own:',
                                 list_all_article_names())
@@ -319,19 +306,18 @@ article_text = st.text_area(
 )
 
 summarize_button = st.button(label='Process article content',
-                             help="
+                             help="Start interactive blogpost")
 
 if summarize_button:
     st.session_state.article_text = article_text
     st.markdown(
-        "Below you can find the generated summary for the article.
-        "
+        "Below you can find the generated summary for the article. We will discuss two approaches that we found are "
+        "able to detect some common errors. Based on errors, one could then score different summaries, indicating how "
         "factual a summary is for a given article. The idea is that in production, you could generate a set of "
         "summaries for the same article, with different parameters (or even different models). By using "
         "post-processing error detection, we can then select the best possible summary.")
     if st.session_state.article_text:
-        with st.spinner('Generating summary...'):
-            # classify_comment(article_text, selected_model)
+        with st.spinner('Generating summary, this might take a while...'):
             if selected_article != "Provide your own input" and article_text == fetch_article_contents(
                     selected_article):
                 st.session_state.unchanged_text = True
@@ -367,17 +353,15 @@ if summarize_button:
 
     markdown_start_red = "<mark class=\"entity\" style=\"background: rgb(238, 135, 135);\">"
     markdown_start_green = "<mark class=\"entity\" style=\"background: rgb(121, 236, 121);\">"
-    st.markdown(
-
-
-
-
-
-
-
-
-        "explanation of the results below.",
-        unsafe_allow_html=True)
+    st.markdown(
+        "We call this technique “entity matching” and here you can see what this looks like when we apply this "
+        "method on the summary. Entities in the summary are marked " + green_text + " when the entity also "
+        "exists in the article, "
+        "while unmatched entities "
+        "are marked " + red_text +
+        ". Several of the example articles and their summaries indicate different errors we find by using this "
+        "technique. Based on the current article, we provide a short explanation of the results below **(only for "
+        "example articles)**. ", unsafe_allow_html=True)
     if st.session_state.unchanged_text:
         entity_specific_text = fetch_entity_specific_contents(selected_article)
         soup = BeautifulSoup(entity_specific_text, features="html.parser")
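For context, the <mark> strings above get wrapped around matched (green) and unmatched (red) entities and rendered with unsafe_allow_html=True. A toy version of that rendering step (hypothetical entity lists; the real highlight_entities() works from the match results computed earlier):

```python
import streamlit as st

markdown_start_green = "<mark class=\"entity\" style=\"background: rgb(121, 236, 121);\">"
markdown_start_red = "<mark class=\"entity\" style=\"background: rgb(238, 135, 135);\">"
markdown_end = "</mark>"

summary = "Jan's husband crossed the borders of Ukraine."
matched_entities, unmatched_entities = ["Ukraine"], ["Jan"]

for entity in matched_entities:
    summary = summary.replace(entity, markdown_start_green + entity + markdown_end)
for entity in unmatched_entities:
    summary = summary.replace(entity, markdown_start_red + entity + markdown_end)

st.markdown(summary, unsafe_allow_html=True)  # run via `streamlit run sketch.py`
```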
@@ -397,21 +381,17 @@ if summarize_button:
     st.markdown("Here, “Jan” is the “poss” (possession modifier) of “wife”. If suddenly the summary would read “Jan’s "
                 "husband…”, there would be a dependency in the summary that is non-existent in the article itself (namely "
                 "“Jan” is the “poss” of “husband”). However, often new dependencies are introduced in the summary that "
-                "are still correct. “The borders of Ukraine” have a different dependency between “borders” and
+                "are still correct. “The borders of Ukraine” have a different dependency between “borders” and "
+                "“Ukraine” "
                 "than “Ukraine’s borders”, while both descriptions have the same meaning. So just matching all "
                 "dependencies between article and summary (as we did with entity matching) would not be a robust method.")
-    st.markdown(
-
-
-
-
-
-        "dependencies between an existing **entity** and its direct connections. Below we highlight all unmatched "
-        "dependencies that satisfy the discussed constraints. We also discuss the specific results for the "
-        "currently selected article.")
+    st.markdown("However, we have found that there are specific dependencies that, when unmatched, are often an "
+                "indication of a wrongly constructed sentence. We found 2(/3 TODO) common dependencies which, "
+                "when present in the summary but not in the article, are highly indicative of factualness errors. "
+                "Furthermore, we only check dependencies between an existing **entity** and its direct connections. "
+                "Below we highlight all unmatched dependencies that satisfy the discussed constraints. We also "
+                "discuss the specific results for the currently selected example article.")
     with st.spinner("Doing dependency parsing..."):
-        # TODO RIGHT IF FUNCTION (IF EXAMPLE AND IF INPUT UNCHANGED)
-        # if selected_article == 'article11':
         if st.session_state.unchanged_text:
             for cur_svg_image in fetch_dependency_svg(selected_article):
                 st.write(cur_svg_image, unsafe_allow_html=True)
@@ -431,15 +411,13 @@ if summarize_button:
 
 # OUTRO/CONCLUSION
 st.header("Wrapping up")
-st.markdown(
-
-
-
-
-    "definitely not sufficiently robust for general use-cases. (something about that we tested also RE and "
-    "maybe other things).")
+st.markdown("We have presented 2 methods that try to detect errors in summaries via post-processing steps. Entity "
+            "matching can be used to solve hallucinations, while dependency comparison can be used to filter out "
+            "some bad sentences (and thus worse summaries). These methods highlight the possibilities of "
+            "post-processing AI-made summaries, but are only a first introduction. As the methods were "
+            "empirically tested they are definitely not sufficiently robust for general use-cases.")
 st.markdown("####")
-st.markdown("Below we generated 5 different kind of summaries from the article in which their ranks are estimated, "
+st.markdown("(TODO) Below we generated 5 different kind of summaries from the article in which their ranks are estimated, "
             "and hopefully the best summary (read: the one that a human would prefer or indicate as the best one) "
             "will be at the top. TODO: implement this (at the end I think) and also put something in the text with "
             "the actual parameters or something? ")
|