Update app.py
app.py CHANGED
@@ -254,11 +254,11 @@ def highlight_entities(article_content,summary_output):
     matched_entities, unmatched_entities = get_and_compare_entities(article_content,summary_output)

     for entity in matched_entities:
-
+        summary_output = summary_output.replace(entity, markdown_start_green + entity + markdown_end)

     for entity in unmatched_entities:
-
-    soup = BeautifulSoup(
+        summary_output = summary_output.replace(entity, markdown_start_red + entity + markdown_end)
+    soup = BeautifulSoup(summary_output, features="html.parser")
     return HTML_WRAPPER.format(soup)


@@ -338,11 +338,12 @@ def schleifer_model():
                           device=0 if torch.cuda.is_available() else -1)
     return summarizer

-
-
-
-
-
+@st.experimental_singleton(suppress_st_warning=True)
+def google_model():
+    model_name = 'google/pegasus-cnn_dailymail'
+    summarizer = pipeline('summarization', model=model_name, tokenizer=model_name,
+                          device=0 if torch.cuda.is_available() else -1)
+    return summarizer

 @st.experimental_singleton(suppress_st_warning=True)
 def get_sentence_embedding_model():
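For reference, a minimal standalone sketch of the pipeline the added google_model() builds and of how its output is consumed; the sample article text and the length limits below are placeholders, not values taken from app.py.

# Minimal sketch of the Pegasus summarization pipeline added above.
# The sample input and max_length/min_length values are placeholders.
import torch
from transformers import pipeline

model_name = 'google/pegasus-cnn_dailymail'
summarizer = pipeline('summarization', model=model_name, tokenizer=model_name,
                      device=0 if torch.cuda.is_available() else -1)  # GPU if available, else CPU

article = "Long article text to be summarized goes here ..."
result = summarizer(article, max_length=100, min_length=30,
                    clean_up_tokenization_spaces=True, no_repeat_ngram_size=4)

# The pipeline returns a list of dicts with a 'summary_text' key,
# which the app joins into a single string.
summary = ' '.join(chunk['summary_text'] for chunk in result)
print(summary)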
@@ -450,8 +451,8 @@ if summarize:
             text="Loading Facebook-Bart Model and Extracting summary. This might take a few seconds depending on the length of your text..."
         ):
             summarizer_model = facebook_model()
-            summarized_text = summarizer_model(text_to_summarize
-            summarized_text = ' '.join([summ['summary_text']
+            summarized_text = summarizer_model(text_to_summarize, max_length=max_len, min_length=min_len,clean_up_tokenization_spaces=True,no_repeat_ngram_size=4)
+            summarized_text = ' '.join([summ['summary_text'] for summ in summarized_text])

     elif model_type == "Sshleifer-DistilBart":
         if url_text:
@@ -463,25 +464,26 @@ if summarize:
             text="Loading Sshleifer-DistilBart Model and Extracting summary. This might take a few seconds depending on the length of your text..."
         ):
             summarizer_model = schleifer_model()
-            summarized_text = summarizer_model(text_to_summarize, max_length=max_len, min_length=min_len)
+            summarized_text = summarizer_model(text_to_summarize, max_length=max_len, min_length=min_len,clean_up_tokenization_spaces=True,no_repeat_ngram_size=4)
             summarized_text = ' '.join([summ['summary_text'] for summ in summarized_text])

     elif model_type == "Google-Pegasus":
         if url_text:
-            text_to_summarize = cleaned_text
+            text_to_summarize = cleaned_text[0]
+
         else:
-            text_to_summarize = cleaned_text
+            text_to_summarize = cleaned_text[0]

         with st.spinner(
             text="Loading Sshleifer-DistilBart Model and Extracting summary. This might take a few seconds depending on the length of your text..."
         ):
             summarizer_model = google_model()
-            summarized_text = summarizer_model(text_to_summarize, max_length=max_len, min_length=min_len)
+            summarized_text = summarizer_model(text_to_summarize, max_length=max_len, min_length=min_len,clean_up_tokenization_spaces=True,no_repeat_ngram_size=4)
             summarized_text = ' '.join([summ['summary_text'] for summ in summarized_text])

         with st.spinner("Calculating and matching entities, this takes a few seconds..."):

-            entity_match_html = highlight_entities(' '.join(
+            entity_match_html = highlight_entities(' '.join(text_to_summarize),summarized_text)
             st.subheader("Summarized text with matched entities in Green and mismatched entities in Red relative to the original text")
             st.markdown("####")

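The first hunk rewrites the entity-highlighting step that the last hunk now calls with ' '.join(text_to_summarize) and the generated summary. Below is a self-contained sketch of that flow, assuming stand-in values for the markup strings and wrapper; app.py defines its own markdown_start_green, markdown_start_red, markdown_end and HTML_WRAPPER.

# Self-contained sketch of the highlighting step; the markup strings and
# HTML_WRAPPER below are assumed stand-ins for the ones defined in app.py.
from bs4 import BeautifulSoup

markdown_start_green = '<span style="background-color: lightgreen">'
markdown_start_red = '<span style="background-color: salmon">'
markdown_end = '</span>'
HTML_WRAPPER = '<div style="border: 1px solid #e6e9ef; padding: 1rem">{}</div>'

def highlight(summary_output, matched_entities, unmatched_entities):
    # Entities also found in the source article are wrapped in the "green" markup,
    # entities only present in the summary in the "red" markup.
    for entity in matched_entities:
        summary_output = summary_output.replace(entity, markdown_start_green + entity + markdown_end)
    for entity in unmatched_entities:
        summary_output = summary_output.replace(entity, markdown_start_red + entity + markdown_end)
    # Parse the marked-up string so it renders as HTML instead of escaped text.
    soup = BeautifulSoup(summary_output, features="html.parser")
    return HTML_WRAPPER.format(soup)

print(highlight("Apple opened a new store in Paris.", ["Apple"], ["Paris"]))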