import nltk import validators import streamlit as st from transformers import AutoTokenizer, pipeline # local modules from extractive_summarizer.model_processors import Summarizer from utils import ( clean_text, fetch_article_text, preprocess_text_for_abstractive_summarization, read_text_from_file, ) from rouge import Rouge if __name__ == "__main__": # --------------------------------- # Main Application # --------------------------------- st.title("Anavya-Text Summarizer 📝") st.markdown("Creator: [Team Anavya](Sharvesh") summarize_type = st.sidebar.selectbox( "Summarization type", options=["Extractive", "Abstractive"] ) st.markdown( "Children with learning disabilities face unique challenges in the education system. These challenges can include difficulties in reading, comprehending, and summarizing lengthy texts, making it harder for them to keep up with their peers. However, the use of text summarization technology can be a game-changer in improving their learning experiences " ) st.markdown( "Enter a text or a url to get a concise summary of the article while conserving the overall meaning. This app supports text in the following formats:" ) st.markdown( """- Raw text in text box - URL of article/news to be summarized - .txt, .pdf, .docx file formats""" ) st.markdown( """This app supports two type of summarization: 1. **Extractive Summarization**: The extractive approach involves picking up the most important phrases and lines from the documents. It then combines all the important lines to create the summary. So, in this case, every line and word of the summary actually belongs to the original document which is summarized. 2. **Abstractive Summarization**: The abstractive approach involves rephrasing the complete document while capturing the complete meaning of the document. This type of summarization provides more human-like summary""" ) st.markdown("---") # --------------------------- # SETUP & Constants nltk.download("punkt") abs_tokenizer_name = "facebook/bart-large-cnn" abs_model_name = "facebook/bart-large-cnn" abs_tokenizer = AutoTokenizer.from_pretrained(abs_tokenizer_name) abs_max_length = 250 abs_min_length = 30 # --------------------------- inp_text = st.text_input("Enter text or a url here") st.markdown( "