Spaces:

7jimmy
/

Build_a_News_Article_Summarizer

Runtime error

File size: 3,000 Bytes

from newspaper import Article
from newspaper import Config
import nltk
nltk.download('punkt')

from transformers import pipeline
import gradio as gr

USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) Gecko/20100101 Firefox/78.0'

config = Config()
config.browser_user_agent = USER_AGENT
config.request_timeout = 10

url = 'https://www.technologyreview.com/2021/07/09/1028140/ai-voice-actors-sound-human/'
article = Article(url, config=config)
article.download() 
article.parse() 

authors = ", ".join(author for author in article.authors)
title = article.title
date = article.publish_date
text = article.text
image = article.top_image
videos = article.movies
url = article.url
print("Information about the article")
print("=" * 30)
print(f"Title: {title}")
print(f"Author(s): {authors}")
print(f"Publish date: {date}")
print(f"Image: {image}")
print(f"Videos: {videos}")
print(f"Article link: {url}")
print(f"Content: {text[:100] + '...'}")
article.nlp()
keywords = article.keywords
keywords.sort()
print(keywords)
print(f"Summary: \n{article.summary}")

io1 = gr.Interface.load('huggingface/sshleifer/distilbart-cnn-12-6')
io2 = gr.Interface.load("huggingface/facebook/bart-large-cnn")
io3 = gr.Interface.load("huggingface/google/pegasus-xsum")  
io4 = gr.Interface.load("huggingface/sshleifer/distilbart-cnn-6-6")                   

# Use Parallel and Series directly from gradio
iface = gr.Parallel(io1, io2, io3, io4,
                    theme='huggingface', 
                    inputs=gr.inputs.Textbox(lines=10, label="Text"))

iface.launch()

def extract_article_text(url):
    USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) Gecko/20100101 Firefox/78.0'
    config = Config()
    config.browser_user_agent = USER_AGENT
    config.request_timeout = 10

    article = Article(url, config=config)
    article.download()
    article.parse()
    text = article.text
    return text

extractor = gr.Interface(extract_article_text, 'text', 'text')
summarizer = gr.Interface.load("huggingface/facebook/bart-large-cnn")

sample_url = [['https://www.technologyreview.com/2021/07/22/1029973/deepmind-alphafold-protein-folding-biology-disease-drugs-proteome/'],
              ['https://www.technologyreview.com/2021/07/21/1029860/disability-rights-employment-discrimination-ai-hiring/'],
              ['https://www.technologyreview.com/2021/07/09/1028140/ai-voice-actors-sound-human/']]

desc = '''
        Let Hugging Face models summarize articles for you. 
        Note: Shorter articles generate faster summaries.
        This summarizer uses bart-large-cnn model by Facebook
        '''

# Use Parallel and Series directly from gradio
iface = gr.Series(extractor, summarizer, 
                   inputs=gr.inputs.Textbox(lines=2, label='URL'),
                   outputs='text',
                   title='News Summarizer',
                   theme='huggingface',
                   description=desc,
                   examples=sample_url)

iface.launch()