# 7jimmy's picture
# Update app.py
# 8ee43ad
from newspaper import Article
from newspaper import Config
import nltk
nltk.download('punkt')
from transformers import pipeline
import gradio as gr
# --- Demo: fetch one article, then print its metadata, keywords and summary ---

# Browser-like User-Agent string handed to newspaper's Config below.
USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) Gecko/20100101 Firefox/78.0'

config = Config()
config.browser_user_agent = USER_AGENT
config.request_timeout = 10

url = 'https://www.technologyreview.com/2021/07/09/1028140/ai-voice-actors-sound-human/'

# Download and parse the page so the metadata attributes can be read.
article = Article(url, config=config)
article.download()
article.parse()

# Collect the fields that are reported below.
authors = ", ".join(article.authors)
title, date, text = article.title, article.publish_date, article.text
image, videos = article.top_image, article.movies
url = article.url  # rebinds `url` to the URL stored on the Article object

# Print the report one line at a time; only the first 100 characters of the
# article body are shown, followed by an ellipsis.
report_lines = [
    "Information about the article",
    "=" * 30,
    f"Title: {title}",
    f"Author(s): {authors}",
    f"Publish date: {date}",
    f"Image: {image}",
    f"Videos: {videos}",
    f"Article link: {url}",
    f"Content: {text[:100]}...",
]
for report_line in report_lines:
    print(report_line)

# Run newspaper's NLP step, which populates `keywords` and `summary`.
article.nlp()
keywords = article.keywords
keywords.sort()  # in-place sort of the list held by the article object
print(keywords)

summary_text = article.summary
print(f"Summary: \n{summary_text}")
# Load four hosted summarization models as individual Gradio interfaces.
io1 = gr.Interface.load('huggingface/sshleifer/distilbart-cnn-12-6')
io2 = gr.Interface.load("huggingface/facebook/bart-large-cnn")
io3 = gr.Interface.load("huggingface/google/pegasus-xsum")
io4 = gr.Interface.load("huggingface/sshleifer/distilbart-cnn-6-6")

# Use Parallel and Series directly from gradio
# Parallel feeds the same input text to all four models.
iface = gr.Parallel(
    io1,
    io2,
    io3,
    io4,
    theme='huggingface',
    inputs=gr.inputs.Textbox(lines=10, label="Text"),
)

# Launch without holding the main thread. A plain launch() blocks in script
# mode, which would keep every statement after this one from running while
# this server is up; prevent_thread_lock=True starts the server and returns.
iface.launch(prevent_thread_lock=True)
def extract_article_text(url):
    """Download the article at *url* and return its text.

    Args:
        url: Address of the article page. Leading and trailing whitespace
            (for example a newline typed into a multi-line textbox) is
            removed before the request is made.

    Returns:
        The ``text`` attribute of the downloaded and parsed ``Article``.

    Raises:
        ValueError: If *url* is empty or contains only whitespace.
    """
    # Validate first so a blank submission produces a clear message instead
    # of an opaque failure from the download/parse steps.
    url = (url or "").strip()
    if not url:
        raise ValueError("Please enter the URL of an article to summarize.")

    # Browser-like User-Agent string handed to newspaper's Config.
    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) Gecko/20100101 Firefox/78.0'
    config = Config()
    config.browser_user_agent = user_agent
    config.request_timeout = 10

    article = Article(url, config=config)
    article.download()
    article.parse()
    return article.text
# Stage 1: wrap the extractor function as a text-in / text-out interface.
extractor = gr.Interface(fn=extract_article_text, inputs='text', outputs='text')
# Stage 2: hosted model that receives the output of stage 1.
summarizer = gr.Interface.load("huggingface/facebook/bart-large-cnn")

# Example rows for the UI; each row is a one-element list holding a URL.
sample_url = [
    [link]
    for link in (
        'https://www.technologyreview.com/2021/07/22/1029973/deepmind-alphafold-protein-folding-biology-disease-drugs-proteome/',
        'https://www.technologyreview.com/2021/07/21/1029860/disability-rights-employment-discrimination-ai-hiring/',
        'https://www.technologyreview.com/2021/07/09/1028140/ai-voice-actors-sound-human/',
    )
]

# Description passed to the interface (leading and trailing newline kept).
desc = (
    "\n"
    "Let Hugging Face models summarize articles for you.\n"
    "Note: Shorter articles generate faster summaries.\n"
    "This summarizer uses bart-large-cnn model by Facebook\n"
)

# Use Parallel and Series directly from gradio
# Series chains the two interfaces: extractor output becomes summarizer input.
iface = gr.Series(
    extractor,
    summarizer,
    inputs=gr.inputs.Textbox(lines=2, label='URL'),
    outputs='text',
    title='News Summarizer',
    theme='huggingface',
    description=desc,
    examples=sample_url,
)
iface.launch()