# Spaces: Runtime error
from newspaper import Article | |
from newspaper import Config | |
import nltk | |
nltk.download('punkt') | |
from transformers import pipeline | |
import gradio as gr | |
# --- Demo: download one article and print what newspaper extracted from it ---

# Browser-style user agent string passed to newspaper for its HTTP request.
USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) Gecko/20100101 Firefox/78.0'

# Request settings used for the download below.
config = Config()
config.request_timeout = 10
config.browser_user_agent = USER_AGENT

url = 'https://www.technologyreview.com/2021/07/09/1028140/ai-voice-actors-sound-human/'
article = Article(url, config=config)
article.download()
article.parse()

# Copy the parsed fields into module-level names.
title = article.title
authors = ", ".join(article.authors)
date = article.publish_date
text = article.text
image = article.top_image
videos = article.movies
url = article.url  # rebinds `url` to the value stored on the Article object

# Assemble the whole report first, then emit it with a single print call.
report_lines = [
    "Information about the article",
    "=" * 30,
    f"Title: {title}",
    f"Author(s): {authors}",
    f"Publish date: {date}",
    f"Image: {image}",
    f"Videos: {videos}",
    f"Article link: {url}",
    # Only the first 100 characters are shown; the ellipsis is always appended.
    f"Content: {text[:100] + '...'}",
]
print("\n".join(report_lines))

# Keywords and summary are read only after nlp() has run.
article.nlp()
keywords = article.keywords
keywords.sort()  # sorts the list in place
print(keywords)
print(f"Summary: \n{article.summary}")
# --- Demo: run the same input through four hosted summarization models ---
# NOTE(review): gr.Interface.load, gr.inputs and gr.Parallel look like legacy
# Gradio APIs -- confirm they exist in the gradio version this app is pinned to.

# Load the four models in a fixed order so io1..io4 keep their meaning.
io1, io2, io3, io4 = (
    gr.Interface.load(model_name)
    for model_name in (
        "huggingface/sshleifer/distilbart-cnn-12-6",
        "huggingface/facebook/bart-large-cnn",
        "huggingface/google/pegasus-xsum",
        "huggingface/sshleifer/distilbart-cnn-6-6",
    )
)

# Use Parallel and Series directly from gradio
iface = gr.Parallel(
    io1,
    io2,
    io3,
    io4,
    inputs=gr.inputs.Textbox(lines=10, label="Text"),
    theme="huggingface",
)

# NOTE(review): when run as a plain script, launch() presumably blocks here, so
# the statements that follow would not execute until this server is stopped --
# confirm that is intended.
iface.launch()
def extract_article_text(url):
    """Download the page at ``url`` and return its parsed article text.

    A new newspaper ``Config`` is created on every call, carrying a
    browser-style user agent string and ``request_timeout = 10``.
    No exceptions are caught here, so any error raised by ``download()``
    or ``parse()`` propagates to the caller.
    """
    settings = Config()
    settings.request_timeout = 10
    settings.browser_user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) Gecko/20100101 Firefox/78.0'

    page = Article(url, config=settings)
    page.download()
    page.parse()
    return page.text
# --- App: URL in, summary out ---

# Step 1: text-in/text-out interface wrapping extract_article_text.
extractor = gr.Interface(extract_article_text, 'text', 'text')
# Step 2: hosted summarization model loaded from the Hugging Face hub.
summarizer = gr.Interface.load("huggingface/facebook/bart-large-cnn")

# Example inputs: one single-element row per example URL.
sample_url = [
    [example]
    for example in (
        'https://www.technologyreview.com/2021/07/22/1029973/deepmind-alphafold-protein-folding-biology-disease-drugs-proteome/',
        'https://www.technologyreview.com/2021/07/21/1029860/disability-rights-employment-discrimination-ai-hiring/',
        'https://www.technologyreview.com/2021/07/09/1028140/ai-voice-actors-sound-human/',
    )
]

# Description text passed to the interface below.
desc = '''
Let Hugging Face models summarize articles for you.
Note: Shorter articles generate faster summaries.
This summarizer uses bart-large-cnn model by Facebook
'''

# Use Parallel and Series directly from gradio
iface = gr.Series(
    extractor,
    summarizer,
    title='News Summarizer',
    description=desc,
    theme='huggingface',
    inputs=gr.inputs.Textbox(lines=2, label='URL'),
    outputs='text',
    examples=sample_url,
)
iface.launch()