"""Gradio app that searches NewsAPI for a query and summarizes the articles.

The script fetches the top matching articles, scrapes their headline and
paragraph text, splits the combined text into model-sized chunks, and
summarizes each chunk with a Hugging Face summarization pipeline.
"""

import asyncio
import logging
import os

import gradio as gr
import nltk
import requests
from bs4 import BeautifulSoup
from newsapi import NewsApiClient
from transformers import pipeline

# Configure logging
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Seconds to wait for a single article download before giving up, so one
# unresponsive site cannot hang the whole request.
REQUEST_TIMEOUT_SECONDS = 10

# Approximate word budget per summarization chunk. Summarization models accept
# a bounded number of input tokens, so chunks are kept well below that bound.
MAX_CHUNK_WORDS = 400

# Messages returned to the UI when there is nothing to summarize.
EMPTY_QUERY_MESSAGE = "Please enter a search query."
NO_CONTENT_MESSAGE = "No article content could be retrieved for this query."

# Initialize the summarization pipeline from Hugging Face Transformers
summarizer = pipeline("summarization")

# Initialize the NLTK sentence tokenizer. Newer NLTK releases load the
# 'punkt_tab' resource instead of 'punkt'; requesting both is harmless because
# nltk.download() reports an unknown package instead of raising.
for _resource in ('punkt', 'punkt_tab'):
    nltk.download(_resource)

# Initialize the News API client.
# SECURITY: an API key is hard-coded in this file. Prefer setting the
# NEWS_API_KEY environment variable; the literal fallback is kept only so
# existing deployments keep working and should be rotated and removed.
newsapi = NewsApiClient(
    api_key=os.environ.get('NEWS_API_KEY', '5ab7bb1aaceb41b8993db03477098aad')
)


def fetch_article_content(url):
    """Download a page and return the text of its <h1> and <p> elements.

    Args:
        url: Address of the article to fetch.

    Returns:
        The heading and paragraph texts joined by single spaces, or an empty
        string if the page could not be downloaded or parsed.
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
        # Treat 4xx/5xx responses as failures so error pages are not summarized.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        return ' '.join(node.text for node in soup.find_all(['h1', 'p']))
    except Exception as e:
        # Best-effort boundary: one bad page must not abort the whole run.
        logger.error("Error fetching content from %s: %s", url, e)
        return ""


def _collect_article_text(query, page_size):
    """Search NewsAPI for *query* and return the combined text of the hits."""
    logger.debug("Searching for news articles...")
    try:
        news_results = newsapi.get_everything(
            q=query, language='en', page_size=page_size
        )
    except Exception as e:
        logger.error("Error fetching news articles: %s", e)
        return ""

    logger.debug("Search results: %s", news_results)
    contents = []
    for article in news_results.get('articles', []):
        url = article.get('url')
        if not url:
            continue
        logger.debug("Fetching content from URL: %s", url)
        content = fetch_article_content(url)
        if content:
            contents.append(content)
    return ' '.join(contents)


def _chunk_sentences(sentences, max_words=MAX_CHUNK_WORDS):
    """Group consecutive sentences into strings of at most ~max_words words.

    A single sentence longer than the budget becomes its own chunk; the
    summarizer call truncates it if it still exceeds the model limit.
    """
    chunks = []
    current = []
    current_words = 0
    for sentence in sentences:
        n_words = len(sentence.split())
        if current and current_words + n_words > max_words:
            chunks.append(' '.join(current))
            current = []
            current_words = 0
        current.append(sentence)
        current_words += n_words
    if current:
        chunks.append(' '.join(current))
    return chunks


def _summarize_news_blocking(query, num_results):
    """Synchronous implementation behind summarize_news()."""
    logger.debug("Query received: %s", query)
    logger.debug("Number of results requested: %s", num_results)

    if not query or not query.strip():
        return EMPTY_QUERY_MESSAGE

    # The slider may deliver a float; the NewsAPI page size is sent as a
    # whole number clamped to the 1..100 range.
    page_size = max(1, min(100, int(num_results)))

    aggregated_content = _collect_article_text(query, page_size)
    if not aggregated_content.strip():
        return NO_CONTENT_MESSAGE

    # Summarize the aggregated content
    try:
        sentences = nltk.sent_tokenize(aggregated_content)
        summaries = []
        for chunk_text in _chunk_sentences(sentences):
            summary = summarizer(
                chunk_text,
                max_length=120,
                min_length=30,
                do_sample=False,
                truncation=True,  # never feed the model more than it accepts
            )
            summaries.append(summary[0]['summary_text'])

        # Combine all summaries
        final_summary = ' '.join(summaries)
        logger.debug("Final summarized text: %s", final_summary)
        return final_summary
    except Exception as e:
        logger.error("Error during summarization: %s", e)
        return "An error occurred during summarization."


async def summarize_news(query, num_results=3):
    """Summarize the top news articles matching a query.

    Args:
        query: Search terms passed to NewsAPI.
        num_results: Number of articles to fetch; converted to an int and
            clamped to 1..100.

    Returns:
        The concatenated chunk summaries, or a short human-readable message
        when the query is empty, no content was retrieved, or summarization
        failed.
    """
    # All the work (HTTP requests, model inference) is blocking, so run it in
    # the default executor instead of stalling the event loop.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(
        None, _summarize_news_blocking, query, num_results
    )


# Setting up Gradio interface
iface = gr.Interface(
    fn=summarize_news,
    inputs=[
        gr.Textbox(label="Query"),
        gr.Slider(minimum=1, maximum=10, value=3, step=1, label="Number of Results"),
    ],
    outputs="textbox",
    title="News Summarizer",
    description="Enter a query to get a consolidated summary of the top news articles.",
)

if __name__ == "__main__":
    iface.launch()