import random
import time

import requests
import streamlit as st
from bs4 import BeautifulSoup

# Target URL: Naver mobile ranking-news page.
url = "https://m.news.naver.com/rankingList"

# Header settings (User-Agent and Referer) so the request looks like a
# normal mobile-browser visit rather than a bare script.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:92.0) Gecko/20100101 Firefox/92.0",
    "Referer": "https://m.news.naver.com/",
}


def random_delay():
    """Sleep for a random 1-3 second interval to avoid hammering the server."""
    delay = random.uniform(1, 3)  # Random delay between 1 to 3 seconds
    time.sleep(delay)


def scrape_ranking_news():
    """Scrape the Naver ranking-news page.

    Returns:
        list[dict]: one dict per article with keys
        ``rank``, ``title``, ``time``, ``link``, ``image``, ``publisher``.
        Articles missing any required field are skipped instead of
        crashing the whole scrape.

    Raises:
        requests.RequestException: on network failure, timeout, or a
        non-2xx HTTP response.
    """
    random_delay()  # Apply random delay before hitting the server

    # timeout prevents an indefinite hang; raise_for_status stops us from
    # silently parsing an error page as if it were the ranking list.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # Each publisher's ranking box contains its name plus a <li> per article.
    ranking_news_sections = soup.find_all("div", class_="rankingnews_box")

    news_list = []
    for section in ranking_news_sections:
        # Publisher name lives in a <strong> header; skip malformed boxes.
        name_tag = section.find("strong", class_="rankingnews_name")
        if name_tag is None:
            continue
        publisher = name_tag.get_text(strip=True)

        for article in section.find_all("li"):
            rank_tag = article.find("em", class_="list_ranking_num")
            title_tag = article.find("strong", class_="list_title")
            time_tag = article.find("span", class_="list_time")
            link_tag = article.find("a")
            img_tag = article.find("img")

            # Promotional or oddly-formatted <li> items may lack any of
            # these fields; skip them rather than raising AttributeError.
            if None in (rank_tag, title_tag, time_tag, link_tag, img_tag):
                continue

            link = link_tag.get("href")
            # NOTE(review): lazy-loaded images sometimes carry the real URL
            # in data-src instead of src — fall back to it; verify against
            # the live markup.
            image = img_tag.get("src") or img_tag.get("data-src")
            if not link or not image:
                continue

            news_list.append({
                "rank": rank_tag.get_text(strip=True),
                "title": title_tag.get_text(strip=True),
                "time": time_tag.get_text(strip=True),
                "link": link,
                "image": image,
                "publisher": publisher,
            })

    return news_list


# ---- Streamlit UI (runs on every rerun of the script) ----

# Main title
st.title("Daily News Scrap in Korea")

# Execution button
if st.button("Start News Scraping"):
    # Scrape ranking news data
    news_data = scrape_ranking_news()

    # Display in a 5-wide grid; a fresh row of columns is created after
    # every 5 articles so items from the same publisher sit together.
    num_columns = 5
    col_count = 0
    cols = st.columns(num_columns)

    for news in news_data:
        with cols[col_count]:
            st.image(news["image"])
            st.write(f"**Rank {news['rank']} - {news['publisher']}**")
            st.write(f"[{news['title']}]({news['link']})")
            st.write(f"🕒 Posted: {news['time']}")

        col_count += 1
        # Create new row after 5 articles
        if col_count == num_columns:
            col_count = 0
            cols = st.columns(num_columns)