Spaces:
Running
Running
gunship999
committed on
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
from bs4 import BeautifulSoup
|
3 |
+
import streamlit as st
|
4 |
+
import time
|
5 |
+
import random
|
6 |
+
|
7 |
+
# Scrape target: Naver mobile ranking-news page.
url = "https://m.news.naver.com/rankingList"

# Request headers: a desktop-browser User-Agent plus a Referer so the
# request resembles ordinary browser traffic rather than a bare script.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:92.0) Gecko/20100101 Firefox/92.0",
    "Referer": "https://m.news.naver.com/",
}
|
15 |
+
|
16 |
+
# Random delay function
def random_delay(low=1, high=3):
    """Sleep for a random duration between *low* and *high* seconds.

    Inserted between requests so the scraper does not hit the server at
    a perfectly regular, bot-like cadence.  The defaults preserve the
    original hard-coded 1-3 second window; callers may pass different
    bounds (e.g. 0 for tests).

    Args:
        low: Minimum delay in seconds (default 1).
        high: Maximum delay in seconds (default 3).
    """
    time.sleep(random.uniform(low, high))
|
20 |
+
|
21 |
+
# Function to scrape ranking news
def scrape_ranking_news():
    """Fetch and parse Naver's mobile ranking-news page.

    Returns:
        A list of dicts, one per article, with the keys
        ``rank``, ``title``, ``time``, ``link``, ``image``, ``publisher``.
        Articles missing any required element are skipped instead of
        aborting the whole scrape.

    Raises:
        requests.HTTPError: on a non-2xx response.
        requests.Timeout: if the server does not answer within 10 s.
    """
    random_delay()  # jitter between requests to avoid a bot-like cadence
    # timeout keeps an unresponsive server from hanging the app forever
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()  # fail loudly on 4xx/5xx instead of parsing an error page
    soup = BeautifulSoup(response.text, "html.parser")

    news_list = []
    # Each rankingnews_box groups one publisher's ranked articles.
    for section in soup.find_all("div", class_="rankingnews_box"):
        name_tag = section.find("strong", class_="rankingnews_name")
        if name_tag is None:
            # Layout changed or stray box — skip rather than crash on None.text
            continue
        publisher = name_tag.text

        for article in section.find_all("li"):
            rank = article.find("em", class_="list_ranking_num")
            title = article.find("strong", class_="list_title")
            published_time = article.find("span", class_="list_time")
            link = article.find("a")
            image = article.find("img")
            # Skip incomplete items (ads, placeholders, markup drift)
            # instead of raising AttributeError/KeyError mid-scrape.
            if None in (rank, title, published_time, link, image):
                continue
            if not link.has_attr("href") or not image.has_attr("src"):
                continue

            news_list.append({
                "rank": rank.text,
                "title": title.text,
                "time": published_time.text,
                "link": link["href"],
                "image": image["src"],
                "publisher": publisher,
            })
    return news_list
|
51 |
+
|
52 |
+
# ---------------------------------------------------------------------
# Streamlit UI
# ---------------------------------------------------------------------
st.title("Daily News Scrap in Korea")

# Scraping only runs on demand, when the user presses the button.
if st.button("Start News Scraping"):
    news_data = scrape_ranking_news()

    # Articles are laid out five to a row; once a row fills up, a fresh
    # set of columns is created for the next batch.
    num_columns = 5
    cols = st.columns(num_columns)
    slot = 0

    for news in news_data:
        with cols[slot]:
            st.image(news['image'])
            st.write(f"**Rank {news['rank']} - {news['publisher']}**")
            st.write(f"[{news['title']}]({news['link']})")
            st.write(f"🕒 Posted: {news['time']}")

        slot += 1
        if slot == num_columns:  # row is full — start a new one
            slot = 0
            cols = st.columns(num_columns)
|