gunship999 committed
Commit 0585dec · verified · 1 Parent(s): 35cca10

Create app.py

Files changed (1)
  1. app.py +76 -0
app.py ADDED
@@ -0,0 +1,76 @@
+ import requests
+ from bs4 import BeautifulSoup
+ import streamlit as st
+ import time
+ import random
+
+ # Target URL
+ url = "https://m.news.naver.com/rankingList"
+
+ # Header settings (User-Agent and Referer)
+ headers = {
+     "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:92.0) Gecko/20100101 Firefox/92.0",
+     "Referer": "https://m.news.naver.com/"
+ }
+
+ # Random delay function
+ def random_delay():
+     delay = random.uniform(1, 3)  # Random delay between 1 and 3 seconds
+     time.sleep(delay)
+
+ # Function to scrape ranking news
+ def scrape_ranking_news():
+     random_delay()  # Apply random delay before the request
+     response = requests.get(url, headers=headers)
+     soup = BeautifulSoup(response.text, "html.parser")
+
+     # Select HTML sections containing the data
+     ranking_news_sections = soup.find_all("div", class_="rankingnews_box")
+     news_list = []
+
+     for section in ranking_news_sections:
+         publisher = section.find("strong", class_="rankingnews_name").text  # Extract publisher name
+         articles = section.find_all("li")
+
+         for article in articles:
+             rank = article.find("em", class_="list_ranking_num").text
+             title = article.find("strong", class_="list_title").text
+             published_time = article.find("span", class_="list_time").text
+             link = article.find("a")['href']
+             image = article.find("img")['src']
+
+             news_list.append({
+                 "rank": rank,
+                 "title": title,
+                 "time": published_time,
+                 "link": link,
+                 "image": image,
+                 "publisher": publisher
+             })
+     return news_list
+
+ # Main title
+ st.title("Daily News Scrap in Korea")
+
+ # Execution button
+ if st.button("Start News Scraping"):
+     # Scrape ranking news data
+     news_data = scrape_ranking_news()
+
+     # Display in a 5-column grid layout, articles from the same publisher in one row
+     num_columns = 5
+     col_count = 0
+     cols = st.columns(num_columns)
+
+     for index, news in enumerate(news_data):
+         with cols[col_count]:
+             st.image(news['image'])
+             st.write(f"**Rank {news['rank']} - {news['publisher']}**")
+             st.write(f"[{news['title']}]({news['link']})")
+             st.write(f"🕒 Posted: {news['time']}")
+
+         col_count += 1
+         # Create a new row after 5 articles
+         if col_count == num_columns:
+             col_count = 0
+             cols = st.columns(num_columns)
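
To try the change locally, the app can be launched with Streamlit's CLI once the three imported packages are installed (a minimal sketch; the package names assume the standard PyPI distributions for the imports above):

  pip install requests beautifulsoup4 streamlit
  streamlit run app.py

Note that the scraper is tied to Naver's current mobile markup (the rankingnews_box, rankingnews_name, list_title, and list_time class names), so it will return no articles if those class names change.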