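"""Scrape the Naver mobile ranking news page and display the results in a 3-column Streamlit grid."""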
import requests
from bs4 import BeautifulSoup
import pandas as pd
import streamlit as st
import random
import time  # import the time module

# Naver mobile ranking news URL
url = "https://m.news.naver.com/rankingList"

# Header settings (add User-Agent and Referer)
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36",
    "Referer": "https://m.news.naver.com/"
}

# Random delay function
def random_delay(min_delay=1, max_delay=3):
    delay = random.uniform(min_delay, max_delay)
    time.sleep(delay)  # use the sleep function from the time module

# Request and parse the web page
response = requests.get(url, headers=headers)
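response.raise_for_status()  # fail fast if the request was blocked or returned an error status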
soup = BeautifulSoup(response.content, 'html.parser')

# Extracted news items
news_list = []

# Extract data according to the new HTML structure
for news_box in soup.select('div.rankingnews_box'):
    # Extract the press (publisher) name
    press_name = news_box.find('strong', class_='rankingnews_name').text.strip()
    
    # Extract each news item from the list
    for news_item in news_box.select('ul.rankingnews_list li'):
        random_delay()  # add a random delay between items

        # Extract the rank number
        rank_tag = news_item.find('em', class_='list_ranking_num')
        rank = rank_tag.text if rank_tag else 'No Rank'

        # Extract the title
        title_tag = news_item.find('strong', class_='list_title')
        title = title_tag.text.strip() if title_tag else 'No Title'

        # Extract the link
        link = news_item.find('a')['href'] if news_item.find('a') else '#'

        # Extract the time
        time_tag = news_item.find('span', class_='list_time')
        time_info = time_tag.text.strip() if time_tag else 'No Time'

        # Extract the image URL
        img_tag = news_item.find('img')
        image_url = img_tag['src'] if img_tag and 'src' in img_tag.attrs else 'No Image Available'

        # Append to the data list
        news_list.append({
            'Press': press_name,
            'Rank': rank,
            'Title': title,
            'Link': link,
            'Time': time_info,
            'Image URL': image_url
        })

# Convert to a DataFrame
df = pd.DataFrame(news_list)

# Display the results in Streamlit
st.title("Naver Mobile Ranking News Scraper")

# Arrange the news three per row in a board-like layout (3-column grid)
columns_per_row = 3

# Render the news items in the board-style grid
for i in range(0, len(df), columns_per_row):
    cols = st.columns(columns_per_row)  # create a 3-column grid
    
    # Place a news item in each column
    for idx, col in enumerate(cols):
        if i + idx < len(df):
            row = df.iloc[i + idx]
            with col:
                # Show the image if one is available
                if row['Image URL'] != 'No Image Available':
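                    # Note: newer Streamlit releases deprecate use_column_width in favor of use_container_width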
                    st.image(row['Image URL'], use_column_width=True)
                # News title as a hyperlink
                st.markdown(f"**[{row['Title']}]({row['Link']})**")
                # Press and time information
                st.write(f"Press: {row['Press']} | Time: {row['Time']}")