import requests
from bs4 import BeautifulSoup
import pandas as pd
import streamlit as st
import random
import time  # import the time module

# Naver mobile ranking news URL
url = "https://m.news.naver.com/rankingList"
# Header settings (add User-Agent and Referer)
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36",
    "Referer": "https://m.news.naver.com/"
}
# Random delay helper
def random_delay(min_delay=1, max_delay=3):
    delay = random.uniform(min_delay, max_delay)
    time.sleep(delay)  # use the sleep function from the time module
# Request and parse the web page
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.content, 'html.parser')
# Extract the news list
news_list = []
# Extract data according to the current HTML structure
for news_box in soup.select('div.rankingnews_box'):
    # Extract the press (publisher) name
    press_name = news_box.find('strong', class_='rankingnews_name').text.strip()
    # Extract each news item in the list
    for news_item in news_box.select('ul.rankingnews_list li'):
        random_delay()  # add a random delay
        # Extract the rank number
        rank_tag = news_item.find('em', class_='list_ranking_num')
        rank = rank_tag.text if rank_tag else 'No Rank'
        # Extract the title
        title_tag = news_item.find('strong', class_='list_title')
        title = title_tag.text.strip() if title_tag else 'No Title'
        # Extract the link
        link = news_item.find('a')['href'] if news_item.find('a') else '#'
        # Extract the published time
        time_tag = news_item.find('span', class_='list_time')
        time_info = time_tag.text.strip() if time_tag else 'No Time'
        # Extract the thumbnail image URL
        img_tag = news_item.find('img')
        image_url = img_tag['src'] if img_tag and 'src' in img_tag.attrs else 'No Image Available'
        # Append the record to the data list
        news_list.append({
            'Press': press_name,
            'Rank': rank,
            'Title': title,
            'Link': link,
            'Time': time_info,
            'Image URL': image_url
        })
# Convert to a DataFrame
df = pd.DataFrame(news_list)
# Display the results in Streamlit
st.title("Naver Mobile Ranking News Scraper")
# Arrange the news items as a grid, 3 per row (3-column layout)
columns_per_row = 3
# Render the news items in grid form
for i in range(0, len(df), columns_per_row):
    cols = st.columns(columns_per_row)  # create a 3-column grid
    # Place one news item in each column
    for idx, col in enumerate(cols):
        if i + idx < len(df):
            row = df.iloc[i + idx]
            with col:
                # Show the image if one is available
                if row['Image URL'] != 'No Image Available':
                    st.image(row['Image URL'], use_column_width=True)
                # News title as a hyperlink
                st.markdown(f"**[{row['Title']}]({row['Link']})**")
                # Press and time info
                st.write(f"Press: {row['Press']} | Time: {row['Time']}")