Spaces:
Sleeping
Sleeping
File size: 2,663 Bytes
46f067b 86797a6 46f067b c539900 7761dac 46f067b c539900 7761dac 46f067b c539900 46f067b c539900 46f067b c539900 7761dac c539900 7761dac c539900 86797a6 46f067b 86797a6 46f067b c539900 86797a6 46f067b 13e115d 46f067b 13e115d c539900 13e115d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
import requests
from bs4 import BeautifulSoup
import pandas as pd
import streamlit as st
import random
import time # time ๋ชจ๋์ ์ํฌํธ
# Naver mobile news ranking URL
url = "https://m.news.naver.com/rankingList"

# Request headers sent with the page fetch: a Windows/Chrome User-Agent string
# and a Referer pointing at the mobile news front page.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/92.0.4515.107 Safari/537.36"
    ),
    "Referer": "https://m.news.naver.com/",
}
# Random delay helper
def random_delay(min_delay: float = 1, max_delay: float = 3) -> float:
    """Sleep for a randomly chosen duration and return that duration.

    Args:
        min_delay: One bound of the pause length, in seconds.
        max_delay: The other bound of the pause length, in seconds.

    Returns:
        The number of seconds slept, drawn uniformly between ``min_delay``
        and ``max_delay``.
    """
    delay = random.uniform(min_delay, max_delay)
    time.sleep(delay)
    # Returning the value lets callers log or accumulate the time spent
    # waiting; callers that ignore the result are unaffected.
    return delay
# Request the web page and parse it
# Fetch the ranking page. The timeout keeps a stalled connection from hanging
# the app indefinitely, and raise_for_status() surfaces an HTTP error instead
# of silently parsing an error page into an empty result.
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

# One dict per ranked article, in document order.
news_list = []

# Each ranking box carries a press name and a list of ranked articles.
for news_box in soup.select('div.rankingnews_box'):
    # Press name; fall back to a placeholder when the tag is missing, the same
    # way every other field below does.
    press_tag = news_box.find('strong', class_='rankingnews_name')
    press_name = press_tag.text.strip() if press_tag else 'No Press'

    # No sleep between items: everything below only walks the document that
    # was already downloaded, so a per-item pause would delay rendering
    # without throttling any request.
    for news_item in news_box.select('ul.rankingnews_list li'):
        # Rank number
        rank_tag = news_item.find('em', class_='list_ranking_num')
        rank = rank_tag.text.strip() if rank_tag else 'No Rank'

        # Title
        title_tag = news_item.find('strong', class_='list_title')
        title = title_tag.text.strip() if title_tag else 'No Title'

        # Link: look the anchor up once and tolerate one without an href.
        link_tag = news_item.find('a')
        link = link_tag.get('href', '#') if link_tag else '#'

        # Time
        time_tag = news_item.find('span', class_='list_time')
        time_info = time_tag.text.strip() if time_tag else 'No Time'

        # Image URL
        img_tag = news_item.find('img')
        image_url = (
            img_tag.get('src', 'No Image Available')
            if img_tag
            else 'No Image Available'
        )

        # Append the record to the result list
        news_list.append({
            'Press': press_name,
            'Rank': rank,
            'Title': title,
            'Link': link,
            'Time': time_info,
            'Image URL': image_url,
        })
# Convert to a DataFrame
df = pd.DataFrame(news_list)

# Show the results in Streamlit
st.title("Naver Mobile Ranking News Scraper")

# Render each article: thumbnail (when one was found), linked headline,
# a metadata line, then a "---" separator.
for _, article in df.iterrows():
    thumbnail = article['Image URL']
    if thumbnail != 'No Image Available':
        st.image(thumbnail, width=100)

    headline = f"**[{article['Title']}]({article['Link']})**"
    st.markdown(headline)

    details = f"Press: {article['Press']} | Rank: {article['Rank']} | Time: {article['Time']}"
    st.write(details)

    st.write("---")
|