# Spaces: Sleeping (page-status text captured together with the source; not part of the program)
import pandas as pd
import requests
import streamlit as st
from bs4 import BeautifulSoup
# Naver ranking-news page that this script scrapes.
url = "https://news.naver.com/main/ranking/popularDay.naver"

# Request the page. The timeout keeps the app from waiting forever when the
# server never answers, and raise_for_status() turns an HTTP error response
# into an exception instead of letting an error page be parsed as news.
response = requests.get(url, timeout=10)
response.raise_for_status()

# Parse the raw response bytes with the built-in HTML parser.
soup = BeautifulSoup(response.content, 'html.parser')
def _parse_news_item(news_item):
    """Extract one ranking entry from a list-item element as a plain dict.

    Every field falls back to a placeholder string when the corresponding
    tag (or attribute) is missing, so a partially filled entry never raises.

    Args:
        news_item: A BeautifulSoup tag for one ``li`` of a ranking list.

    Returns:
        A dict with the keys ``'Rank'``, ``'Title'``, ``'Link'``, ``'Time'``
        and ``'Image URL'``, all holding strings.
    """
    # Rank number; stripped like the other text fields for consistency.
    rank_tag = news_item.find('em', class_='list_ranking_num')
    rank = rank_tag.text.strip() if rank_tag is not None else 'No Rank'

    # The title anchor supplies both the headline text and the article link.
    title_tag = news_item.find('a', class_='list_title')
    if title_tag is not None:
        title = title_tag.text.strip()
        # .get() avoids a KeyError for an anchor without an href attribute.
        link = title_tag.get('href', '#')
    else:
        title = 'No Title'
        link = '#'

    # Time text shown next to the headline, when present.
    time_tag = news_item.find('span', class_='list_time')
    time_text = time_tag.text.strip() if time_tag is not None else 'No Time'

    # Thumbnail URL; the sentinel string marks entries without a usable image.
    img_tag = news_item.find('img')
    if img_tag is not None:
        image_url = img_tag.get('src', 'No Image Available')
    else:
        image_url = 'No Image Available'

    return {
        'Rank': rank,
        'Title': title,
        'Link': link,
        'Time': time_text,
        'Image URL': image_url,
    }


# Collect every entry of every ranking list on the page, in document order.
news_list = [
    _parse_news_item(news_item)
    for news_item in soup.select('div.rankingnews_box ul.rankingnews_list li')
]
# Convert the scraped entries to a DataFrame. Pinning the columns keeps the
# schema stable even when nothing was scraped (an empty list would otherwise
# produce a frame with no columns at all).
df = pd.DataFrame(
    news_list,
    columns=['Rank', 'Title', 'Link', 'Time', 'Image URL'],
)

# Render the results in Streamlit.
st.title("Naver Ranking News Scraper")

# Say so explicitly when no entries matched, rather than showing a page
# that contains nothing but the heading.
if df.empty:
    st.warning("No ranking news items were found on the page.")

# One entry per row: optional thumbnail, linked headline, rank/time line,
# then a horizontal rule as separator.
for _, row in df.iterrows():
    # 'No Image Available' is the sentinel stored for entries without an image.
    if row['Image URL'] != 'No Image Available':
        st.image(row['Image URL'], width=100)
    st.markdown(f"**[{row['Title']}]({row['Link']})**")
    st.write(f"Rank: {row['Rank']} | Time: {row['Time']}")
    st.write("---")