newsranking / app.py
CSB261's picture
Update app.py
46f067b verified
raw
history blame
1.77 kB
import requests
from bs4 import BeautifulSoup
import pandas as pd
import streamlit as st
# Naver ranking news URL
url = "https://news.naver.com/main/ranking/popularDay.naver"

# Request the page. The timeout keeps the app from hanging forever on a
# stalled connection, and raise_for_status() prevents an HTTP error page from
# being parsed as if it were the ranking list.
try:
    response = requests.get(url, timeout=10)
    response.raise_for_status()
except requests.RequestException as exc:
    # Show a readable message in the app instead of a raw traceback, then
    # halt this script run because there is nothing to parse.
    st.error(f"Failed to load Naver ranking news: {exc}")
    st.stop()

soup = BeautifulSoup(response.content, 'html.parser')

# Extract one record per <li> in the ranking lists. Every field falls back to
# a placeholder string when its tag (or attribute) is missing, so a partially
# filled item never aborts the whole scrape.
news_list = []
for news_item in soup.select('div.rankingnews_box ul.rankingnews_list li'):
    # Rank number, if present.
    rank_tag = news_item.find('em', class_='list_ranking_num')
    rank = rank_tag.text if rank_tag else 'No Rank'

    # Title and link both come from the same anchor tag.
    title_tag = news_item.find('a', class_='list_title')
    title = title_tag.text.strip() if title_tag else 'No Title'
    # .get() avoids a KeyError when the anchor has no href attribute.
    link = title_tag.get('href', '#') if title_tag else '#'

    # Time information, if present. Named time_text so the stdlib module
    # name `time` is not shadowed.
    time_tag = news_item.find('span', class_='list_time')
    time_text = time_tag.text.strip() if time_tag else 'No Time'

    # Thumbnail URL; a missing tag, missing src, or empty src all map to the
    # placeholder so st.image() is never handed an unusable value.
    img_tag = news_item.find('img')
    image_url = (img_tag.get('src') if img_tag else None) or 'No Image Available'

    news_list.append({
        'Rank': rank,
        'Title': title,
        'Link': link,
        'Time': time_text,
        'Image URL': image_url,
    })

# Convert to a DataFrame.
df = pd.DataFrame(news_list)

# Display the results in Streamlit.
st.title("Naver Ranking News Scraper")

if df.empty:
    # The selectors matched nothing; tell the user rather than rendering a
    # blank page.
    st.warning("No ranking news items were found; the page layout may have changed.")

# Render each news item: optional thumbnail, linked title, rank/time line,
# then a horizontal separator.
for _, row in df.iterrows():
    if row['Image URL'] != 'No Image Available':
        st.image(row['Image URL'], width=100)
    st.markdown(f"**[{row['Title']}]({row['Link']})**")
    st.write(f"Rank: {row['Rank']} | Time: {row['Time']}")
    st.write("---")