Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,20 +1,56 @@
|
|
1 |
-
import
|
|
|
2 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
-
#
|
5 |
-
df = pd.DataFrame(
|
6 |
-
'Rank': [1, 2, 3],
|
7 |
-
'Title': ['뉴스 제목 1', '뉴스 제목 2', '뉴스 제목 3'],
|
8 |
-
'Link': ['https://news1.com', 'https://news2.com', 'https://news3.com'],
|
9 |
-
'Time': ['1시간 전', '2시간 전', '3시간 전'],
|
10 |
-
'Image URL': ['https://example.com/image1.jpg', 'https://example.com/image2.jpg', 'https://example.com/image3.jpg']
|
11 |
-
})
|
12 |
|
13 |
-
|
|
|
14 |
|
15 |
-
# 개별 뉴스
|
16 |
for index, row in df.iterrows():
|
17 |
-
|
|
|
18 |
st.markdown(f"**[{row['Title']}]({row['Link']})**")
|
19 |
st.write(f"Rank: {row['Rank']} | Time: {row['Time']}")
|
20 |
st.write("---")
|
|
|
1 |
+
import requests
|
2 |
+
from bs4 import BeautifulSoup
|
3 |
import pandas as pd
|
4 |
+
import streamlit as st
|
5 |
+
|
6 |
+
# 네이버 랭킹 뉴스 URL
|
7 |
+
url = "https://news.naver.com/main/ranking/popularDay.naver"
|
8 |
+
|
9 |
+
# 웹 페이지 요청 및 파싱
|
10 |
+
response = requests.get(url)
|
11 |
+
soup = BeautifulSoup(response.content, 'html.parser')
|
12 |
+
|
13 |
+
# 뉴스 리스트 추출
|
14 |
+
news_list = []
|
15 |
+
|
16 |
+
for news_item in soup.select('div.rankingnews_box ul.rankingnews_list li'):
|
17 |
+
# 순위 번호가 존재하는지 확인
|
18 |
+
rank_tag = news_item.find('em', class_='list_ranking_num')
|
19 |
+
rank = rank_tag.text if rank_tag else 'No Rank'
|
20 |
+
|
21 |
+
# 제목이 존재하는지 확인
|
22 |
+
title_tag = news_item.find('a', class_='list_title')
|
23 |
+
title = title_tag.text.strip() if title_tag else 'No Title'
|
24 |
+
|
25 |
+
# 링크가 존재하는지 확인
|
26 |
+
link = title_tag['href'] if title_tag else '#'
|
27 |
+
|
28 |
+
# 시간 정보가 존재하는지 확인
|
29 |
+
time_tag = news_item.find('span', class_='list_time')
|
30 |
+
time = time_tag.text.strip() if time_tag else 'No Time'
|
31 |
+
|
32 |
+
# 이미지 태그의 src 속성 확인
|
33 |
+
img_tag = news_item.find('img')
|
34 |
+
image_url = img_tag['src'] if img_tag and 'src' in img_tag.attrs else 'No Image Available'
|
35 |
+
|
36 |
+
news_list.append({
|
37 |
+
'Rank': rank,
|
38 |
+
'Title': title,
|
39 |
+
'Link': link,
|
40 |
+
'Time': time,
|
41 |
+
'Image URL': image_url
|
42 |
+
})
|
43 |
|
44 |
+
# 데이터프레임으로 변환
|
45 |
+
df = pd.DataFrame(news_list)
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
+
# Streamlit์์ ๊ฒฐ๊ณผ ํ์
|
48 |
+
st.title("Naver Ranking News Scraper")
|
49 |
|
50 |
+
# 개별 뉴스 항목 출력
|
51 |
for index, row in df.iterrows():
|
52 |
+
if row['Image URL'] != 'No Image Available':
|
53 |
+
st.image(row['Image URL'], width=100)
|
54 |
st.markdown(f"**[{row['Title']}]({row['Link']})**")
|
55 |
st.write(f"Rank: {row['Rank']} | Time: {row['Time']}")
|
56 |
st.write("---")
|