CSB261 committed on
Commit
46f067b
•
1 Parent(s): 13e115d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -12
app.py CHANGED
@@ -1,20 +1,56 @@
1
- import streamlit as st
 
2
  import pandas as pd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
- # 예시 데이터프레임 (실제 데이터로 교체)
5
- df = pd.DataFrame({
6
- 'Rank': [1, 2, 3],
7
- 'Title': ['뉴스 제목 1', '뉴스 제목 2', '뉴스 제목 3'],
8
- 'Link': ['https://news1.com', 'https://news2.com', 'https://news3.com'],
9
- 'Time': ['1시간 전', '2시간 전', '3시간 전'],
10
- 'Image URL': ['https://example.com/image1.jpg', 'https://example.com/image2.jpg', 'https://example.com/image3.jpg']
11
- })
12
 
13
- st.title('Naver Ranking News')
 
14
 
15
- # 개별 뉴스 항목을 출력
16
  for index, row in df.iterrows():
17
- st.image(row['Image URL'], width=100)
 
18
  st.markdown(f"**[{row['Title']}]({row['Link']})**")
19
  st.write(f"Rank: {row['Rank']} | Time: {row['Time']}")
20
  st.write("---")
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
  import pandas as pd
4
+ import streamlit as st
5
+
6
+ # 네이버 랭킹 뉴스 URL
7
+ url = "https://news.naver.com/main/ranking/popularDay.naver"
8
+
9
+ # 웹 페이지 요청 및 파싱
10
+ response = requests.get(url)
11
+ soup = BeautifulSoup(response.content, 'html.parser')
12
+
13
+ # 뉴스 리스트 추출
14
+ news_list = []
15
+
16
+ for news_item in soup.select('div.rankingnews_box ul.rankingnews_list li'):
17
+ # 순위 번호가 존재하는지 확인
18
+ rank_tag = news_item.find('em', class_='list_ranking_num')
19
+ rank = rank_tag.text if rank_tag else 'No Rank'
20
+
21
+ # 제목이 존재하는지 확인
22
+ title_tag = news_item.find('a', class_='list_title')
23
+ title = title_tag.text.strip() if title_tag else 'No Title'
24
+
25
+ # 링크가 존재하는지 확인
26
+ link = title_tag['href'] if title_tag else '#'
27
+
28
+ # 시간 정보가 존재하는지 확인
29
+ time_tag = news_item.find('span', class_='list_time')
30
+ time = time_tag.text.strip() if time_tag else 'No Time'
31
+
32
+ # 이미지 태그와 src 속성 확인
33
+ img_tag = news_item.find('img')
34
+ image_url = img_tag['src'] if img_tag and 'src' in img_tag.attrs else 'No Image Available'
35
+
36
+ news_list.append({
37
+ 'Rank': rank,
38
+ 'Title': title,
39
+ 'Link': link,
40
+ 'Time': time,
41
+ 'Image URL': image_url
42
+ })
43
 
44
+ # 데이터프레임으로 변환
45
+ df = pd.DataFrame(news_list)
 
 
 
 
 
 
46
 
47
+ # Streamlit์—์„œ ๊ฒฐ๊ณผ ํ‘œ์‹œ
48
+ st.title("Naver Ranking News Scraper")
49
 
50
+ # 개별 뉴스 항목 출력
51
  for index, row in df.iterrows():
52
+ if row['Image URL'] != 'No Image Available':
53
+ st.image(row['Image URL'], width=100)
54
  st.markdown(f"**[{row['Title']}]({row['Link']})**")
55
  st.write(f"Rank: {row['Rank']} | Time: {row['Time']}")
56
  st.write("---")