AIRider committed on
Commit
3544f83
·
verified ·
1 Parent(s): b3b13e7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -68
app.py CHANGED
@@ -3,78 +3,72 @@ from bs4 import BeautifulSoup
3
  import pandas as pd
4
  import gradio as gr
5
 
6
- # ๋„ค์ด๋ฒ„ ์ฝ”์Šค๋‹ฅ URL
7
- KOSDAQ_URL = "https://finance.naver.com/sise/sise_rise.naver?sosok=1"
8
-
9
- def scrape_kosdaq_data():
10
- print("๋””๋ฒ„๊น…: ๋„ค์ด๋ฒ„ ์ฆ๊ถŒ ํŽ˜์ด์ง€ ์š”์ฒญ ์‹œ์ž‘...")
11
-
12
- # ์š”์ฒญ ํ—ค๋” ์ถ”๊ฐ€
13
  headers = {
14
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
15
  }
16
-
17
- # ํŽ˜์ด์ง€ ์š”์ฒญ
18
- response = requests.get(KOSDAQ_URL, headers=headers)
19
- if response.status_code == 200:
20
- print("๋””๋ฒ„๊น…: ๋„ค์ด๋ฒ„ ์ฆ๊ถŒ ํŽ˜์ด์ง€ ์š”์ฒญ ์„ฑ๊ณต")
21
- else:
22
- print(f"๋””๋ฒ„๊น…: ์š”์ฒญ ์‹คํŒจ, ์ƒํƒœ ์ฝ”๋“œ: {response.status_code}")
23
- return pd.DataFrame() # ๋นˆ DataFrame ๋ฐ˜ํ™˜
24
 
25
- # HTML ํŒŒ์‹ฑ
26
- soup = BeautifulSoup(response.text, "html.parser")
27
-
28
- print("๋””๋ฒ„๊น…: ํ‘œ ํ—ค๋” ์ถ”์ถœ ์‹œ์ž‘...")
29
- # ํ‘œ์˜ ํ—ค๋” ์ถ”์ถœ
30
- headers = []
31
- header_tags = soup.select("table.type_2 thead tr th")
32
- for tag in header_tags:
33
- header_text = tag.get_text(strip=True)
34
- if header_text: # ๋นˆ ๊ฐ’ ์ œ๊ฑฐ
35
- headers.append(header_text)
36
- print(f"๋””๋ฒ„๊น…: ์ถ”์ถœ๋œ ํ—ค๋” - {headers}")
37
-
38
- print("๋””๋ฒ„๊น…: ํ‘œ ๋ฐ์ดํ„ฐ ์ถ”์ถœ ์‹œ์ž‘...")
39
- # ํ‘œ์˜ ๋ฐ์ดํ„ฐ ์ถ”์ถœ
40
- rows = soup.select("table.type_2 tbody tr")
41
- data = []
42
- for row in rows:
43
- cols = row.find_all("td")
44
- if cols: # ๋นˆ ํ–‰ ์ œ์™ธ
45
- # ๊ฐ ์—ด์˜ ํ…์ŠคํŠธ๋ฅผ ์ถ”์ถœ
46
- row_data = [col.get_text(strip=True) for col in cols]
47
- # ๋งํฌ๊ฐ€ ์žˆ๋Š” ๊ฒฝ์šฐ ์ข…๋ชฉ๋ช…์„ ์ถ”๊ฐ€
48
- link = row.find("a", class_="tltle")
49
- if link:
50
- row_data[1] = link.text.strip()
51
- data.append(row_data)
52
- print(f"๋””๋ฒ„๊น…: ์ถ”์ถœ๋œ ๋ฐ์ดํ„ฐ ํ–‰ ์ˆ˜ - {len(data)}")
53
-
54
- if data:
55
- print(f"๋””๋ฒ„๊น…: ์ฒซ ๋ฒˆ์งธ ๋ฐ์ดํ„ฐ ํ–‰ - {data[0]}")
56
- else:
57
- print("๋””๋ฒ„๊น…: ๋ฐ์ดํ„ฐ ์ถ”์ถœ ์‹คํŒจ")
58
- return pd.DataFrame(columns=headers) # ๋นˆ DataFrame ๋ฐ˜ํ™˜
59
 
60
- # DataFrame ์ƒ์„ฑ
61
- df = pd.DataFrame(data, columns=headers)
62
- print("๋””๋ฒ„๊น…: DataFrame ์ƒ์„ฑ ์™„๋ฃŒ")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  return df
64
 
65
- def display_kosdaq_info():
66
- # ๋ฐ์ดํ„ฐ ์Šคํฌ๋ž˜ํ•‘ ๋ฐ ๋ฐ˜ํ™˜
67
- df = scrape_kosdaq_data()
68
- if not df.empty:
69
- return df
70
- else:
71
- return pd.DataFrame({"๊ฒฐ๊ณผ": ["๋ฐ์ดํ„ฐ๋ฅผ ๊ฐ€์ ธ์˜ค๋Š” ๋ฐ ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."]})
 
 
 
72
 
73
- # Gradio UI ์„ค์ •
74
- gr.Interface(
75
- fn=display_kosdaq_info,
76
- inputs=None,
77
- outputs="dataframe",
78
- title="์ฝ”์Šค๋‹ฅ ์ข…๋ชฉ ์ •๋ณด ์Šคํฌ๋ž˜ํผ",
79
- description="๋„ค์ด๋ฒ„ ์ฆ๊ถŒ ์‚ฌ์ดํŠธ์—์„œ ์ฝ”์Šค๋‹ฅ ์ข…๋ชฉ ์ •๋ณด๋ฅผ ์Šคํฌ๋ž˜ํ•‘ํ•˜์—ฌ ํ‘œ์‹œํ•ฉ๋‹ˆ๋‹ค."
80
- ).launch()
 
3
  import pandas as pd
4
  import gradio as gr
5
 
6
def scrape_kosdaq():
    """Scrape the KOSDAQ top-gainers table from Naver Finance.

    Returns:
        pd.DataFrame: one row per stock with the columns Rank, Name, Price,
        Change, Change_Rate, Volume, Buy_Price, Sell_Price,
        Total_Buy_Quantity, Total_Sell_Quantity, PER and ROE, all as the
        raw strings rendered on the page. An empty DataFrame is returned
        on any fetch or parse failure.
    """
    url = "https://finance.naver.com/sise/sise_rise.naver?sosok=1"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36"
    }

    try:
        # Request the webpage. A timeout keeps the app from hanging
        # forever if Naver is slow or unreachable; a timeout surfaces as
        # a RequestException and is handled below.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        print("[INFO] Page fetched successfully.")

        # Parse the HTML
        soup = BeautifulSoup(response.content, "html.parser")

        # Locate the table. find() returns None when the page layout has
        # changed; without this guard, table.find_all would raise an
        # AttributeError that the except clause below does not catch.
        table = soup.find("table", class_="type_2")
        if table is None:
            print("[ERROR] Could not locate the stock table on the page.")
            return pd.DataFrame()

        rows = table.find_all("tr")[2:]  # Skip the header rows

        data = []

        # Extract data row by row
        for row in rows:
            cols = row.find_all("td")
            if len(cols) < 12:  # Skip blank or irrelevant rows
                continue

            entry = {
                "Rank": cols[0].get_text(strip=True),
                "Name": cols[1].get_text(strip=True),
                "Price": cols[2].get_text(strip=True),
                "Change": cols[3].get_text(strip=True),
                "Change_Rate": cols[4].get_text(strip=True),
                "Volume": cols[5].get_text(strip=True),
                "Buy_Price": cols[6].get_text(strip=True),
                "Sell_Price": cols[7].get_text(strip=True),
                "Total_Buy_Quantity": cols[8].get_text(strip=True),
                "Total_Sell_Quantity": cols[9].get_text(strip=True),
                "PER": cols[10].get_text(strip=True),
                "ROE": cols[11].get_text(strip=True),
            }
            data.append(entry)

        print(f"[DEBUG] Extracted {len(data)} rows.")
        return pd.DataFrame(data)

    except requests.exceptions.RequestException as e:
        print(f"[ERROR] Failed to fetch page: {e}")
        return pd.DataFrame()
55
+
56
def display_data():
    """Fetch the scraped KOSDAQ data for display in the Gradio UI.

    Returns:
        pd.DataFrame: the scraped table, or a one-cell DataFrame carrying
        an error message when scraping produced no rows.
    """
    df = scrape_kosdaq()
    if df.empty:
        # The caller wires this function to a gr.Dataframe output, which
        # cannot render a bare string — wrap the message in a DataFrame.
        return pd.DataFrame({"Result": ["Failed to fetch data or no data available."]})
    return df
61
 
62
# Gradio Interface
def gradio_interface():
    """Assemble and return the Gradio Blocks app for the KOSDAQ scraper.

    The app shows a title, an (initially empty) results table, and a
    button that triggers a fresh scrape on click.
    """
    with gr.Blocks() as demo:
        gr.Markdown("### Naver Kosdaq Stock Scraper")
        table_view = gr.Dataframe()
        refresh = gr.Button("Fetch Data")

        # Clicking the button re-runs the scraper and refreshes the table.
        refresh.click(display_data, inputs=[], outputs=table_view)

    return demo
72
 
73
# Build and launch the Gradio app only when run as a script (not on import).
if __name__ == "__main__":
    gradio_interface().launch()