Spaces:
openfree
/
Running on CPU Upgrade

seawolf2357 committed on
Commit
8aede25
·
verified ·
1 Parent(s): 76d493c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -55
app.py CHANGED
@@ -1,9 +1,17 @@
1
  import gradio as gr
2
  import requests
3
  import json
 
4
  from datetime import datetime, timedelta
 
 
5
 
6
- API_KEY = "V38CNn4HXpLtynJQyOeoUensTEYoFy8PBUxKpDqAW1pawT1vfJ2BWtPQ98h6"
 
 
 
 
 
7
 
8
  MAJOR_COUNTRIES = [
9
  "United States", "United Kingdom", "Canada", "Australia", "Germany",
@@ -58,11 +66,8 @@ def search_serphouse(query, country, page=1, num_result=100):
58
 
59
  def format_results_from_raw(results):
60
  try:
61
- # ๋””๋ฒ„๊ทธ ์ •๋ณด ์ƒ๋žต
62
- debug_info = ""
63
-
64
  if isinstance(results, dict) and "error" in results:
65
- return "Error: " + results["error"], ""
66
 
67
  if not isinstance(results, dict):
68
  raise ValueError("๊ฒฐ๊ณผ๊ฐ€ ์‚ฌ์ „ ํ˜•์‹์ด ์•„๋‹™๋‹ˆ๋‹ค.")
@@ -83,10 +88,9 @@ def format_results_from_raw(results):
83
  news_results = []
84
 
85
  if not news_results:
86
- return "๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.", ""
87
 
88
- # ๋‰ด์Šค ๊ฒฐ๊ณผ๋ฅผ ๋ฆฌ์ŠคํŠธ ํ˜•ํƒœ๋กœ ํฌ๋งทํŒ… (์ด๋ฏธ์ง€ ์ธ๋„ค์ผ ํฌํ•จ)
89
- list_output = ""
90
 
91
  for idx, result in enumerate(news_results, 1):
92
  title = result.get("title", "์ œ๋ชฉ ์—†์Œ")
@@ -96,85 +100,124 @@ def format_results_from_raw(results):
96
  time = result.get("time", result.get("date", "์•Œ ์ˆ˜ ์—†๋Š” ์‹œ๊ฐ„"))
97
  image_url = result.get("img", result.get("thumbnail", ""))
98
 
99
- # base64๋กœ ์ธ์ฝ”๋”ฉ๋œ ์ด๋ฏธ์ง€๋ฅผ ์ฒ˜๋ฆฌํ•˜์ง€ ์•Š์Œ
100
- if image_url and not image_url.startswith("data:image"):
101
- thumbnail_html = f'<img src="{image_url}" alt="Thumbnail" style="width: 100px; height: auto;">'
102
- else:
103
- thumbnail_html = ''
104
-
105
- # ๋ฆฌ์ŠคํŠธ ํ˜•์‹์˜ ๊ธฐ์‚ฌ (์ด๋ฏธ์ง€ ์ธ๋„ค์ผ ํฌํ•จ)
106
- list_item = f"""
107
- <div style="margin-bottom: 20px;">
108
- <h4>{idx}. <a href="{link}" target="_blank">{title}</a></h4>
109
- <p>{thumbnail_html}</p>
110
- <p>์š”์•ฝ: {snippet}</p>
111
- <p>์ถœ์ฒ˜: {channel} | ์‹œ๊ฐ„: {time}</p>
112
- <hr>
113
- </div>
114
- """
115
- list_output += list_item
116
 
117
- return list_output, ""
118
 
119
  except Exception as e:
120
  error_message = f"๊ฒฐ๊ณผ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"
121
- return "Error: " + error_message, ""
122
 
123
  def serphouse_search(query, country):
124
  # ํŽ˜์ด์ง€์™€ ๊ฒฐ๊ณผ ์ˆ˜์˜ ๊ธฐ๋ณธ๊ฐ’์„ ์„ค์ •ํ•ฉ๋‹ˆ๋‹ค.
125
  page = 1
126
  num_result = 100
127
  results = search_serphouse(query, country, page, num_result)
128
- list_output, debug_info = format_results_from_raw(results)
129
- return list_output
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
 
131
  css = """
132
  footer {
133
  visibility: hidden;
134
  }
135
- /* '๋‰ด์Šค ๊ฒฐ๊ณผ'์™€ '๋””๋ฒ„๊ทธ ์ •๋ณด' ํƒญ ์ˆจ๊ธฐ๊ธฐ */
136
- #tab-๋‰ด์Šค_๊ฒฐ๊ณผ, #tab-๋””๋ฒ„๊ทธ_์ •๋ณด {
137
- display: none !important;
138
- }
139
- /* 'ํŽ˜์ด์ง€'์™€ '๊ฒฐ๊ณผ ์ˆ˜' ์ž…๋ ฅ ์š”์†Œ ์ˆจ๊ธฐ๊ธฐ */
140
- .slider-container {
141
- display: none !important;
142
- }
143
  """
144
 
145
  # Gradio ์ธํ„ฐํŽ˜์ด์Šค ๊ตฌ์„ฑ
146
- with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI ์„œ๋น„์Šค") as iface:
147
  gr.Markdown("๊ฒ€์ƒ‰์–ด๋ฅผ ์ž…๋ ฅํ•˜๊ณ  ์›ํ•˜๋Š” ๊ตญ๊ฐ€๋ฅผ ์„ ํƒํ•˜๋ฉด, ๊ฒ€์ƒ‰์–ด์™€ ์ผ์น˜ํ•˜๋Š” 24์‹œ๊ฐ„ ์ด๋‚ด ๋‰ด์Šค๋ฅผ ์ตœ๋Œ€ 100๊ฐœ ์ถœ๋ ฅํ•ฉ๋‹ˆ๋‹ค.")
148
 
149
- with gr.Tab("๊ฒ€์ƒ‰"):
150
  with gr.Row():
151
  query = gr.Textbox(label="๊ฒ€์ƒ‰์–ด")
152
  country = gr.Dropdown(MAJOR_COUNTRIES, label="๊ตญ๊ฐ€", value="South Korea")
153
- # 'ํŽ˜์ด์ง€'์™€ '๊ฒฐ๊ณผ ์ˆ˜' ์ž…๋ ฅ ์š”์†Œ ์ œ๊ฑฐ
154
- # with gr.Row():
155
- # page = gr.Slider(1, 10, 1, label="ํŽ˜์ด์ง€")
156
- # num_result = gr.Slider(1, 100, 100, label="๊ฒฐ๊ณผ ์ˆ˜")
157
 
158
- search_button = gr.Button("๊ฒ€์ƒ‰")
 
159
 
160
- # '๋‰ด์Šค ๊ฒฐ๊ณผ'์™€ '๋””๋ฒ„๊ทธ ์ •๋ณด' ํƒญ ์ œ๊ฑฐ
161
- # with gr.Tab("๋‰ด์Šค ๊ฒฐ๊ณผ"):
162
- # news_output = gr.HTML(label="๋‰ด์Šค ๊ฒฐ๊ณผ")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
 
164
- with gr.Tab("๋ฆฌ์ŠคํŠธ"):
165
- list_output = gr.HTML(label="๋ฆฌ์ŠคํŠธ ๊ฒฐ๊ณผ") # HTML๋กœ ๋ณ€๊ฒฝ
166
 
167
- # with gr.Tab("๋””๋ฒ„๊ทธ ์ •๋ณด"):
168
- # debug_output = gr.Textbox(label="๋””๋ฒ„๊ทธ ์ •๋ณด", lines=10)
169
 
170
- def search_and_display(query, country):
171
- list_output_text = serphouse_search(query, country)
172
- return {list_output: list_output_text}
 
 
 
173
 
174
  search_button.click(
175
  search_and_display,
176
  inputs=[query, country],
177
- outputs=[list_output]
 
 
 
 
 
 
178
  )
179
 
180
  iface.launch(auth=("gini", "pick"))
 
1
  import gradio as gr
2
  import requests
3
  import json
4
+ import os
5
  from datetime import datetime, timedelta
6
+ from bs4 import BeautifulSoup # ์›น ํŽ˜์ด์ง€์—์„œ ํ…์ŠคํŠธ๋ฅผ ์ถ”์ถœํ•˜๊ธฐ ์œ„ํ•ด ์‚ฌ์šฉ
7
+ from huggingface_hub import InferenceClient # LLM ์‚ฌ์šฉ์„ ์œ„ํ•ด ํ•„์š”
8
 
9
+ # Install the required packages (uncomment and run the line below if needed)
10
+ # !pip install bs4 huggingface_hub
11
+
12
+ # Read API credentials from environment variables (API keys should be managed securely, not kept in source).
13
+ API_KEY = os.getenv("SERPHOUSE_API_KEY") # Set your own SerpHouse API key in this environment variable; None when unset.
14
+ HF_TOKEN = os.getenv("HF_TOKEN") # Set your Hugging Face API token in this environment variable; None when unset.
15
 
16
  MAJOR_COUNTRIES = [
17
  "United States", "United Kingdom", "Canada", "Australia", "Germany",
 
66
 
67
  def format_results_from_raw(results):
68
  try:
 
 
 
69
  if isinstance(results, dict) and "error" in results:
70
+ return "Error: " + results["error"], []
71
 
72
  if not isinstance(results, dict):
73
  raise ValueError("๊ฒฐ๊ณผ๊ฐ€ ์‚ฌ์ „ ํ˜•์‹์ด ์•„๋‹™๋‹ˆ๋‹ค.")
 
88
  news_results = []
89
 
90
  if not news_results:
91
+ return "๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.", []
92
 
93
+ articles = []
 
94
 
95
  for idx, result in enumerate(news_results, 1):
96
  title = result.get("title", "์ œ๋ชฉ ์—†์Œ")
 
100
  time = result.get("time", result.get("date", "์•Œ ์ˆ˜ ์—†๋Š” ์‹œ๊ฐ„"))
101
  image_url = result.get("img", result.get("thumbnail", ""))
102
 
103
+ articles.append({
104
+ "index": idx,
105
+ "title": title,
106
+ "link": link,
107
+ "snippet": snippet,
108
+ "channel": channel,
109
+ "time": time,
110
+ "image_url": image_url
111
+ })
 
 
 
 
 
 
 
 
112
 
113
+ return "", articles
114
 
115
  except Exception as e:
116
  error_message = f"๊ฒฐ๊ณผ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"
117
+ return "Error: " + error_message, []
118
 
119
def serphouse_search(query, country):
    """Search SerpHouse for ``query`` in ``country`` and parse the response.

    The request is always made for page 1 with up to 100 results.

    Returns:
        The ``(error_message, articles)`` pair produced by
        ``format_results_from_raw``; on success the message is an empty
        string and ``articles`` is a list of article dicts.
    """
    raw_response = search_serphouse(query, country, 1, 100)
    message, parsed_articles = format_results_from_raw(raw_response)
    return message, parsed_articles
126
+
127
# LLM configuration: a hosted inference client authenticated with HF_TOKEN.
hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=HF_TOKEN)


def summarize_article(url):
    """Download an article page and return an LLM-generated summary of it.

    The page is fetched over HTTP, the text of every ``<p>`` element is
    joined into one string, and that string is sent to ``hf_client`` with a
    summarisation prompt.

    Args:
        url: Address of the article page to fetch.

    Returns:
        The generated summary, or a user-facing message string when the page
        has no paragraph text or when any step raises. This function never
        raises; all failures are reported through the returned string.
    """
    # NOTE(review): the non-ASCII string literals below look mis-encoded
    # (Korean UTF-8 decoded with the wrong code page). They are runtime text,
    # so they are kept byte-for-byte here -- restore them from the original
    # file rather than from this copy.
    try:
        # requests waits forever by default; bound the call so a single
        # unresponsive site cannot hang the worker. A timeout raises and is
        # reported through the generic handler below.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        # Simple extraction: concatenate the text of every paragraph element.
        text = ' '.join(p.get_text() for p in soup.find_all('p'))
        if not text.strip():
            return "๊ธฐ์‚ฌ ๋‚ด์šฉ์„ ๊ฐ€์ ธ์˜ฌ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."

        # Build the summarisation prompt (presumably "summarise this article
        # in three Korean sentences" -- confirm once the encoding is restored).
        prompt = f"๋‹ค์Œ ์˜์–ด ๊ธฐ์‚ฌ๋ฅผ ํ•œ๊ตญ์–ด๋กœ 3๋ฌธ์žฅ์œผ๋กœ ์š”์•ฝํ•˜์„ธ์š”:\n{text}"
        summary = hf_client.text_generation(prompt, max_new_tokens=500)
        return summary
    except Exception as e:
        # Deliberate catch-all: the UI shows the returned string directly, so
        # network, parsing and inference failures all become a message.
        return f"์š”์•ฝ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"
147
 
148
css = """
footer {
    visibility: hidden;
}
"""

# NOTE(review): the non-ASCII UI literals in this section look mis-encoded
# (Korean UTF-8 decoded with the wrong code page). They are runtime text and
# are kept byte-for-byte; restore them from the original file.

# Build the Gradio interface.
with gr.Blocks(css=css, title="NewsAI ์„œ๋น„์Šค") as iface:
    gr.Markdown("๊ฒ€์ƒ‰์–ด๋ฅผ ์ž…๋ ฅํ•˜๊ณ  ์›ํ•˜๋Š” ๊ตญ๊ฐ€๋ฅผ ์„ ํƒํ•˜๋ฉด, ๊ฒ€์ƒ‰์–ด์™€ ์ผ์น˜ํ•˜๋Š” 24์‹œ๊ฐ„ ์ด๋‚ด ๋‰ด์Šค๋ฅผ ์ตœ๋Œ€ 100๊ฐœ ์ถœ๋ ฅํ•ฉ๋‹ˆ๋‹ค.")

    # NOTE(review): indentation was lost in this copy of the file; the
    # Column/Row nesting below is a reconstruction -- confirm the layout.
    with gr.Column():
        with gr.Row():
            query = gr.Textbox(label="๊ฒ€์ƒ‰์–ด")
            country = gr.Dropdown(MAJOR_COUNTRIES, label="๊ตญ๊ฐ€", value="South Korea")
        search_button = gr.Button("๊ฒ€์ƒ‰")

        output_table = gr.HTML()
        summary_output = gr.Markdown(visible=False)

    def search_and_display(query, country):
        """Run the news search and render the results as an HTML table.

        Args:
            query: Search keywords entered by the user.
            country: Country name selected in the dropdown.

        Returns:
            A ``(html, update)`` pair for ``output_table`` and
            ``summary_output``. On error the HTML is the error message in a
            ``<p>`` and the summary pane is hidden; otherwise the HTML is the
            result table and the summary pane is shown and cleared.
        """
        # Local import so this block does not depend on a file-level change.
        from html import escape

        error_message, articles = serphouse_search(query, country)
        if error_message:
            # The message can embed upstream/exception text, so escape it.
            return f"<p>{escape(str(error_message))}</p>", gr.update(visible=False)

        # Inline click handler for each row's button. It is inlined because
        # <script> elements delivered through innerHTML are never executed,
        # so a helper function shipped inside this HTML would be undefined.
        # The URL is read from a data attribute instead of being spliced into
        # JavaScript source, and an 'input' event is dispatched so the
        # frontend registers the new textbox value before the click.
        # The snippet contains no double quotes, so it is safe inside a
        # double-quoted attribute.
        # NOTE(review): relies on the hidden #article_url_input and
        # #analyze_button components being present in the DOM -- confirm for
        # the installed Gradio version.
        on_click = (
            "const box = document.querySelector('#article_url_input textarea');"
            "box.value = this.dataset.url;"
            "box.dispatchEvent(new Event('input', {bubbles: true}));"
            "document.querySelector('#analyze_button').click();"
        )

        parts = [
            "<table border='1' style='width:100%; text-align:left;'>"
            "<tr><th>๋ฒˆํ˜ธ</th><th>์ œ๋ชฉ</th><th>์ถœ์ฒ˜</th><th>์‹œ๊ฐ„</th><th>๋ถ„์„</th></tr>"
        ]
        for article in articles:
            # Every field comes from third-party search results: escape all
            # of them before they reach markup or attribute values.
            link = escape(str(article['link']), quote=True)
            title = escape(str(article['title']))
            channel = escape(str(article['channel']))
            published = escape(str(article['time']))
            analyze_cell = f'<button data-url="{link}" onclick="{on_click}">๋ถ„์„</button>'
            parts.append(
                "<tr>"
                f"<td>{article['index']}</td>"
                f'<td><a href="{link}" target="_blank" rel="noopener noreferrer">{title}</a></td>'
                f"<td>{channel}</td>"
                f"<td>{published}</td>"
                f"<td>{analyze_cell}</td>"
                "</tr>"
            )
        parts.append("</table>")

        # Show the summary pane again and clear any previous summary.
        return "".join(parts), gr.update(visible=True, value="")

    def analyze_article(url):
        """Return the summary text for the article at ``url``."""
        return summarize_article(url)

    # Hidden bridge components: a table button writes the chosen URL into the
    # textbox and clicks the hidden button, which triggers analyze_article.
    article_url_input = gr.Textbox(visible=False, elem_id="article_url_input")
    analyze_button = gr.Button("๋ถ„์„", visible=False, elem_id="analyze_button")

    search_button.click(
        search_and_display,
        inputs=[query, country],
        outputs=[output_table, summary_output]
    )

    analyze_button.click(
        analyze_article,
        inputs=[article_url_input],
        outputs=[summary_output]
    )

# NOTE(review): login credentials are hard-coded in source; consider loading
# them from the environment like the API keys.
iface.launch(auth=("gini", "pick"))