Spaces:
openfree
/
Running on CPU Upgrade

seawolf2357 committed on
Commit
f10671e
·
verified ·
1 Parent(s): c763389

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +115 -60
app.py CHANGED
@@ -3,13 +3,9 @@ import requests
3
  import json
4
  import os
5
  from datetime import datetime, timedelta
6
- from bs4 import BeautifulSoup # 웹 페이지에서 텍스트를 추출하기 위해 사용
7
  from huggingface_hub import InferenceClient # LLM ์‚ฌ์šฉ์„ ์œ„ํ•ด ํ•„์š”
8
 
9
- # 필요한 패키지 설치 (필요한 경우 주석을 제거하고 실행)
10
- # !pip install bs4 huggingface_hub
11
-
12
- # ํ™˜๊ฒฝ ๋ณ€์ˆ˜์—์„œ API ํ‚ค ๊ฐ€์ ธ์˜ค๊ธฐ (API ํ‚ค๋Š” ์•ˆ์ „ํ•˜๊ฒŒ ๊ด€๋ฆฌ๋˜์–ด์•ผ ํ•ฉ๋‹ˆ๋‹ค)
13
  API_KEY = os.getenv("SERPHOUSE_API_KEY") # 본인의 SerpHouse API 키를 환경 변수로 설정하세요.
14
  HF_TOKEN = os.getenv("HF_TOKEN") # Hugging Face API 토큰을 환경 변수로 설정하세요.
15
 
@@ -37,7 +33,7 @@ def search_serphouse(query, country, page=1, num_result=10):
37
  "data": {
38
  "q": query,
39
  "domain": "google.com",
40
- "loc": country,
41
  "lang": "en",
42
  "device": "desktop",
43
  "serp_type": "news",
@@ -60,7 +56,7 @@ def search_serphouse(query, country, page=1, num_result=10):
60
  return response.json()
61
  except requests.RequestException as e:
62
  error_msg = f"Error: {str(e)}"
63
- if response.text:
64
  error_msg += f"\nResponse content: {response.text}"
65
  return {"error": error_msg}
66
 
@@ -72,12 +68,11 @@ def format_results_from_raw(results):
72
  if not isinstance(results, dict):
73
  raise ValueError("결과가 사전 형식이 아닙니다.")
74
 
75
- # 'results' ํ‚ค ๋‚ด๋ถ€์˜ ๊ตฌ์กฐ ํ™•์ธ (์ค‘์ฒฉ๋œ 'results' ์ฒ˜๋ฆฌ)
76
  if 'results' in results:
77
  results_content = results['results']
78
  if 'results' in results_content:
79
  results_content = results_content['results']
80
- # 'news' ํ‚ค ํ™•์ธ
81
  if 'news' in results_content:
82
  news_results = results_content['news']
83
  else:
@@ -101,6 +96,7 @@ def format_results_from_raw(results):
101
  image_url = result.get("img", result.get("thumbnail", ""))
102
 
103
  articles.append({
 
104
  "title": title,
105
  "link": link,
106
  "snippet": snippet,
@@ -116,7 +112,6 @@ def format_results_from_raw(results):
116
  return "Error: " + error_message, []
117
 
118
  def serphouse_search(query, country):
119
- # ํŽ˜์ด์ง€์™€ ๊ฒฐ๊ณผ ์ˆ˜์˜ ๊ธฐ๋ณธ๊ฐ’์„ ์„ค์ •ํ•ฉ๋‹ˆ๋‹ค.
120
  page = 1
121
  num_result = 10
122
  results = search_serphouse(query, country, page, num_result)
@@ -126,19 +121,10 @@ def serphouse_search(query, country):
126
  # LLM ์„ค์ •
127
  hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=HF_TOKEN)
128
 
129
- def summarize_article(url):
130
  try:
131
- # ์›น ํŽ˜์ด์ง€์—์„œ ํ…์ŠคํŠธ ์ถ”์ถœ
132
- response = requests.get(url)
133
- response.raise_for_status()
134
- soup = BeautifulSoup(response.text, 'html.parser')
135
- # ๋ชจ๋“  ํ…์ŠคํŠธ๋ฅผ ์ถ”์ถœ (๊ฐ„๋‹จํ•œ ์˜ˆ์‹œ)
136
- text = ' '.join([p.get_text() for p in soup.find_all('p')])
137
- if not text.strip():
138
- return "๊ธฐ์‚ฌ ๋‚ด์šฉ์„ ๊ฐ€์ ธ์˜ฌ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
139
-
140
- # ์š”์•ฝ ์ƒ์„ฑ
141
- prompt = f"๋‹ค์Œ ์˜์–ด ๊ธฐ์‚ฌ๋ฅผ ํ•œ๊ตญ์–ด๋กœ 3๋ฌธ์žฅ์œผ๋กœ ์š”์•ฝํ•˜์„ธ์š”:\n{text}"
142
  summary = hf_client.text_generation(prompt, max_new_tokens=500)
143
  return summary
144
  except Exception as e:
@@ -160,43 +146,112 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI ์„œ๋น„์Šค") as
160
  country = gr.Dropdown(MAJOR_COUNTRIES, label="๊ตญ๊ฐ€", value="South Korea")
161
  search_button = gr.Button("๊ฒ€์ƒ‰")
162
 
163
- article_outputs = []
164
-
165
- def search_and_display(query, country):
166
- error_message, articles = serphouse_search(query, country)
167
- if error_message:
168
- return gr.update(visible=True, value=error_message)
169
- else:
170
- # ๊ธฐ์กด ์ถœ๋ ฅ๋ฌผ ์ œ๊ฑฐ
171
- for components in article_outputs:
172
- for component in components:
173
- component.visible = False
174
- article_outputs.clear()
175
-
176
- # ๊ฐ ๊ธฐ์‚ฌ์— ๋Œ€ํ•ด ์ถœ๋ ฅ ์ƒ์„ฑ
177
- for article in articles:
178
- with gr.Column():
179
- title = gr.Markdown(f"### [{article['title']}]({article['link']})")
180
- image = gr.Image(value=article['image_url'], visible=bool(article['image_url']), shape=(200, 150))
181
- snippet = gr.Markdown(f"**์š”์•ฝ:** {article['snippet']}")
182
- info = gr.Markdown(f"**์ถœ์ฒ˜:** {article['channel']} | **์‹œ๊ฐ„:** {article['time']}")
183
- analyze_button = gr.Button("๋ถ„์„")
184
- summary_output = gr.Markdown(visible=False)
185
-
186
- def analyze_article(url):
187
- summary = summarize_article(url)
188
- summary_output.update(value=summary, visible=True)
189
-
190
- analyze_button.click(analyze_article, inputs=gr.State(article['link']), outputs=summary_output)
191
-
192
- article_outputs.append([title, image, snippet, info, analyze_button, summary_output])
193
-
194
- return gr.update()
195
-
196
- search_button.click(
197
- search_and_display,
198
- inputs=[query, country],
199
- outputs=[]
200
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
 
202
  iface.launch(auth=("gini", "pick"))
 
3
  import json
4
  import os
5
  from datetime import datetime, timedelta
 
6
  from huggingface_hub import InferenceClient # LLM ์‚ฌ์šฉ์„ ์œ„ํ•ด ํ•„์š”
7
 
8
+ # 환경 변수에서 API 키 가져오기
 
 
 
9
  API_KEY = os.getenv("SERPHOUSE_API_KEY") # ๋ณธ์ธ์˜ SerpHouse API ํ‚ค๋ฅผ ํ™˜๊ฒฝ ๋ณ€์ˆ˜๋กœ ์„ค์ •ํ•˜์„ธ์š”.
10
  HF_TOKEN = os.getenv("HF_TOKEN") # Hugging Face API ํ† ํฐ์„ ํ™˜๊ฒฝ ๋ณ€์ˆ˜๋กœ ์„ค์ •ํ•˜์„ธ์š”.
11
 
 
33
  "data": {
34
  "q": query,
35
  "domain": "google.com",
36
+ "loc": country, # 국가 이름을 직접 사용합니다.
37
  "lang": "en",
38
  "device": "desktop",
39
  "serp_type": "news",
 
56
  return response.json()
57
  except requests.RequestException as e:
58
  error_msg = f"Error: {str(e)}"
59
+ if hasattr(response, 'text'):
60
  error_msg += f"\nResponse content: {response.text}"
61
  return {"error": error_msg}
62
 
 
68
  if not isinstance(results, dict):
69
  raise ValueError("๊ฒฐ๊ณผ๊ฐ€ ์‚ฌ์ „ ํ˜•์‹์ด ์•„๋‹™๋‹ˆ๋‹ค.")
70
 
71
+ # 'results' ํ‚ค ๋‚ด๋ถ€์˜ ๊ตฌ์กฐ ํ™•์ธ
72
  if 'results' in results:
73
  results_content = results['results']
74
  if 'results' in results_content:
75
  results_content = results_content['results']
 
76
  if 'news' in results_content:
77
  news_results = results_content['news']
78
  else:
 
96
  image_url = result.get("img", result.get("thumbnail", ""))
97
 
98
  articles.append({
99
+ "index": idx,
100
  "title": title,
101
  "link": link,
102
  "snippet": snippet,
 
112
  return "Error: " + error_message, []
113
 
114
  def serphouse_search(query, country):
 
115
  page = 1
116
  num_result = 10
117
  results = search_serphouse(query, country, page, num_result)
 
121
  # LLM ์„ค์ •
122
  hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=HF_TOKEN)
123
 
124
+ def summarize_article(title, snippet):
125
  try:
126
+ # ๊ธฐ์‚ฌ ์ œ๋ชฉ๊ณผ ์Šค๋‹ˆํŽซ์„ ๊ธฐ๋ฐ˜์œผ๋กœ ์š”์•ฝ ์ƒ์„ฑ
127
+ prompt = f"๋‹ค์Œ ๋‰ด์Šค ์ œ๋ชฉ๊ณผ ์š”์•ฝ์„ ๋ฐ”ํƒ•์œผ๋กœ ํ•œ๊ตญ์–ด๋กœ 3๋ฌธ์žฅ์œผ๋กœ ์š”์•ฝํ•˜์„ธ์š”:\n์ œ๋ชฉ: {title}\n์š”์•ฝ: {snippet}"
 
 
 
 
 
 
 
 
 
128
  summary = hf_client.text_generation(prompt, max_new_tokens=500)
129
  return summary
130
  except Exception as e:
 
146
  country = gr.Dropdown(MAJOR_COUNTRIES, label="๊ตญ๊ฐ€", value="South Korea")
147
  search_button = gr.Button("๊ฒ€์ƒ‰")
148
 
149
+ # ์ตœ๋Œ€ 10๊ฐœ์˜ ๊ธฐ์‚ฌ์— ๋Œ€ํ•œ ์ปดํฌ๋„ŒํŠธ๋ฅผ ๋ฏธ๋ฆฌ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
150
+ article_components = []
151
+ for i in range(10):
152
+ with gr.Group(visible=False) as article_group:
153
+ title = gr.Markdown()
154
+ image = gr.Image(width=200, height=150)
155
+ snippet = gr.Markdown()
156
+ info = gr.Markdown()
157
+ analyze_button = gr.Button("๋ถ„์„")
158
+ summary_output = gr.Markdown(visible=False)
159
+
160
+ article_components.append({
161
+ 'group': article_group,
162
+ 'title': title,
163
+ 'image': image,
164
+ 'snippet': snippet,
165
+ 'info': info,
166
+ 'analyze_button': analyze_button,
167
+ 'summary_output': summary_output,
168
+ })
169
+
170
+ def search_and_display(query, country):
171
+ error_message, articles = serphouse_search(query, country)
172
+ outputs = []
173
+ if error_message:
174
+ outputs.append(gr.update(value=error_message, visible=True))
175
+ # ๋‚˜๋จธ์ง€ ์ปดํฌ๋„ŒํŠธ ์ˆจ๊ธฐ๊ธฐ
176
+ for comp in article_components:
177
+ outputs.extend([
178
+ gr.update(visible=False), # group
179
+ gr.update(), # title
180
+ gr.update(), # image
181
+ gr.update(), # snippet
182
+ gr.update(), # info
183
+ gr.update(), # analyze_button
184
+ gr.update(visible=False), # summary_output
185
+ ])
186
+ return outputs
187
+ else:
188
+ # ๊ธฐ์‚ฌ ์ปดํฌ๋„ŒํŠธ ์—…๋ฐ์ดํŠธ
189
+ for idx, comp in enumerate(article_components):
190
+ if idx < len(articles):
191
+ article = articles[idx]
192
+ comp['group'].visible = True
193
+ comp['title'].value = f"### [{article['title']}]({article['link']})"
194
+ if article['image_url'] and not article['image_url'].startswith("data:image"):
195
+ comp['image'].value = article['image_url']
196
+ comp['image'].visible = True
197
+ else:
198
+ comp['image'].visible = False
199
+ comp['snippet'].value = f"**์š”์•ฝ:** {article['snippet']}"
200
+ comp['info'].value = f"**์ถœ์ฒ˜:** {article['channel']} | **์‹œ๊ฐ„:** {article['time']}"
201
+ comp['summary_output'].visible = False # ์ดˆ๊ธฐ์—๋Š” ์š”์•ฝ ์ˆจ๊น€
202
+
203
+ # ๋ถ„์„ ๋ฒ„ํŠผ ํด๋ฆญ ์ด๋ฒคํŠธ ์ •์˜
204
+ def create_analyze_function(article_title, article_snippet):
205
+ def analyze_article():
206
+ summary = summarize_article(article_title, article_snippet)
207
+ return gr.update(value=summary, visible=True)
208
+ return analyze_article
209
+
210
+ comp['analyze_button'].click(
211
+ create_analyze_function(article['title'], article['snippet']),
212
+ inputs=[],
213
+ outputs=comp['summary_output']
214
+ )
215
+
216
+ outputs.extend([
217
+ gr.update(visible=True), # group
218
+ gr.update(), # title
219
+ gr.update(), # image
220
+ gr.update(), # snippet
221
+ gr.update(), # info
222
+ gr.update(), # analyze_button
223
+ gr.update(visible=False), # summary_output
224
+ ])
225
+ else:
226
+ # ๋‚จ์€ ์ปดํฌ๋„ŒํŠธ ์ˆจ๊ธฐ๊ธฐ
227
+ comp['group'].visible = False
228
+ outputs.extend([
229
+ gr.update(visible=False), # group
230
+ gr.update(), # title
231
+ gr.update(), # image
232
+ gr.update(), # snippet
233
+ gr.update(), # info
234
+ gr.update(), # analyze_button
235
+ gr.update(visible=False), # summary_output
236
+ ])
237
+ return outputs
238
+
239
+ # search_button ํด๋ฆญ ์‹œ ์—…๋ฐ์ดํŠธ๋  ์ถœ๋ ฅ ์ปดํฌ๋„ŒํŠธ ๋ชฉ๋ก ์ƒ์„ฑ
240
+ search_outputs = []
241
+ search_outputs.append(gr.Markdown(visible=False)) # ์˜ค๋ฅ˜ ๋ฉ”์‹œ์ง€ ์ถœ๋ ฅ์šฉ
242
+ for comp in article_components:
243
+ search_outputs.append(comp['group'])
244
+ search_outputs.append(comp['title'])
245
+ search_outputs.append(comp['image'])
246
+ search_outputs.append(comp['snippet'])
247
+ search_outputs.append(comp['info'])
248
+ search_outputs.append(comp['analyze_button'])
249
+ search_outputs.append(comp['summary_output'])
250
+
251
+ search_button.click(
252
+ search_and_display,
253
+ inputs=[query, country],
254
+ outputs=search_outputs
255
+ )
256
 
257
  iface.launch(auth=("gini", "pick"))