Spaces:
openfree
/
Running on CPU Upgrade

seawolf2357 committed on
Commit
5528b6f
·
verified ·
1 Parent(s): 71d4a4a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -173
app.py CHANGED
@@ -3,66 +3,16 @@ import requests
3
  import json
4
  import os
5
  from datetime import datetime, timedelta
 
6
  from huggingface_hub import InferenceClient # LLM 사용을 위해 필요
7
 
 
 
 
8
  # 환경 변수에서 API 키 가져오기 (API 키는 안전하게 관리되어야 합니다)
9
  API_KEY = os.getenv("SERPHOUSE_API_KEY") # 본인의 SerpHouse API 키를 환경 변수로 설정하세요.
10
  HF_TOKEN = os.getenv("HF_TOKEN") # Hugging Face API 토큰을 환경 변수로 설정하세요.
11
 
12
- # 국가 이름과 Google 검색에서 사용하는 국가 코드를 매핑
13
- COUNTRY_CODE_MAPPING = {
14
- "United States": "us",
15
- "United Kingdom": "uk",
16
- "Canada": "ca",
17
- "Australia": "au",
18
- "Germany": "de",
19
- "France": "fr",
20
- "Japan": "jp",
21
- "South Korea": "kr",
22
- "China": "cn",
23
- "India": "in",
24
- "Brazil": "br",
25
- "Mexico": "mx",
26
- "Russia": "ru",
27
- "Italy": "it",
28
- "Spain": "es",
29
- "Netherlands": "nl",
30
- "Sweden": "se",
31
- "Switzerland": "ch",
32
- "Norway": "no",
33
- "Denmark": "dk",
34
- "Finland": "fi",
35
- "Belgium": "be",
36
- "Austria": "at",
37
- "New Zealand": "nz",
38
- "Ireland": "ie",
39
- "Singapore": "sg",
40
- "Hong Kong": "hk",
41
- "Israel": "il",
42
- "United Arab Emirates": "ae",
43
- "Saudi Arabia": "sa",
44
- "South Africa": "za",
45
- "Turkey": "tr",
46
- "Egypt": "eg",
47
- "Poland": "pl",
48
- "Czech Republic": "cz",
49
- "Hungary": "hu",
50
- "Greece": "gr",
51
- "Portugal": "pt",
52
- "Argentina": "ar",
53
- "Chile": "cl",
54
- "Colombia": "co",
55
- "Peru": "pe",
56
- "Venezuela": "ve",
57
- "Thailand": "th",
58
- "Malaysia": "my",
59
- "Indonesia": "id",
60
- "Philippines": "ph",
61
- "Vietnam": "vn",
62
- "Pakistan": "pk",
63
- "Bangladesh": "bd"
64
- }
65
-
66
  MAJOR_COUNTRIES = [
67
  "United States", "United Kingdom", "Canada", "Australia", "Germany",
68
  "France", "Japan", "South Korea", "China", "India",
@@ -76,7 +26,7 @@ MAJOR_COUNTRIES = [
76
  "Indonesia", "Philippines", "Vietnam", "Pakistan", "Bangladesh"
77
  ]
78
 
79
- def search_serphouse(query, country, page=1, num_result=100):
80
  url = "https://api.serphouse.com/serp/live"
81
 
82
  now = datetime.utcnow()
@@ -87,7 +37,7 @@ def search_serphouse(query, country, page=1, num_result=100):
87
  "data": {
88
  "q": query,
89
  "domain": "google.com",
90
- "loc": COUNTRY_CODE_MAPPING.get(country, "us"), # 여기가 문제의 원인입니다
91
  "lang": "en",
92
  "device": "desktop",
93
  "serp_type": "news",
@@ -98,33 +48,40 @@ def search_serphouse(query, country, page=1, num_result=100):
98
  }
99
  }
100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  def format_results_from_raw(results):
102
  try:
103
- # 결과가 문자열인 경우 (JSON 문자열일 수 있음)
104
- if isinstance(results, str):
105
- try:
106
- results = json.loads(results)
107
- except json.JSONDecodeError:
108
- return "Error: API ์‘๋‹ต์„ JSON์œผ๋กœ ํŒŒ์‹ฑํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.", []
109
 
110
  if not isinstance(results, dict):
111
- return f"Error: ์˜ˆ์ƒ์น˜ ๋ชปํ•œ ๊ฒฐ๊ณผ ํ˜•์‹์ž…๋‹ˆ๋‹ค. ๋ฐ›์€ ํ˜•์‹: {type(results)}", []
112
-
113
- if "error" in results:
114
- return "Error: " + str(results["error"]), []
115
 
116
  # 'results' 키 내부의 구조 확인 (중첩된 'results' 처리)
117
  if 'results' in results:
118
  results_content = results['results']
119
- if isinstance(results_content, dict) and 'results' in results_content:
120
  results_content = results_content['results']
121
  # 'news' ํ‚ค ํ™•์ธ
122
  if 'news' in results_content:
123
  news_results = results_content['news']
124
  else:
125
  news_results = []
126
- elif isinstance(results_content, list):
127
- news_results = results_content
128
  else:
129
  news_results = []
130
  else:
@@ -144,7 +101,6 @@ def format_results_from_raw(results):
144
  image_url = result.get("img", result.get("thumbnail", ""))
145
 
146
  articles.append({
147
- "index": idx,
148
  "title": title,
149
  "link": link,
150
  "snippet": snippet,
@@ -162,7 +118,7 @@ def format_results_from_raw(results):
162
  def serphouse_search(query, country):
163
  # ํŽ˜์ด์ง€์™€ ๊ฒฐ๊ณผ ์ˆ˜์˜ ๊ธฐ๋ณธ๊ฐ’์„ ์„ค์ •ํ•ฉ๋‹ˆ๋‹ค.
164
  page = 1
165
- num_result = 100
166
  results = search_serphouse(query, country, page, num_result)
167
  error_message, articles = format_results_from_raw(results)
168
  return error_message, articles
@@ -170,10 +126,19 @@ def serphouse_search(query, country):
170
  # LLM ์„ค์ •
171
  hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=HF_TOKEN)
172
 
173
- def summarize_article(title, snippet):
174
  try:
175
- # ๊ธฐ์‚ฌ ์ œ๋ชฉ๊ณผ ์Šค๋‹ˆํŽซ์„ ๊ธฐ๋ฐ˜์œผ๋กœ ์š”์•ฝ ์ƒ์„ฑ
176
- prompt = f"๋‹ค์Œ ๋‰ด์Šค ์ œ๋ชฉ๊ณผ ์š”์•ฝ์„ ๋ฐ”ํƒ•์œผ๋กœ ํ•œ๊ตญ์–ด๋กœ 3๋ฌธ์žฅ์œผ๋กœ ์š”์•ฝํ•˜์„ธ์š”:\n์ œ๋ชฉ: {title}\n์š”์•ฝ: {snippet}"
 
 
 
 
 
 
 
 
 
177
  summary = hf_client.text_generation(prompt, max_new_tokens=500)
178
  return summary
179
  except Exception as e:
@@ -183,27 +148,11 @@ css = """
183
  footer {
184
  visibility: hidden;
185
  }
186
- /* ๋ถ„์„ ๋ฒ„ํŠผ ์Šคํƒ€์ผ ๊ฐœ์„  */
187
- .analyze-button {
188
- background-color: #4CAF50; /* Green */
189
- border: none;
190
- color: white;
191
- padding: 6px 12px;
192
- text-align: center;
193
- text-decoration: none;
194
- font-size: 14px;
195
- margin: 2px;
196
- cursor: pointer;
197
- border-radius: 4px;
198
- }
199
- .analyze-button:hover {
200
- background-color: #45a049;
201
- }
202
  """
203
 
204
  # Gradio ์ธํ„ฐํŽ˜์ด์Šค ๊ตฌ์„ฑ
205
- with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI ์„œ๋น„์Šค") as iface:
206
- gr.Markdown("๊ฒ€์ƒ‰์–ด๋ฅผ ์ž…๋ ฅํ•˜๊ณ  ์›ํ•˜๋Š” ๊ตญ๊ฐ€๋ฅผ ์„ ํƒํ•˜๋ฉด, ๊ฒ€์ƒ‰์–ด์™€ ์ผ์น˜ํ•˜๋Š” 24์‹œ๊ฐ„ ์ด๋‚ด ๋‰ด์Šค๋ฅผ ์ตœ๋Œ€ 100๊ฐœ ์ถœ๋ ฅํ•ฉ๋‹ˆ๋‹ค.")
207
 
208
  with gr.Column():
209
  with gr.Row():
@@ -211,85 +160,43 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI ์„œ๋น„์Šค") as
211
  country = gr.Dropdown(MAJOR_COUNTRIES, label="국가", value="South Korea")
212
  search_button = gr.Button("검색")
213
 
214
- output_table = gr.HTML()
215
- summary_output = gr.Markdown(visible=False)
216
-
217
- def search_and_display(query, country):
218
- error_message, articles = serphouse_search(query, country)
219
- if error_message:
220
- return f"<p>{error_message}</p>", gr.update(visible=False)
221
- else:
222
- # ๊ธฐ์‚ฌ ๋ชฉ๋ก์„ HTML ํ…Œ์ด๋ธ”๋กœ ์ƒ์„ฑ
223
- table_html = """
224
- <table border='1' style='width:100%; text-align:left;'>
225
- <tr>
226
- <th>๋ฒˆํ˜ธ</th>
227
- <th>์ œ๋ชฉ</th>
228
- <th>์ถœ์ฒ˜</th>
229
- <th>์‹œ๊ฐ„</th>
230
- <th>๋ถ„์„</th>
231
- </tr>
232
- """
233
- for article in articles:
234
- # ๊ฐ ๊ธฐ์‚ฌ์— ๋Œ€ํ•ด ๋ฒ„ํŠผ์— ํ•ด๋‹นํ•˜๋Š” JavaScript ์ฝ”๋“œ๋ฅผ ์‚ฝ์ž…
235
- analyze_button = f"""<button class="analyze-button" onclick="analyzeArticle('{article['index']}')">๋ถ„์„</button>"""
236
- row = f"""
237
- <tr>
238
- <td>{article['index']}</td>
239
- <td><a href="{article['link']}" target="_blank">{article['title']}</a></td>
240
- <td>{article['channel']}</td>
241
- <td>{article['time']}</td>
242
- <td>{analyze_button}</td>
243
- </tr>
244
- """
245
- table_html += row
246
- table_html += "</table>"
247
-
248
- # JavaScript ํ•จ์ˆ˜ ์ •์˜
249
- js_code = """
250
- <script>
251
- function analyzeArticle(index) {
252
- // Gradio์˜ handleFunction์„ ์‚ฌ์šฉํ•˜์—ฌ Python ํ•จ์ˆ˜ ํ˜ธ์ถœ
253
- const articleData = JSON.parse(document.getElementById('articles_data').textContent);
254
- const selectedArticle = articleData.find(article => article.index == index);
255
- if (selectedArticle) {
256
- gradioApp().querySelector('#article_title textarea').value = selectedArticle.title;
257
- gradioApp().querySelector('#article_snippet textarea').value = selectedArticle.snippet;
258
- gradioApp().querySelector('#analyze_button').click();
259
- }
260
- }
261
- </script>
262
- """
263
-
264
- # ๊ธฐ์‚ฌ ๋ฐ์ดํ„ฐ๋ฅผ JSON์œผ๋กœ ์ €์žฅํ•˜์—ฌ JavaScript์—์„œ ์ ‘๊ทผ ๊ฐ€๋Šฅํ•˜๋„๋ก ํ•จ
265
- articles_json = json.dumps(articles)
266
-
267
- full_html = f"""
268
- <div id="articles_data" style="display:none;">{articles_json}</div>
269
- {table_html}
270
- {js_code}
271
- """
272
-
273
- return full_html, gr.update(visible=True, value="") # summary_output ์ดˆ๊ธฐํ™”
274
-
275
- def analyze_article(title, snippet):
276
- summary = summarize_article(title, snippet)
277
- return summary
278
-
279
- article_title = gr.Textbox(visible=False, elem_id="article_title")
280
- article_snippet = gr.Textbox(visible=False, elem_id="article_snippet")
281
- analyze_button = gr.Button("๋ถ„์„", visible=False, elem_id="analyze_button")
282
-
283
- search_button.click(
284
- search_and_display,
285
- inputs=[query, country],
286
- outputs=[output_table, summary_output]
287
- )
288
 
289
- analyze_button.click(
290
- analyze_article,
291
- inputs=[article_title, article_snippet],
292
- outputs=[summary_output]
293
- )
294
-
295
- iface.launch(auth=("gini", "pick"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import json
4
  import os
5
  from datetime import datetime, timedelta
6
+ from bs4 import BeautifulSoup # 웹 페이지에서 텍스트를 추출하기 위해 사용
7
  from huggingface_hub import InferenceClient # LLM ์‚ฌ์šฉ์„ ์œ„ํ•ด ํ•„์š”
8
 
9
+ # 필요한 패키지 설치 (필요한 경우 주석을 제거하고 실행)
10
+ # !pip install bs4 huggingface_hub
11
+
12
  # ํ™˜๊ฒฝ ๋ณ€์ˆ˜์—์„œ API ํ‚ค ๊ฐ€์ ธ์˜ค๊ธฐ (API ํ‚ค๋Š” ์•ˆ์ „ํ•˜๊ฒŒ ๊ด€๋ฆฌ๋˜์–ด์•ผ ํ•ฉ๋‹ˆ๋‹ค)
13
  API_KEY = os.getenv("SERPHOUSE_API_KEY") # ๋ณธ์ธ์˜ SerpHouse API ํ‚ค๋ฅผ ํ™˜๊ฒฝ ๋ณ€์ˆ˜๋กœ ์„ค์ •ํ•˜์„ธ์š”.
14
  HF_TOKEN = os.getenv("HF_TOKEN") # Hugging Face API ํ† ํฐ์„ ํ™˜๊ฒฝ ๋ณ€์ˆ˜๋กœ ์„ค์ •ํ•˜์„ธ์š”.
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  MAJOR_COUNTRIES = [
17
  "United States", "United Kingdom", "Canada", "Australia", "Germany",
18
  "France", "Japan", "South Korea", "China", "India",
 
26
  "Indonesia", "Philippines", "Vietnam", "Pakistan", "Bangladesh"
27
  ]
28
 
29
+ def search_serphouse(query, country, page=1, num_result=10):
30
  url = "https://api.serphouse.com/serp/live"
31
 
32
  now = datetime.utcnow()
 
37
  "data": {
38
  "q": query,
39
  "domain": "google.com",
40
+ "loc": country,
41
  "lang": "en",
42
  "device": "desktop",
43
  "serp_type": "news",
 
48
  }
49
  }
50
 
51
+ headers = {
52
+ "accept": "application/json",
53
+ "content-type": "application/json",
54
+ "authorization": f"Bearer {API_KEY}"
55
+ }
56
+
57
+ try:
58
+ response = requests.post(url, json=payload, headers=headers)
59
+ response.raise_for_status()
60
+ return response.json()
61
+ except requests.RequestException as e:
62
+ error_msg = f"Error: {str(e)}"
63
+ if response.text:
64
+ error_msg += f"\nResponse content: {response.text}"
65
+ return {"error": error_msg}
66
+
67
  def format_results_from_raw(results):
68
  try:
69
+ if isinstance(results, dict) and "error" in results:
70
+ return "Error: " + results["error"], []
 
 
 
 
71
 
72
  if not isinstance(results, dict):
73
+ raise ValueError("๊ฒฐ๊ณผ๊ฐ€ ์‚ฌ์ „ ํ˜•์‹์ด ์•„๋‹™๋‹ˆ๋‹ค.")
 
 
 
74
 
75
  # 'results' ํ‚ค ๋‚ด๋ถ€์˜ ๊ตฌ์กฐ ํ™•์ธ (์ค‘์ฒฉ๋œ 'results' ์ฒ˜๋ฆฌ)
76
  if 'results' in results:
77
  results_content = results['results']
78
+ if 'results' in results_content:
79
  results_content = results_content['results']
80
  # 'news' ํ‚ค ํ™•์ธ
81
  if 'news' in results_content:
82
  news_results = results_content['news']
83
  else:
84
  news_results = []
 
 
85
  else:
86
  news_results = []
87
  else:
 
101
  image_url = result.get("img", result.get("thumbnail", ""))
102
 
103
  articles.append({
 
104
  "title": title,
105
  "link": link,
106
  "snippet": snippet,
 
118
  def serphouse_search(query, country):
119
  # ํŽ˜์ด์ง€์™€ ๊ฒฐ๊ณผ ์ˆ˜์˜ ๊ธฐ๋ณธ๊ฐ’์„ ์„ค์ •ํ•ฉ๋‹ˆ๋‹ค.
120
  page = 1
121
+ num_result = 10
122
  results = search_serphouse(query, country, page, num_result)
123
  error_message, articles = format_results_from_raw(results)
124
  return error_message, articles
 
126
  # LLM ์„ค์ •
127
  hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=HF_TOKEN)
128
 
129
+ def summarize_article(url):
130
  try:
131
+ # 웹 페이지에서 텍스트 추출
132
+ response = requests.get(url)
133
+ response.raise_for_status()
134
+ soup = BeautifulSoup(response.text, 'html.parser')
135
+ # 모든 텍스트를 추출 (간단한 예시)
136
+ text = ' '.join([p.get_text() for p in soup.find_all('p')])
137
+ if not text.strip():
138
+ return "기사 내용을 가져올 수 없습니다."
139
+
140
+ # 요약 생성
141
+ prompt = f"๋‹ค์Œ ์˜์–ด ๊ธฐ์‚ฌ๋ฅผ ํ•œ๊ตญ์–ด๋กœ 3๋ฌธ์žฅ์œผ๋กœ ์š”์•ฝํ•˜์„ธ์š”:\n{text}"
142
  summary = hf_client.text_generation(prompt, max_new_tokens=500)
143
  return summary
144
  except Exception as e:
 
148
  footer {
149
  visibility: hidden;
150
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  """
152
 
153
  # Gradio ์ธํ„ฐํŽ˜์ด์Šค ๊ตฌ์„ฑ
154
+ with gr.Blocks(css=css, title="NewsAI 서비스") as iface:
155
+ gr.Markdown("검색어를 입력하고 원하는 국가를 선택하면, 검색어와 일치하는 24시간 이내 뉴스를 최대 10개 출력합니다.")
156
 
157
  with gr.Column():
158
  with gr.Row():
 
160
  country = gr.Dropdown(MAJOR_COUNTRIES, label="국가", value="South Korea")
161
  search_button = gr.Button("검색")
162
 
163
+ article_outputs = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
 
165
+ def search_and_display(query, country):
166
+ error_message, articles = serphouse_search(query, country)
167
+ if error_message:
168
+ return gr.update(visible=True, value=error_message)
169
+ else:
170
+ # ๊ธฐ์กด ์ถœ๋ ฅ๋ฌผ ์ œ๊ฑฐ
171
+ for components in article_outputs:
172
+ for component in components:
173
+ component.visible = False
174
+ article_outputs.clear()
175
+
176
+ # ๊ฐ ๊ธฐ์‚ฌ์— ๋Œ€ํ•ด ์ถœ๋ ฅ ์ƒ์„ฑ
177
+ for article in articles:
178
+ with gr.Column():
179
+ title = gr.Markdown(f"### [{article['title']}]({article['link']})")
180
+ image = gr.Image(value=article['image_url'], visible=bool(article['image_url']), shape=(200, 150))
181
+ snippet = gr.Markdown(f"**์š”์•ฝ:** {article['snippet']}")
182
+ info = gr.Markdown(f"**์ถœ์ฒ˜:** {article['channel']} | **์‹œ๊ฐ„:** {article['time']}")
183
+ analyze_button = gr.Button("๋ถ„์„")
184
+ summary_output = gr.Markdown(visible=False)
185
+
186
+ def analyze_article(url):
187
+ summary = summarize_article(url)
188
+ summary_output.update(value=summary, visible=True)
189
+
190
+ analyze_button.click(analyze_article, inputs=gr.State(article['link']), outputs=summary_output)
191
+
192
+ article_outputs.append([title, image, snippet, info, analyze_button, summary_output])
193
+
194
+ return gr.update()
195
+
196
+ search_button.click(
197
+ search_and_display,
198
+ inputs=[query, country],
199
+ outputs=[]
200
+ )
201
+
202
+ iface.launch(auth=("gini", "pick"))