openfree committed on
Commit
501fd62
·
verified ·
1 Parent(s): 6f4289e

Delete app-korea.py

Browse files
Files changed (1) hide show
  1. app-korea.py +0 -613
app-korea.py DELETED
@@ -1,613 +0,0 @@
1
- import gradio as gr
2
- import requests
3
- import json
4
- import os
5
- from datetime import datetime, timedelta
6
- from huggingface_hub import InferenceClient
7
-
8
- MAX_COUNTRY_RESULTS = 100 # Maximum number of results per country
9
- MAX_GLOBAL_RESULTS = 1000 # Maximum number of results worldwide
10
-
11
def create_article_components(max_results):
    """Create ``max_results`` hidden article cards and return their handles.

    Every card is a ``gr.Group`` created with ``visible=False`` that wraps a
    title Markdown, a 200x150 image, a snippet Markdown and an info Markdown.

    Returns:
        A list with one dict per card, keyed by 'group', 'title', 'image',
        'snippet', 'info' and 'index' (the card's zero-based position).
    """
    def _build_card(position):
        # Components instantiated inside the ``with`` become children of the group.
        with gr.Group(visible=False) as card:
            heading = gr.Markdown()
            thumbnail = gr.Image(width=200, height=150)
            body = gr.Markdown()
            meta = gr.Markdown()
        return {
            'group': card,
            'title': heading,
            'image': thumbnail,
            'snippet': body,
            'info': meta,
            'index': position,
        }

    return [_build_card(position) for position in range(max_results)]
29
-
30
# SERPHouse API token, read from the environment (None when unset).
API_KEY = os.getenv("SERPHOUSE_API_KEY")
# Disabled in the original file:
# hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN"))

# Language code used for translation and search, per supported country.
COUNTRY_LANGUAGES = {
    "United States": "en",
    "United Kingdom": "en",
    "Taiwan": "zh-TW",  # Taiwanese (Traditional Chinese)
    "Canada": "en",
    "Australia": "en",
    "Germany": "de",
    "France": "fr",
    "Japan": "ja",
    "South Korea": "ko",
    "China": "zh",
    "India": "hi",
    "Brazil": "pt",
    "Mexico": "es",
    "Russia": "ru",
    "Italy": "it",
    "Spain": "es",
    "Netherlands": "nl",
    "Singapore": "en",
    "Hong Kong": "zh-HK",
    "Indonesia": "id",
    "Malaysia": "ms",
    "Philippines": "tl",
    "Thailand": "th",
    "Vietnam": "vi",
    "Belgium": "nl",
    "Denmark": "da",
    "Finland": "fi",
    "Ireland": "en",
    "Norway": "no",
    "Poland": "pl",
    "Sweden": "sv",
    "Switzerland": "de",
    "Austria": "de",
    "Czech Republic": "cs",
    "Greece": "el",
    "Hungary": "hu",
    "Portugal": "pt",
    "Romania": "ro",
    "Turkey": "tr",
    "Israel": "he",
    "Saudi Arabia": "ar",
    "United Arab Emirates": "ar",
    "South Africa": "en",
    "Argentina": "es",
    "Chile": "es",
    "Colombia": "es",
    "Peru": "es",
    "Venezuela": "es",
    "New Zealand": "en",
    "Bangladesh": "bn",
    "Pakistan": "ur",
    "Egypt": "ar",
    "Morocco": "ar",
    "Nigeria": "en",
    "Kenya": "sw",
    "Ukraine": "uk",
    "Croatia": "hr",
    "Slovakia": "sk",
    "Bulgaria": "bg",
    "Serbia": "sr",
    "Estonia": "et",
    "Latvia": "lv",
    "Lithuania": "lt",
    "Slovenia": "sl",
    "Luxembourg": "fr",
    "Malta": "mt",
    "Cyprus": "el",
    "Iceland": "is",
}

# Location string sent to the search API. It is always the country name
# itself, so the table is derived from COUNTRY_LANGUAGES (same keys, same
# order) instead of being spelled out a second time.
COUNTRY_LOCATIONS = {country: country for country in COUNTRY_LANGUAGES}

# Choices for the country dropdown, in table order.
MAJOR_COUNTRIES = list(COUNTRY_LOCATIONS)
177
-
178
def _google_translate(text, target_lang, timeout=10):
    """Translate ``text`` into ``target_lang`` and return the translated string.

    Calls ``translate.googleapis.com/translate_a/single`` with automatic
    source-language detection. The first element of the JSON answer is read
    as a list of segments whose first item is the translated piece; all
    pieces are joined so long inputs are not cut after the first segment.
    NOTE(review): the endpoint is undocumented - confirm the multi-segment
    shape against a live response.

    Returns an empty string when the answer carries no segments.
    Raises whatever ``requests`` raises on network/HTTP failure, and
    ``ValueError``/``TypeError``/``IndexError`` on an unexpected body.
    """
    response = requests.get(
        "https://translate.googleapis.com/translate_a/single",
        params={
            "client": "gtx",
            "sl": "auto",
            "tl": target_lang,
            "dt": "t",
            "q": text,
        },
        timeout=timeout,  # never hang the UI on a stalled connection
    )
    response.raise_for_status()
    segments = response.json()[0] or []
    return "".join(segment[0] for segment in segments if segment and segment[0])


def translate_query(query, country):
    """Return ``query`` translated into the language mapped to ``country``.

    The query is returned unchanged when it is blank, when ``is_english``
    accepts it, when ``country`` has no entry in ``COUNTRY_LANGUAGES``, when
    ``country`` is "South Korea", or when translation fails for any reason
    (the failure is printed, never raised).
    """
    try:
        # Nothing to translate; skip the network round trip.
        if not query or not query.strip():
            return query

        if is_english(query):
            print(f"영어 검색어 감지 - 원본 사용: {query}")
            return query

        if country not in COUNTRY_LANGUAGES:
            return query

        # Input is used as-is when South Korea is selected.
        if country == "South Korea":
            print(f"한국 선택 - 원본 사용: {query}")
            return query

        target_lang = COUNTRY_LANGUAGES[country]
        print(f"번역 시도: {query} -> {country}({target_lang})")

        # Fall back to the original text if the service returned nothing.
        translated_text = _google_translate(query, target_lang) or query
        print(f"번역 완료: {query} -> {translated_text} ({country})")
        return translated_text

    except Exception as e:
        # Deliberately best-effort: a failed translation must not block the search.
        print(f"번역 오류: {str(e)}")
        return query


def translate_to_korean(text):
    """Return ``text`` translated into Korean, or ``text`` itself on failure.

    Blank input is returned unchanged without contacting the service.
    Errors are printed and swallowed so callers always get a displayable value.
    """
    try:
        if not text or not text.strip():
            return text
        return _google_translate(text, "ko") or text
    except Exception as e:
        # Best-effort, same policy as translate_query.
        print(f"한글 번역 오류: {str(e)}")
        return text
232
-
233
def is_english(text):
    """Return True when every character of ``text`` is ASCII.

    Despite the name this is an ASCII test, not language detection: any
    text without non-ASCII characters (including the empty string) counts
    as English. Spaces, hyphens and underscores are ASCII themselves, so
    they need no special handling.
    """
    return text.isascii()
235
-
236
def is_korean(text):
    """Return True if ``text`` holds at least one character in U+AC00..U+D7A3."""
    for char in text:
        if '\uAC00' <= char <= '\uD7A3':
            return True
    return False
238
-
239
def search_serphouse(query, country, page=1, num_result=10):
    """Run a Google News search through the SERPHouse live endpoint.

    The query is first passed through ``translate_query`` for ``country``.
    Results are limited to the date range yesterday..today (UTC dates) and
    sorted by date.

    Args:
        query: Search text as typed by the user.
        country: Key into ``COUNTRY_LOCATIONS`` / ``COUNTRY_LANGUAGES``;
            unknown values fall back to "United States" / "en".
        page: Result page to request.
        num_result: Number of results per page.

    Returns:
        ``{"results": <decoded JSON>, "translated_query": ...}`` on success,
        ``{"error": "Error: ...", "translated_query": ...}`` when the request
        fails or the body is not valid JSON.
    """
    # Local import: the file only imports datetime and timedelta.
    from datetime import timezone

    url = "https://api.serphouse.com/serp/live"

    # Timezone-aware replacement for the deprecated datetime.utcnow().
    now = datetime.now(timezone.utc)
    yesterday = now - timedelta(days=1)
    date_range = f"{yesterday.strftime('%Y-%m-%d')},{now.strftime('%Y-%m-%d')}"

    translated_query = translate_query(query, country)
    print(f"Original query: {query}")
    print(f"Translated query: {translated_query}")

    payload = {
        "data": {
            "q": translated_query,
            "domain": "google.com",
            "loc": COUNTRY_LOCATIONS.get(country, "United States"),
            "lang": COUNTRY_LANGUAGES.get(country, "en"),
            "device": "desktop",
            "serp_type": "news",
            # Honour the caller's paging arguments (previously fixed to "1"/"10").
            "page": str(page),
            "num": str(num_result),
            "date_range": date_range,
            "sort_by": "date",
        }
    }

    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": f"Bearer {API_KEY}",
    }

    try:
        response = requests.post(url, json=payload, headers=headers, timeout=30)
        print("Request payload:", json.dumps(payload, indent=2, ensure_ascii=False))
        print("Response status:", response.status_code)

        response.raise_for_status()
        return {"results": response.json(), "translated_query": translated_query}
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # JSON decode error is not a RequestException.
        return {"error": f"Error: {str(e)}", "translated_query": translated_query}
280
-
281
def format_results_from_raw(response_data):
    """Convert a ``search_serphouse`` result into ``(error_message, articles)``.

    Args:
        response_data: Dict holding either an "error" entry, or "results"
            (the decoded API JSON) plus "translated_query".

    Returns:
        A 2-tuple. On success the message is "" and ``articles`` is a list of
        dicts with keys index (1-based), title, link, snippet, channel, time,
        image_url and translated_query. Otherwise the message explains the
        problem and the list is empty.
    """
    if "error" in response_data:
        message = str(response_data["error"])
        # search_serphouse already prefixes its message; avoid "Error: Error: ...".
        if not message.startswith("Error: "):
            message = "Error: " + message
        return message, []

    try:
        results = response_data["results"]
        translated_query = response_data.get("translated_query", "")

        # Treat a null at any nesting level like a missing key.
        outer = results.get('results') or {}
        inner = outer.get('results') or {}
        news_results = inner.get('news') or []
        if not news_results:
            return "검색 결과가 없습니다.", []

        articles = [
            {
                "index": idx,
                "title": result.get("title", "제목 없음"),
                "link": result.get("url", result.get("link", "#")),
                "snippet": result.get("snippet", "내용 없음"),
                "channel": result.get("channel", result.get("source", "알 수 없음")),
                "time": result.get("time", result.get("date", "알 수 없는 시간")),
                "image_url": result.get("img", result.get("thumbnail", "")),
                "translated_query": translated_query,
            }
            for idx, result in enumerate(news_results, 1)
        ]
        return "", articles
    except (AttributeError, KeyError, TypeError) as e:
        # Payload did not have the expected dict/list shape.
        return f"결과 처리 중 오류 발생: {str(e)}", []
308
-
309
def serphouse_search(query, country):
    """Search news for ``query`` in ``country``; return ``(error_message, articles)``."""
    return format_results_from_raw(search_serphouse(query, country))
312
-
313
- # Custom stylesheet passed to gr.Blocks(css=css). It hides the footer and holds rules for
- # #status_area, #results_area, .tabs/.tab-nav, .group, .primary-btn and .textbox.
- css = """
314
- footer {visibility: hidden;}
315
- #status_area {
316
- background: rgba(255, 255, 255, 0.9); /* 약간 투명한 흰색 배경 */
317
- padding: 15px;
318
- border-bottom: 1px solid #ddd;
319
- margin-bottom: 20px;
320
- box-shadow: 0 2px 5px rgba(0,0,0,0.1); /* 부드러운 그림자 효과 */
321
- }
322
- #results_area {
323
- padding: 10px;
324
- margin-top: 10px;
325
- }
326
- /* 탭 스타일 개선 */
327
- .tabs {
328
- border-bottom: 2px solid #ddd !important;
329
- margin-bottom: 20px !important;
330
- }
331
- .tab-nav {
332
- border-bottom: none !important;
333
- margin-bottom: 0 !important;
334
- }
335
- .tab-nav button {
336
- font-weight: bold !important;
337
- padding: 10px 20px !important;
338
- }
339
- .tab-nav button.selected {
340
- border-bottom: 2px solid #1f77b4 !important; /* 선택된 탭 강조 */
341
- color: #1f77b4 !important;
342
- }
343
- /* 검색 상태 메시지 스타일 */
344
- #status_area .markdown-text {
345
- font-size: 1.1em;
346
- color: #2c3e50;
347
- padding: 10px 0;
348
- }
349
- /* 검색 결과 컨테이너 스타일 */
350
- .group {
351
- border: 1px solid #eee;
352
- padding: 15px;
353
- margin-bottom: 15px;
354
- border-radius: 5px;
355
- background: white;
356
- }
357
- /* 검색 버튼 스타일 */
358
- .primary-btn {
359
- background: #1f77b4 !important;
360
- border: none !important;
361
- }
362
- /* 검색어 입력창 스타일 */
363
- .textbox {
364
- border: 1px solid #ddd !important;
365
- border-radius: 4px !important;
366
- }
367
- """
368
-
369
- # UI layout: a per-country tab (query box, country dropdown, search button, 100 hidden
- # article cards) and a worldwide tab (query box, search button, 1000 hidden article cards).
- # NOTE(review): indentation is stripped in this diff view, so the exact nesting of the
- # `with` containers below cannot be confirmed from here.
- with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI 서비스") as iface:
370
- with gr.Tabs():
371
- # Per-country tab
372
- with gr.Tab("국가별"):
373
- gr.Markdown("검색어를 입력하고 원하는 국가(68개국)를 선택하면, 검색어와 일치하는 24시간 이내 뉴스를 최대 100개 출력합니다.")
374
- gr.Markdown("국가 선택후 검색어에 '한글'을 입력하면 현지 언어로 번역되어 검색합니다. 예: 'Taiwan' 국가 선택후 '삼성' 입력시 '三星'으로 자동 검색")
375
-
376
- with gr.Column():
377
- with gr.Row():
378
- query = gr.Textbox(label="검색어")
379
- country = gr.Dropdown(MAJOR_COUNTRIES, label="국가", value="South Korea")
380
-
381
- status_message = gr.Markdown("", visible=True)
382
- translated_query_display = gr.Markdown(visible=False)
383
- search_button = gr.Button("검색", variant="primary")
384
-
385
- progress = gr.Progress()
386
- articles_state = gr.State([])
387
-
388
- article_components = []
389
- for i in range(100):
390
- with gr.Group(visible=False) as article_group:
391
- title = gr.Markdown()
392
- image = gr.Image(width=200, height=150)
393
- snippet = gr.Markdown()
394
- info = gr.Markdown()
395
-
396
- article_components.append({
397
- 'group': article_group,
398
- 'title': title,
399
- 'image': image,
400
- 'snippet': snippet,
401
- 'info': info,
402
- 'index': i,
403
- })
404
-
405
- with gr.Tab("전세계"):
406
- gr.Markdown("검색어를 입력하면 68개국 전체에 대해 국가별로 구분하여 24시간 이내 뉴스가 최대 1000개 순차 출력됩니다.")
407
- gr.Markdown("국가 선택후 검색어에 '한글'을 입력하면 현지 언어로 번역되어 검색합니다. 예: 'Taiwan' 국가 선택후 '삼성' 입력시 '三星'으로 자동 검색")
408
-
409
- with gr.Column():
410
- # Top area
411
- with gr.Column(elem_id="status_area"):
412
- with gr.Row():
413
- query_global = gr.Textbox(label="검색어")
414
- search_button_global = gr.Button("전세계 검색", variant="primary")
415
-
416
- status_message_global = gr.Markdown("")
417
- translated_query_display_global = gr.Markdown("")
418
-
419
- # Results output area
420
- with gr.Column(elem_id="results_area"):
421
- articles_state_global = gr.State([])
422
-
423
- global_article_components = []
424
- for i in range(1000):
425
- with gr.Group(visible=False) as article_group:
426
- title = gr.Markdown()
427
- image = gr.Image(width=200, height=150)
428
- snippet = gr.Markdown()
429
- info = gr.Markdown()
430
-
431
- global_article_components.append({
432
- 'group': article_group,
433
- 'title': title,
434
- 'image': image,
435
- 'snippet': snippet,
436
- 'info': info,
437
- 'index': i,
438
- })
439
-
440
def search_and_display(query, country, articles_state, progress=gr.Progress()):
    """Run a single-country search and build the update list for the UI.

    Args:
        query: Search text from the textbox.
        country: Selected country name.
        articles_state: Previous state value; not read, only replaced.
        progress: Progress tracker; the ``gr.Progress()`` default is kept
            from the original signature.

    Returns:
        A list of updates in the order: status message, translated-query
        line, error message, five updates per entry of
        ``article_components`` (group, title, image, snippet, info), and
        finally the new articles list for the state.
    """
    def _hidden_card():
        # Fresh update objects for every unused card slot.
        return [
            gr.update(visible=False), gr.update(), gr.update(),
            gr.update(), gr.update(),
        ]

    progress(0, desc="검색어 번역 중...")
    translated_query = translate_query(query, country)
    if translated_query != query:
        translated_display = f"**원본 검색어:** {query}\n**번역된 검색어:** {translated_query}"
    else:
        translated_display = f"**검색어:** {query}"

    progress(0.2, desc="검색 시작...")
    error_message, articles = serphouse_search(query, country)
    progress(0.5, desc="결과 처리 중...")

    if error_message:
        articles = []
        error_update = gr.update(value=error_message, visible=True)
    else:
        error_update = gr.update(value="", visible=False)

    outputs = [
        # The handler returns once, so the status line always ends up cleared.
        gr.update(value="", visible=False),
        gr.update(value=translated_display, visible=True),
        error_update,
    ]

    total_articles = len(articles)
    for idx, _ in enumerate(article_components):
        if idx >= total_articles:
            outputs.extend(_hidden_card())
            continue

        # Report progress only for real articles so the fraction stays <= 1.
        progress((idx + 1) / total_articles, desc=f"결과 표시 중... {idx + 1}/{total_articles}")

        article = articles[idx]
        image_url = article['image_url']
        # Inline data: URIs are not displayed.
        if image_url and not image_url.startswith('data:image'):
            image_update = gr.update(value=image_url, visible=True)
        else:
            image_update = gr.update(value=None, visible=False)

        korean_summary = translate_to_korean(article['snippet'])

        outputs.extend([
            gr.update(visible=True),
            gr.update(value=f"### [{article['title']}]({article['link']})"),
            image_update,
            gr.update(value=f"**요약:** {article['snippet']}\n\n**한글 요약:** {korean_summary}"),
            gr.update(value=f"**출처:** {article['channel']} | **시간:** {article['time']}"),
        ])

    progress(1.0, desc="완료!")
    outputs.append(articles)
    return outputs
494
-
495
def search_global(query, articles_state_global):
    """Search every country in ``COUNTRY_LOCATIONS`` and stream merged results.

    Generator event handler. Each yielded list holds: status message, query
    line, five updates per entry of ``global_article_components`` (group,
    title, image, snippet, info) and the value for the state.

    After each country that returns articles, all articles collected so far
    are sorted by their 'time' string (descending), de-duplicated by link and
    cut to the number of available cards before being rendered.

    Args:
        query: Search text from the textbox.
        articles_state_global: Previous state value; not read.
    """
    card_count = len(global_article_components)

    def _hidden_card():
        # Fresh update objects for every unused card slot.
        return [
            gr.update(visible=False), gr.update(), gr.update(),
            gr.update(), gr.update(),
        ]

    def _korean(snippet):
        # Translate each distinct snippet once; the list is re-rendered
        # after every country and would otherwise be re-translated each time.
        if snippet not in korean_cache:
            korean_cache[snippet] = translate_to_korean(snippet)
        return korean_cache[snippet]

    all_results = []
    # Bound up front so the final status works even if nothing was found.
    unique_results = []
    korean_cache = {}

    outputs = [
        gr.update(value="전세계 검색을 시작합니다...", visible=True),
        gr.update(value=f"**검색어:** {query}", visible=True),
    ]
    for _ in range(card_count):
        outputs.extend(_hidden_card())
    outputs.append([])

    yield outputs

    total_countries = len(COUNTRY_LOCATIONS)
    for country_no, country in enumerate(COUNTRY_LOCATIONS, 1):
        try:
            status_msg = f"{country} 검색 중... ({country_no}/{total_countries} 국가)"
            outputs[0] = gr.update(value=status_msg, visible=True)
            yield outputs

            error_message, articles = serphouse_search(query, country)
            if error_message or not articles:
                continue

            for article in articles:
                article['source_country'] = country
            all_results.extend(articles)

            seen_urls = set()
            deduped = []
            for article in sorted(all_results, key=lambda x: x.get('time', ''), reverse=True):
                url = article.get('link', '')
                if url not in seen_urls:
                    seen_urls.add(url)
                    deduped.append(article)
            deduped = deduped[:card_count]

            # Build into a separate list so a failure half-way through
            # leaves ``outputs`` complete and correctly sized.
            refreshed = [
                gr.update(value=f"{country_no}/{total_countries} 국가 검색 완료\n현재까지 발견된 뉴스: {len(deduped)}건", visible=True),
                gr.update(value=f"**검색어:** {query}", visible=True),
            ]
            for article in deduped:
                image_url = article.get('image_url', '')
                # Inline data: URIs are not displayed.
                if image_url and not image_url.startswith('data:image'):
                    image_update = gr.update(value=image_url, visible=True)
                else:
                    image_update = gr.update(value=None, visible=False)

                korean_summary = _korean(article['snippet'])

                refreshed.extend([
                    gr.update(visible=True),
                    gr.update(value=f"### [{article['title']}]({article['link']})"),
                    image_update,
                    gr.update(value=f"**요약:** {article['snippet']}\n\n**한글 요약:** {korean_summary}"),
                    gr.update(value=f"**출처:** {article['channel']} | **국가:** {article['source_country']} | **시간:** {article['time']}"),
                ])
            for _ in range(card_count - len(deduped)):
                refreshed.extend(_hidden_card())
            refreshed.append(deduped)

            unique_results = deduped
            outputs = refreshed
            yield outputs

        except Exception as e:
            # One failing country must not abort the worldwide sweep.
            print(f"Error searching {country}: {str(e)}")

    final_status = f"검색 완료! 총 {len(unique_results)}개의 뉴스가 발견되었습니다."
    outputs[0] = gr.update(value=final_status, visible=True)
    yield outputs
574
-
575
# Output components for the per-country search, in the order
# search_and_display returns its updates: status, translated-query line,
# an extra hidden Markdown for error text, five components per card, state.
search_outputs = [
    status_message,
    translated_query_display,
    gr.Markdown(visible=False),
]
for card in article_components:
    search_outputs += [card[part] for part in ('group', 'title', 'image', 'snippet', 'info')]
search_outputs += [articles_state]

search_button.click(
    fn=search_and_display,
    inputs=[query, country, articles_state],
    outputs=search_outputs,
    show_progress=True,
)

# Output components for the worldwide search, in the order search_global
# yields its updates: status, query line, five components per card, state.
global_search_outputs = [
    status_message_global,
    translated_query_display_global,
]
for card in global_article_components:
    global_search_outputs += [card[part] for part in ('group', 'title', 'image', 'snippet', 'info')]
global_search_outputs += [articles_state_global]

search_button_global.click(
    fn=search_global,
    inputs=[query_global, articles_state_global],
    outputs=global_search_outputs,
)
612
-
613
# SECURITY NOTE(review): the login pair was hard-coded in source. It can now
# be overridden through the environment; the old values remain only as a
# fallback so existing deployments keep working.
# TODO: rotate these credentials and drop the fallback.
iface.launch(
    auth=(
        os.getenv("APP_AUTH_USERNAME", "it1"),
        os.getenv("APP_AUTH_PASSWORD", "chosun1"),
    )
)