"""NewsAI: Gradio app for per-country / per-region news search and AI article writing.

Tabs:
  * 국가별    - search the last 24 hours of news for one country (via SerpHouse).
  * 전세계    - search every country of a selected region, streaming merged results.
  * AI 기사 생성 - fetch an article URL and let an LLM translate/rewrite it in Korean.

Required environment variables: ``HF_TOKEN`` (checked at import time) and
``SERPHOUSE_API_KEY`` (checked when a search is issued).
"""

import json
import os
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timedelta, timezone
from functools import lru_cache

import gradio as gr
import requests
from bs4 import BeautifulSoup
from openai import OpenAI
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

ACCESS_TOKEN = os.getenv("HF_TOKEN")
if not ACCESS_TOKEN:
    raise ValueError("HF_TOKEN environment variable is not set")

client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)

MAX_COUNTRY_RESULTS = 100  # maximum number of articles shown in the per-country tab
MAX_GLOBAL_RESULTS = 1000  # maximum number of articles shown in the regional tab


def create_article_components(max_results):
    """Create ``max_results`` hidden article slots in the current Blocks context.

    Each slot is a ``gr.Group`` holding a title, an image, a snippet and an
    info line.  Returns a list of dicts with the keys ``group``, ``title``,
    ``image``, ``snippet``, ``info`` and ``index``.
    """
    article_components = []
    for i in range(max_results):
        with gr.Group(visible=False) as article_group:
            title = gr.Markdown()
            image = gr.Image(width=200, height=150)
            snippet = gr.Markdown()
            info = gr.Markdown()
        article_components.append({
            'group': article_group,
            'title': title,
            'image': image,
            'snippet': snippet,
            'info': info,
            'index': i,
        })
    return article_components


API_KEY = os.getenv("SERPHOUSE_API_KEY")

# Country name -> language code used for query translation and the search "lang".
COUNTRY_LANGUAGES = {
    "United States": "en", "United Kingdom": "en", "Taiwan": "zh-TW",
    "Canada": "en", "Australia": "en", "Germany": "de", "France": "fr",
    "Japan": "ja", "China": "zh", "India": "hi", "Brazil": "pt",
    "Mexico": "es", "Russia": "ru", "Italy": "it", "Spain": "es",
    "Netherlands": "nl", "Singapore": "en", "Hong Kong": "zh-HK",
    "Indonesia": "id", "Malaysia": "ms", "Philippines": "tl",
    "Thailand": "th", "Vietnam": "vi", "Belgium": "nl", "Denmark": "da",
    "Finland": "fi", "Ireland": "en", "Norway": "no", "Poland": "pl",
    "Sweden": "sv", "Switzerland": "de", "Austria": "de",
    "Czech Republic": "cs", "Greece": "el", "Hungary": "hu",
    "Portugal": "pt", "Romania": "ro", "Turkey": "tr", "Israel": "he",
    "Saudi Arabia": "ar", "United Arab Emirates": "ar", "South Africa": "en",
    "Argentina": "es", "Chile": "es", "Colombia": "es", "Peru": "es",
    "Venezuela": "es", "New Zealand": "en", "Bangladesh": "bn",
    "Pakistan": "ur", "Egypt": "ar", "Morocco": "ar", "Nigeria": "en",
    "Kenya": "sw", "Ukraine": "uk", "Croatia": "hr", "Slovakia": "sk",
    "Bulgaria": "bg", "Serbia": "sr", "Estonia": "et", "Latvia": "lv",
    "Lithuania": "lt", "Slovenia": "sl", "Luxembourg": "fr", "Malta": "mt",
    "Cyprus": "el", "Iceland": "is",
    # Listed in the Southeast Asia/Oceania region; without an entry here the
    # search silently fell back to "United States" / "en" for this country.
    "Papua New Guinea": "en",
}

# Country name -> SerpHouse "loc" value (currently always the country name itself).
COUNTRY_LOCATIONS = {name: name for name in COUNTRY_LANGUAGES}


def _region_tables(countries):
    """Return ``(locations, languages)`` sub-tables for the given country names."""
    locations = {name: COUNTRY_LOCATIONS[name] for name in countries}
    languages = {name: COUNTRY_LANGUAGES[name] for name in countries}
    return locations, languages


# Region definitions (derived from the master tables so they cannot drift apart).
# East Asia
COUNTRY_LOCATIONS_EAST_ASIA, COUNTRY_LANGUAGES_EAST_ASIA = _region_tables((
    "Taiwan", "Japan", "China", "Hong Kong",
))

# Southeast Asia / Oceania
(
    COUNTRY_LOCATIONS_SOUTHEAST_ASIA_OCEANIA,
    COUNTRY_LANGUAGES_SOUTHEAST_ASIA_OCEANIA,
) = _region_tables((
    "Indonesia", "Malaysia", "Philippines", "Thailand", "Vietnam",
    "Singapore", "Papua New Guinea", "Australia", "New Zealand",
))

# Eastern Europe
COUNTRY_LOCATIONS_EAST_EUROPE, COUNTRY_LANGUAGES_EAST_EUROPE = _region_tables((
    "Poland", "Czech Republic", "Greece", "Hungary", "Romania", "Ukraine",
    "Croatia", "Slovakia", "Bulgaria", "Serbia", "Estonia", "Latvia",
    "Lithuania", "Slovenia", "Malta", "Cyprus", "Iceland", "Russia",
))

# Western Europe
COUNTRY_LOCATIONS_WEST_EUROPE, COUNTRY_LANGUAGES_WEST_EUROPE = _region_tables((
    "Germany", "France", "Italy", "Spain", "Netherlands", "Belgium",
    "Ireland", "Sweden", "Switzerland", "Austria", "Portugal", "Luxembourg",
    "United Kingdom",
))

# Middle East / Africa
COUNTRY_LOCATIONS_ARAB_AFRICA, COUNTRY_LANGUAGES_ARAB_AFRICA = _region_tables((
    "South Africa", "Nigeria", "Kenya", "Egypt", "Morocco", "Saudi Arabia",
    "United Arab Emirates", "Israel",
))

# Americas
COUNTRY_LOCATIONS_AMERICA, COUNTRY_LANGUAGES_AMERICA = _region_tables((
    "United States", "Canada", "Mexico", "Brazil", "Argentina", "Chile",
    "Colombia", "Peru", "Venezuela",
))

# Choices of the region dropdown.
REGIONS = [
    "동아시아",
    "동남아시아/오세아니아",
    "동유럽",
    "서유럽",
    "중동/아프리카",
    "아메리카",
]

# Region label -> (locations, languages) tables.
_REGION_TABLES = {
    "동아시아": (COUNTRY_LOCATIONS_EAST_ASIA, COUNTRY_LANGUAGES_EAST_ASIA),
    "동남아시아/오세아니아": (
        COUNTRY_LOCATIONS_SOUTHEAST_ASIA_OCEANIA,
        COUNTRY_LANGUAGES_SOUTHEAST_ASIA_OCEANIA,
    ),
    "동유럽": (COUNTRY_LOCATIONS_EAST_EUROPE, COUNTRY_LANGUAGES_EAST_EUROPE),
    "서유럽": (COUNTRY_LOCATIONS_WEST_EUROPE, COUNTRY_LANGUAGES_WEST_EUROPE),
    "중동/아프리카": (COUNTRY_LOCATIONS_ARAB_AFRICA, COUNTRY_LANGUAGES_ARAB_AFRICA),
    "아메리카": (COUNTRY_LOCATIONS_AMERICA, COUNTRY_LANGUAGES_AMERICA),
}

_TRANSLATE_URL = "https://translate.googleapis.com/translate_a/single"


@lru_cache(maxsize=512)
def _google_translate(text, target_lang):
    """Translate ``text`` into ``target_lang``; raise on any failure.

    Failures raise instead of returning a fallback so that ``lru_cache`` only
    memoises successful translations (a transient network error must not
    poison the cache).
    """
    params = {
        "client": "gtx",
        "sl": "auto",
        "tl": target_lang,
        "dt": "t",
        "q": text,
    }
    # The context manager closes the session's connection pool after the call.
    with requests.Session() as session:
        retries = Retry(total=3, backoff_factor=0.5)
        session.mount('https://', HTTPAdapter(max_retries=retries))
        response = session.get(_TRANSLATE_URL, params=params, timeout=(5, 10))
        response.raise_for_status()
        segments = response.json()[0]
    # The endpoint returns one segment per sentence; join them all instead of
    # keeping only the first one, otherwise multi-sentence text is truncated.
    translated = "".join(seg[0] for seg in segments if seg and seg[0])
    return translated or text


def translate_query(query, country):
    """Translate ``query`` into the language of ``country``.

    ASCII-only queries and unknown countries are returned unchanged.
    Translation is best-effort: on any error the original query is returned.
    """
    try:
        if is_english(query):
            return query
        target_lang = COUNTRY_LANGUAGES.get(country)
        if target_lang is None:
            return query
        return _google_translate(query, target_lang)
    except Exception as e:
        print(f"번역 오류: {str(e)}")
        return query


def translate_to_korean(text):
    """Translate ``text`` into Korean; return ``text`` unchanged on any error."""
    try:
        return _google_translate(text, "ko")
    except Exception as e:
        print(f"한글 번역 오류: {str(e)}")
        return text


def is_english(text):
    """Return True when ``text`` is ASCII-only (spaces, '-' and '_' ignored)."""
    return all(ord(char) < 128 for char in text.replace(' ', '').replace('-', '').replace('_', ''))


def is_korean(text):
    """Return True when ``text`` contains at least one Hangul syllable."""
    return any('\uAC00' <= char <= '\uD7A3' for char in text)


def search_serphouse(query, country, page=1, num_result=10):
    """Query the SerpHouse live news API for the last 24 hours.

    Returns ``{"results": <raw JSON>, "translated_query": str}`` on success
    or ``{"error": str, "translated_query": query}`` on failure.

    NOTE(review): ``page`` and ``num_result`` are accepted for compatibility
    but the request always asks for page 1 with 100 results, as before.
    """
    if not API_KEY:
        # Without a key the request would be sent with "Bearer None".
        return {
            "error": "SERPHOUSE_API_KEY 환경 변수가 설정되지 않았습니다.",
            "translated_query": query
        }

    url = "https://api.serphouse.com/serp/live"

    now = datetime.now(timezone.utc)  # datetime.utcnow() is deprecated
    yesterday = now - timedelta(days=1)
    date_range = f"{yesterday.strftime('%Y-%m-%d')},{now.strftime('%Y-%m-%d')}"

    translated_query = translate_query(query, country)

    payload = {
        "data": {
            "q": translated_query,
            "domain": "google.com",
            "loc": COUNTRY_LOCATIONS.get(country, "United States"),
            "lang": COUNTRY_LANGUAGES.get(country, "en"),
            "device": "desktop",
            "serp_type": "news",
            "page": "1",
            "num": "100",
            "date_range": date_range,
            "sort_by": "date"
        }
    }

    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": f"Bearer {API_KEY}"
    }

    try:
        with requests.Session() as session:
            # Retry transient server errors and rate limiting, POST included.
            retries = Retry(
                total=5,
                backoff_factor=1,
                status_forcelist=[500, 502, 503, 504, 429],
                allowed_methods=["POST"]
            )
            adapter = HTTPAdapter(max_retries=retries)
            session.mount('http://', adapter)
            session.mount('https://', adapter)

            response = session.post(
                url,
                json=payload,
                headers=headers,
                timeout=(30, 30)  # (connect timeout, read timeout) in seconds
            )
            response.raise_for_status()
            return {"results": response.json(), "translated_query": translated_query}

    except requests.exceptions.Timeout:
        return {
            "error": "검색 시간이 초과되었습니다. 잠시 후 다시 시도해주세요.",
            "translated_query": query
        }
    except requests.exceptions.RequestException as e:
        return {
            "error": f"검색 중 오류가 발생했습니다: {str(e)}",
            "translated_query": query
        }
    except Exception as e:
        return {
            "error": f"예기치 않은 오류가 발생했습니다: {str(e)}",
            "translated_query": query
        }


def format_results_from_raw(response_data):
    """Convert a ``search_serphouse`` result into ``(error_message, articles)``.

    Articles that look Korean (by domain, channel or title keyword) are
    dropped.  ``error_message`` is ``""`` on success.
    """
    if "error" in response_data:
        return "Error: " + response_data["error"], []

    try:
        results = response_data["results"]
        translated_query = response_data["translated_query"]

        news_results = results.get('results', {}).get('results', {}).get('news', [])
        if not news_results:
            return "검색 결과가 없습니다.", []

        # Markers used to filter out Korean sources / Korea-related headlines.
        korean_domains = ['.kr', 'korea', 'korean', 'yonhap', 'hankyung', 'chosun',
                          'donga', 'joins', 'hani', 'koreatimes', 'koreaherald']
        korean_keywords = ['korea', 'korean', 'seoul', 'busan', 'incheon', 'daegu',
                           'gwangju', 'daejeon', 'ulsan', 'sejong']

        filtered_articles = []
        for idx, result in enumerate(news_results, 1):
            # Keep the URL in its original case for the link (paths are
            # case-sensitive); only the comparisons use lower-cased copies.
            # "or" chains also guard against explicit null values.
            link = result.get("url") or result.get("link") or ""
            url_lower = link.lower()
            title_lower = (result.get("title") or "").lower()
            channel_lower = (result.get("channel") or result.get("source") or "").lower()

            is_korean_content = (
                any(domain in url_lower or domain in channel_lower for domain in korean_domains)
                or any(keyword in title_lower for keyword in korean_keywords)
            )
            if is_korean_content:
                continue

            filtered_articles.append({
                "index": idx,
                "title": result.get("title", "제목 없음"),
                "link": link,
                "snippet": result.get("snippet", "내용 없음"),
                "channel": result.get("channel", result.get("source", "알 수 없음")),
                "time": result.get("time", result.get("date", "알 수 없는 시간")),
                "image_url": result.get("img", result.get("thumbnail", "")),
                "translated_query": translated_query
            })

        return "", filtered_articles
    except Exception as e:
        return f"결과 처리 중 오류 발생: {str(e)}", []


def serphouse_search(query, country):
    """Search one country and return ``(error_message, articles)``."""
    response_data = search_serphouse(query, country)
    return format_results_from_raw(response_data)


def _hidden_article_updates():
    """Return the five updates that hide one article slot."""
    return [
        gr.update(visible=False), gr.update(), gr.update(),
        gr.update(), gr.update()
    ]


def _visible_article_updates(article, korean_summary, info_text):
    """Return the five updates (group, title, image, snippet, info) showing ``article``."""
    image_url = article.get('image_url', '')
    # Inline data: URIs are not passed to gr.Image; hide the image instead.
    if image_url and not image_url.startswith('data:image'):
        image_update = gr.update(value=image_url, visible=True)
    else:
        image_update = gr.update(value=None, visible=False)
    return [
        gr.update(visible=True),
        gr.update(value=f"### [{article['title']}]({article['link']})"),
        image_update,
        gr.update(value=f"**요약:** {article['snippet']}\n\n**한글 요약:** {korean_summary}"),
        gr.update(value=info_text)
    ]


def search_and_display(query, country, articles_state, progress=gr.Progress()):
    """Click handler of the per-country tab.

    Returns one update per output component: status, translated-query
    display, error display, five updates per article slot, and finally the
    new articles state.  The list always has the same length, whatever the
    search outcome.
    """
    progress(0, desc="검색어 번역 중...")
    translated_query = translate_query(query, country)
    if translated_query != query:
        translated_display = f"**원본 검색어:** {query}\n**번역된 검색어:** {translated_query}"
    else:
        translated_display = f"**검색어:** {query}"

    progress(0.3, desc="검색 중...")
    response_data = search_serphouse(query, country)

    progress(0.6, desc="결과 처리 중...")
    error_message, articles = format_results_from_raw(response_data)
    if error_message:
        articles = []
    elif not articles:
        # Every result was filtered out: report it instead of returning an
        # incomplete output list (which Gradio rejects).
        error_message = "검색 결과가 없습니다."

    # Never render more articles than there are slots.
    articles = articles[:len(article_components)]

    if articles:
        progress(0.8, desc="번역 처리 중...")
        with ThreadPoolExecutor(max_workers=3) as executor:
            summaries = list(executor.map(
                translate_to_korean, [article['snippet'] for article in articles]
            ))
        for article, summary in zip(articles, summaries):
            article['korean_summary'] = summary

    outputs = [
        gr.update(value="", visible=False),  # status message is cleared when done
        gr.update(value=translated_display, visible=True),
    ]
    if error_message:
        outputs.append(gr.update(value=error_message, visible=True))
    else:
        outputs.append(gr.update(value="", visible=False))

    total_articles = len(articles)
    for idx, _comp in enumerate(article_components):
        if idx < total_articles:
            article = articles[idx]
            # Map rendering onto the remaining 0.8..1.0 range so the reported
            # fraction never exceeds 1.0.
            progress(0.8 + 0.2 * (idx + 1) / total_articles,
                     desc=f"결과 표시 중... {idx + 1}/{total_articles}")
            outputs.extend(_visible_article_updates(
                article,
                article['korean_summary'],
                f"**출처:** {article['channel']} | **시간:** {article['time']}"
            ))
        else:
            outputs.extend(_hidden_article_updates())

    progress(1.0, desc="완료!")
    outputs.append(articles)
    return outputs


def get_region_countries(region):
    """Return ``(locations, languages)`` for ``region``; two empty dicts if unknown."""
    return _REGION_TABLES.get(region, ({}, {}))


def search_global(query, region, articles_state_global):
    """Streaming click handler of the regional tab.

    Searches every country of ``region`` in turn and yields a full output
    list (status, query display, five updates per article slot, state) after
    each step so the UI updates progressively.
    """
    status_msg = f"{region} 지역 검색을 시작합니다..."
    all_results = []
    # Bound up front: the final status message needs it even when no country
    # returned any article.
    unique_results = []

    outputs = [
        gr.update(value=status_msg, visible=True),
        gr.update(value=f"**검색어:** {query}", visible=True),
    ]
    for _ in global_article_components:
        outputs.extend(_hidden_article_updates())
    outputs.append([])
    yield outputs

    locations, _languages = get_region_countries(region)
    total_countries = len(locations)

    for country_no, country in enumerate(locations, 1):
        try:
            status_msg = f"{region} - {country} 검색 중... ({country_no}/{total_countries} 국가)"
            outputs[0] = gr.update(value=status_msg, visible=True)
            yield outputs

            error_message, articles = serphouse_search(query, country)
            if error_message or not articles:
                continue

            for article in articles:
                article['source_country'] = country
                article['region'] = region
            all_results.extend(articles)

            sorted_results = sorted(all_results, key=lambda x: x.get('time', ''), reverse=True)

            # De-duplicate by link, keeping the first occurrence.
            seen_urls = set()
            deduped = []
            for article in sorted_results:
                link = article.get('link', '')
                if link not in seen_urls:
                    seen_urls.add(link)
                    deduped.append(article)
            unique_results = deduped[:MAX_GLOBAL_RESULTS]

            outputs = [
                gr.update(
                    value=f"{region} - {country_no}/{total_countries} 국가 검색 완료\n"
                          f"현재까지 발견된 뉴스: {len(unique_results)}건",
                    visible=True
                ),
                gr.update(value=f"**검색어:** {query} | **지역:** {region}", visible=True),
            ]

            for slot, _comp in enumerate(global_article_components):
                if slot >= len(unique_results):
                    outputs.extend(_hidden_article_updates())
                    continue
                article = unique_results[slot]
                # Translate each snippet once and keep it on the article so
                # later countries do not re-translate earlier results.
                korean_summary = article.get('korean_summary')
                if korean_summary is None:
                    korean_summary = translate_to_korean(article['snippet'])
                    article['korean_summary'] = korean_summary
                outputs.extend(_visible_article_updates(
                    article,
                    korean_summary,
                    f"**출처:** {article['channel']} | **국가:** {article['source_country']} | "
                    f"**지역:** {article['region']} | **시간:** {article['time']}"
                ))

            outputs.append(unique_results)
            yield outputs

        except Exception as e:
            # Best effort: one failing country must not abort the whole region.
            print(f"Error searching {country}: {str(e)}")
            continue

    final_status = f"{region} 검색 완료! 총 {len(unique_results)}개의 뉴스가 발견되었습니다."
    outputs[0] = gr.update(value=final_status, visible=True)
    yield outputs


css = """
/* 전역 스타일 */
footer {visibility: hidden;}

/* 레이아웃 컨테이너 */
#status_area {
    background: rgba(255, 255, 255, 0.9);
    padding: 15px;
    border-bottom: 1px solid #ddd;
    margin-bottom: 20px;
    box-shadow: 0 2px 5px rgba(0,0,0,0.1);
}
#results_area {
    padding: 10px;
    margin-top: 10px;
}

/* 탭 스타일 */
.tabs {
    border-bottom: 2px solid #ddd !important;
    margin-bottom: 20px !important;
}
.tab-nav {
    border-bottom: none !important;
    margin-bottom: 0 !important;
}
.tab-nav button {
    font-weight: bold !important;
    padding: 10px 20px !important;
}
.tab-nav button.selected {
    border-bottom: 2px solid #1f77b4 !important;
    color: #1f77b4 !important;
}

/* 상태 메시지 */
#status_area .markdown-text {
    font-size: 1.1em;
    color: #2c3e50;
    padding: 10px 0;
}

/* 기본 컨테이너 */
.group {
    border: 1px solid #eee;
    padding: 15px;
    margin-bottom: 15px;
    border-radius: 5px;
    background: white;
}

/* 버튼 스타일 */
.primary-btn {
    background: #1f77b4 !important;
    border: none !important;
}

/* 입력 필드 */
.textbox {
    border: 1px solid #ddd !important;
    border-radius: 4px !important;
}

/* 프로그레스바 컨테이너 */
.progress-container {
    position: fixed;
    top: 0;
    left: 0;
    width: 100%;
    height: 6px;
    background: #e0e0e0;
    z-index: 1000;
}

/* 프로그레스바 */
.progress-bar {
    height: 100%;
    background: linear-gradient(90deg, #2196F3, #00BCD4);
    box-shadow: 0 0 10px rgba(33, 150, 243, 0.5);
    transition: width 0.3s ease;
    animation: progress-glow 1.5s ease-in-out infinite;
}

/* 프로그레스 텍스트 */
.progress-text {
    position: fixed;
    top: 8px;
    left: 50%;
    transform: translateX(-50%);
    background: #333;
    color: white;
    padding: 4px 12px;
    border-radius: 15px;
    font-size: 14px;
    z-index: 1001;
    box-shadow: 0 2px 5px rgba(0,0,0,0.2);
}

/* 프로그레스바 애니메이션 */
@keyframes progress-glow {
    0% { box-shadow: 0 0 5px rgba(33, 150, 243, 0.5); }
    50% { box-shadow: 0 0 20px rgba(33, 150, 243, 0.8); }
    100% { box-shadow: 0 0 5px rgba(33, 150, 243, 0.5); }
}

/* 반응형 디자인 */
@media (max-width: 768px) {
    .group {
        padding: 10px;
        margin-bottom: 15px;
    }
    .progress-text {
        font-size: 12px;
        padding: 3px 10px;
    }
}

/* 로딩 상태 표시 개선 */
.loading {
    opacity: 0.7;
    pointer-events: none;
    transition: opacity 0.3s ease;
}

/* 결과 컨테이너 애니메이션 */
.group {
    transition: all 0.3s ease;
    opacity: 0;
    transform: translateY(20px);
}
.group.visible {
    opacity: 1;
    transform: translateY(0);
}

/* Examples 스타일링 */
.examples-table {
    margin-top: 10px !important;
    margin-bottom: 20px !important;
}
.examples-table button {
    background-color: #f0f0f0 !important;
    border: 1px solid #ddd !important;
    border-radius: 4px !important;
    padding: 5px 10px !important;
    margin: 2px !important;
    transition: all 0.3s ease !important;
}
.examples-table button:hover {
    background-color: #e0e0e0 !important;
    transform: translateY(-1px) !important;
    box-shadow: 0 2px 5px rgba(0,0,0,0.1) !important;
}
.examples-table .label {
    font-weight: bold !important;
    color: #444 !important;
    margin-bottom: 5px !important;
}
"""

# Prefix of the string get_article_content() returns when crawling fails.
_CRAWL_ERROR_PREFIX = "Error crawling content: "


def get_article_content(url):
    """Download ``url`` and return the article text (paragraphs joined by spaces).

    The first matching article container is used; otherwise every ``<p>`` of
    the page.  On failure a string starting with ``"Error crawling content: "``
    is returned instead of raising.
    """
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        # A timeout keeps an unresponsive site from blocking the worker forever.
        response = requests.get(url, headers=headers, timeout=(5, 20))
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Common containers for an article body, in order of preference.
        article_body = None
        possible_content_elements = [
            soup.find('article'),
            soup.find('div', class_='article-body'),
            soup.find('div', class_='content'),
            soup.find('div', {'id': 'article-body'})
        ]
        for element in possible_content_elements:
            if element:
                article_body = element
                break

        if article_body:
            # Strip non-content elements before collecting paragraphs.
            for tag in article_body.find_all(['script', 'style', 'nav', 'header', 'footer']):
                tag.decompose()
            paragraphs = article_body.find_all('p')
        else:
            paragraphs = soup.find_all('p')

        return ' '.join(p.get_text().strip() for p in paragraphs if p.get_text().strip())

    except Exception as e:
        return f"{_CRAWL_ERROR_PREFIX}{str(e)}"


# System prompt used when the UI does not supply one.
_DEFAULT_SYSTEM_MESSAGE = """당신은 전문 번역가이자 기자입니다. 모든 작업은 반드시 다음 두 단계로 진행하고, 각 단계를 명확히 구분하여 출력해야 합니다:
1. 원문 번역: ===번역=== 표시 후 정확한 한국어 번역 제공
2. 기사 작성: ===기사=== 표시 후 번역본을 기반으로 한국어 뉴스 기사 작성
두 단계를 건너뛰거나 통합하지 말고 반드시 순차적으로 진행하세요."""

# User prompt; ``{article_content}`` is filled with the crawled article text.
_TRANSLATION_PROMPT_TEMPLATE = """다음 작업을 순차적으로 수행하세요:

1단계: 번역
아래 영문 기사를 한국어로 정확하게 번역하세요.
구분선: ===번역 시작===
{article_content}
구분선: ===번역 끝===

2단계: 기사 작성
위의 번역된 내용을 바탕으로 새로운 한국어 기사를 작성하세요.
다음 형식을 반드시 준수하세요:
- 제목: [헤드라인]
- 부제: [서브헤드라인]
- 본문: [기사 내용]
- 작성 규칙:
  * 문장은 '다.'로 끝나야 함
  * 신문 기사 형식 준수
  * 단락 구분을 명확히 할 것
  * 핵심 정보를 앞부분에 배치
  * 인용구는 따옴표로 처리

각 단계는 '===번역===', '===기사==='로 구분하여 출력하세요.
"""


def respond(
    url,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Streaming click handler of the AI article tab.

    Crawls ``url``, asks the model to translate it and rewrite it as a Korean
    news article, and yields the growing chat history as tokens arrive.
    ``system_message`` comes from the advanced settings; when it is blank the
    built-in prompt is used.
    """
    history = history or []

    if not url.startswith('http'):
        history.append((url, "올바른 URL을 입력해주세요."))
        # This function is a generator: the message must be yielded, a plain
        # ``return history`` would never reach the UI.
        yield history
        return

    try:
        article_content = get_article_content(url)
        if not article_content or article_content.startswith(_CRAWL_ERROR_PREFIX):
            # Do not ask the model to "translate" an error message or nothing.
            reason = article_content or "본문을 찾을 수 없습니다."
            history.append((url, f"처리 중 오류가 발생했습니다: {reason}"))
            yield history
            return

        translation_prompt = _TRANSLATION_PROMPT_TEMPLATE.format(
            article_content=article_content
        )
        system_prompt = (system_message or "").strip() or _DEFAULT_SYSTEM_MESSAGE

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": translation_prompt}
        ]

        history.append((url, "번역 및 기사 작성을 시작합니다..."))

        full_response = ""
        for message in client.chat.completions.create(
            model="CohereForAI/c4ai-command-r-plus-08-2024",
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            messages=messages,
        ):
            # Some stream chunks carry no choices or no content.
            if not message.choices:
                continue
            token = getattr(message.choices[0].delta, 'content', None)
            if not token:
                continue

            full_response += token
            # Put a blank line after a section marker for readability.
            if "===번역===" in token or "===기사===" in token:
                full_response += "\n\n"

            history[-1] = (url, full_response)
            yield history

    except Exception as e:
        error_message = f"처리 중 오류가 발생했습니다: {str(e)}"
        history.append((url, error_message))
        yield history


with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI 서비스") as iface:
    with gr.Tabs():
        # Per-country tab
        with gr.Tab("국가별"):
            gr.Markdown("검색어를 입력하고 원하는 국가(한국 제외)를 선택하면, 검색어와 일치하는 24시간 이내 뉴스를 최대 100개 출력합니다.")
            gr.Markdown("국가 선택후 검색어에 '한글'을 입력하면 현지 언어로 번역되어 검색합니다. 예: 'Taiwan' 국가 선택후 '삼성' 입력시 '三星'으로 자동 검색")

            with gr.Column():
                with gr.Row():
                    query = gr.Textbox(label="검색어")
                    country = gr.Dropdown(
                        choices=sorted(list(COUNTRY_LOCATIONS.keys())),
                        label="국가",
                        value="United States"
                    )

                # Frequently used example queries
                gr.Examples(
                    examples=[
                        "artificial intelligence",
                        "NVIDIA",
                        "OPENAI",
                        "META LLAMA",
                        "black forest labs",
                        "GOOGLE gemini",
                        "anthropic Claude",
                        "X.AI",
                        "HUGGINGFACE",
                        "HYNIX",
                        "Large Language model",
                        "CHATGPT",
                        "StabilityAI",
                        "MISTRALAI",
                        "QWEN",
                        "MIDJOURNEY",
                        "GPU"
                    ],
                    inputs=query,
                    label="자주 사용되는 검색어"
                )

                status_message = gr.Markdown("", visible=True)
                translated_query_display = gr.Markdown(visible=False)
                search_button = gr.Button("검색", variant="primary")

                articles_state = gr.State([])
                article_components = create_article_components(MAX_COUNTRY_RESULTS)

        # Regional ("worldwide") tab
        with gr.Tab("전세계"):
            gr.Markdown("대륙별로 24시간 이내 뉴스를 검색합니다.")

            with gr.Column():
                with gr.Column(elem_id="status_area"):
                    with gr.Row():
                        query_global = gr.Textbox(label="검색어")
                        region_select = gr.Dropdown(
                            choices=REGIONS,
                            label="지역 선택",
                            value="동아시아"
                        )
                        search_button_global = gr.Button("검색", variant="primary")

                    status_message_global = gr.Markdown("")
                    translated_query_display_global = gr.Markdown("")

                with gr.Column(elem_id="results_area"):
                    articles_state_global = gr.State([])
                    global_article_components = create_article_components(MAX_GLOBAL_RESULTS)

        # AI translation / article generation tab
        with gr.Tab("AI 기사 생성"):
            gr.Markdown("뉴스 URL을 입력하면 AI가 한국어로 번역하여 기사 형식으로 작성합니다.")

            with gr.Column():
                chatbot = gr.Chatbot(height=600)

                with gr.Row():
                    url_input = gr.Textbox(
                        label="뉴스 URL",
                        placeholder="https://..."
                    )

                with gr.Accordion("고급 설정", open=False):
                    system_message = gr.Textbox(
                        value="""You are a professional translator and journalist. Follow these steps strictly:
1. TRANSLATION
- Start with ===번역=== marker
- Provide accurate Korean translation
- Maintain original meaning and context
2. ARTICLE WRITING
- Start with ===기사=== marker
- Write a new Korean news article based on the translation
- Follow newspaper article format
- Use formal news writing style
- End sentences with '다.'
- Include headline and subheadline
- Organize paragraphs clearly
- Put key information first
- Use quotes appropriately
IMPORTANT:
- Must complete both steps in order
- Clearly separate each section with markers
- Never skip or combine steps""",
                        label="System message"
                    )

                    max_tokens = gr.Slider(
                        minimum=1,
                        maximum=7800,
                        value=7624,
                        step=1,
                        label="Max new tokens"
                    )
                    temperature = gr.Slider(
                        minimum=0.1,
                        maximum=4.0,
                        value=0.7,
                        step=0.1,
                        label="Temperature"
                    )
                    top_p = gr.Slider(
                        minimum=0.1,
                        maximum=1.0,
                        value=0.95,
                        step=0.05,
                        label="Top-P"
                    )

                translate_button = gr.Button("기사 생성", variant="primary")

            translate_button.click(
                fn=respond,
                inputs=[
                    url_input,
                    chatbot,
                    system_message,
                    max_tokens,
                    temperature,
                    top_p,
                ],
                outputs=chatbot
            )

    # Event wiring
    # Per-country tab: status, query display, error display, 5 outputs per slot, state.
    error_display = gr.Markdown(visible=False)
    search_outputs = [status_message, translated_query_display, error_display]
    for comp in article_components:
        search_outputs.extend([
            comp['group'], comp['title'], comp['image'],
            comp['snippet'], comp['info']
        ])
    search_outputs.append(articles_state)

    search_button.click(
        fn=search_and_display,
        inputs=[query, country, articles_state],
        outputs=search_outputs,
        show_progress=True
    )

    # Regional tab: status, query display, 5 outputs per slot, state.
    global_search_outputs = [status_message_global, translated_query_display_global]
    for comp in global_article_components:
        global_search_outputs.extend([
            comp['group'], comp['title'], comp['image'],
            comp['snippet'], comp['info']
        ])
    global_search_outputs.append(articles_state_global)

    search_button_global.click(
        fn=search_global,
        inputs=[query_global, region_select, articles_state_global],
        outputs=global_search_outputs,
        show_progress=True
    )

# SECURITY NOTE: the login used to be hard-coded.  The same values remain the
# defaults for compatibility, but deployments should override them through
# the APP_AUTH_USER / APP_AUTH_PASSWORD environment variables.
iface.launch(
    server_name="0.0.0.0",
    server_port=7860,
    share=True,
    auth=(os.getenv("APP_AUTH_USER", "ai"), os.getenv("APP_AUTH_PASSWORD", "news")),
    ssl_verify=False,
    show_error=True
)