import json
import os
import pathlib
import re
import sqlite3
from concurrent.futures import ThreadPoolExecutor
from contextlib import closing
from datetime import datetime, timedelta
from functools import lru_cache

import gradio as gr
import pytz
import requests
from bs4 import BeautifulSoup
from openai import OpenAI
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

# Korean companies tracked on the "DB search" tab.
KOREAN_COMPANIES = [
    "SAMSUNG",
    "HYNIX",
    "HYUNDAI",
    "KIA",
    "LG",
    "HANWHA",
    "KB",
    "SKT",
    "KT",
    "HANA",
    "SK",
    "POSCO",
    "DOOSAN",
    "WOORI",
    "KAKAO",
    "Celltrion",
]

# Single source of truth for the SQLite file used by the helpers below.
DB_PATH = pathlib.Path("search_results.db")

_TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'


def convert_to_seoul_time(timestamp_str):
    """Convert a naive UTC ``YYYY-MM-DD HH:MM:SS`` string to a KST display string.

    Args:
        timestamp_str: Timestamp text as stored in the ``searches`` table (UTC).

    Returns:
        The same instant formatted as ``YYYY-MM-DD HH:MM:SS KST``; if the input
        cannot be parsed it is returned unchanged (best-effort display helper).
    """
    try:
        utc_time = pytz.utc.localize(datetime.strptime(timestamp_str, _TIMESTAMP_FORMAT))
    except (ValueError, TypeError):
        # Unparseable or non-string value: show whatever was stored.
        return timestamp_str
    seoul_time = utc_time.astimezone(pytz.timezone('Asia/Seoul'))
    return seoul_time.strftime('%Y-%m-%d %H:%M:%S KST')


def analyze_sentiment_batch(articles, client):
    """Ask the chat model for one overall sentiment report covering all articles.

    Args:
        articles: Iterable of article dicts; ``title`` and ``snippet`` are read.
        client: OpenAI-compatible client exposing ``chat.completions.create``.

    Returns:
        The model's answer text, or a Korean failure message if the call raised.
    """
    try:
        # Merge every article's title and snippet into a single prompt body.
        combined_text = "\n\n".join(
            f"제목: {article.get('title', '')}\n내용: {article.get('snippet', '')}"
            for article in articles
        )

        # NOTE: line breaks were reconstructed; the source layout was lost.
        prompt = f"""다음 뉴스 모음에 대해 전반적인 감성 분석을 수행하세요:

뉴스 내용:
{combined_text}

다음 형식으로 분석해주세요:
1. 전반적 감성: [긍정/부정/중립]
2. 주요 긍정적 요소:
   - [항목1]
   - [항목2]
3. 주요 부정적 요소:
   - [항목1]
   - [항목2]
4. 종합 평가: [상세 설명]
"""

        response = client.chat.completions.create(
            model="CohereForAI/c4ai-command-r-plus-08-2024",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.3,
            max_tokens=1000,
        )
        return response.choices[0].message.content
    except Exception as e:
        # Top-level boundary for the UI: report the failure instead of crashing.
        return f"감성 분석 실패: {str(e)}"


def init_db():
    """Create the ``searches`` table if it does not exist yet."""
    with closing(sqlite3.connect(DB_PATH)) as conn:
        conn.execute('''CREATE TABLE IF NOT EXISTS searches
                        (id INTEGER PRIMARY KEY AUTOINCREMENT,
                         keyword TEXT,
                         country TEXT,
                         results TEXT,
                         timestamp DATETIME DEFAULT CURRENT_TIMESTAMP)''')
        conn.commit()


def save_to_db(keyword, country, results):
    """Persist one search result set.

    The timestamp is written in UTC so that it matches the column's
    ``CURRENT_TIMESTAMP`` default and the assumption made by
    ``convert_to_seoul_time``. Storing Seoul time here (as before) made every
    displayed timestamp 9 hours too late.

    Args:
        keyword: Search keyword.
        country: Country name the search was run for.
        results: JSON-serialisable list of article dicts.
    """
    utc_now = datetime.now(pytz.utc).strftime(_TIMESTAMP_FORMAT)
    with closing(sqlite3.connect(DB_PATH)) as conn:
        conn.execute(
            """INSERT INTO searches
               (keyword, country, results, timestamp)
               VALUES (?, ?, ?, ?)""",
            (keyword, country, json.dumps(results), utc_now),
        )
        conn.commit()


def load_from_db(keyword, country):
    """Load the most recently saved result set for a keyword/country pair.

    Returns:
        ``(articles, kst_timestamp_string)`` or ``(None, None)`` when nothing
        has been stored.
    """
    with closing(sqlite3.connect(DB_PATH)) as conn:
        # Order by the autoincrement id rather than the timestamp text so rows
        # written before the UTC fix (stored in KST) cannot outrank newer rows.
        row = conn.execute(
            "SELECT results, timestamp FROM searches "
            "WHERE keyword=? AND country=? ORDER BY id DESC LIMIT 1",
            (keyword, country),
        ).fetchone()
    if row:
        return json.loads(row[0]), convert_to_seoul_time(row[1])
    return None, None


def display_results(articles):
    """Render article dicts as a Markdown listing, numbered from 1."""
    return "".join(
        f"### {idx}. {article['title']}\n"
        f"출처: {article['channel']}\n"
        f"시간: {article['time']}\n"
        f"링크: {article['link']}\n"
        f"요약: {article['snippet']}\n\n"
        for idx, article in enumerate(articles, 1)
    )


def search_company(company):
    """Search US news for a company, cache the result, and return Markdown."""
    error_message, articles = serphouse_search(company, "United States")
    if not error_message and articles:
        save_to_db(company, "United States", articles)
        return display_results(articles)
    return f"{company}에 대한 검색 결과가 없습니다."

def load_company(company):
    """Return the cached US search result for a company as Markdown."""
    results, timestamp = load_from_db(company, "United States")
    if results:
        return f"### {company} 검색 결과\n저장 시간: {timestamp}\n\n" + display_results(results)
    return f"{company}에 대한 저장된 결과가 없습니다."


def show_stats():
    """Build a Markdown report with the latest cached result per company.

    For each company with stored articles a sentiment analysis is requested
    from the chat model via ``analyze_sentiment_batch``.
    """
    sections = ["## 한국 기업 뉴스 분석 리포트\n\n"]
    conn = sqlite3.connect("search_results.db")
    try:
        cursor = conn.cursor()
        for company in KOREAN_COMPANIES:
            # The autoincrement id is the reliable "most recent insert" order.
            cursor.execute("""
                SELECT results, timestamp
                FROM searches
                WHERE keyword = ?
                ORDER BY id DESC
                LIMIT 1
            """, (company,))
            row = cursor.fetchone()
            if not row:
                continue

            results_json, timestamp = row
            articles = json.loads(results_json)
            seoul_time = convert_to_seoul_time(timestamp)

            sections.append(f"### {company}\n")
            sections.append(f"- 마지막 업데이트: {seoul_time}\n")
            sections.append(f"- 저장된 기사 수: {len(articles)}건\n\n")

            if articles:
                # One sentiment report over all stored articles.
                sentiment_analysis = analyze_sentiment_batch(articles, client)
                sections.append("#### 뉴스 감성 분석\n")
                sections.append(f"{sentiment_analysis}\n\n")

            sections.append("---\n\n")
    finally:
        # Close even if JSON decoding or the model call raises.
        conn.close()
    return "".join(sections)


ACCESS_TOKEN = os.getenv("HF_TOKEN")
if not ACCESS_TOKEN:
    raise ValueError("HF_TOKEN environment variable is not set")

client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)

MAX_COUNTRY_RESULTS = 100  # maximum results shown per country
MAX_GLOBAL_RESULTS = 1000  # maximum results shown for a region search


def create_article_components(max_results):
    """Create ``max_results`` hidden article cards in the current Blocks context.

    Returns:
        A list of dicts holding each card's ``group``, ``title``, ``image``,
        ``snippet`` and ``info`` components plus its ``index``.
    """
    article_components = []
    for i in range(max_results):
        with gr.Group(visible=False) as article_group:
            title = gr.Markdown()
            image = gr.Image(width=200, height=150)
            snippet = gr.Markdown()
            info = gr.Markdown()

        article_components.append({
            'group': article_group,
            'title': title,
            'image': image,
            'snippet': snippet,
            'info': info,
            'index': i,
        })
    return article_components


# NOTE(review): if SERPHOUSE_API_KEY is unset the header becomes "Bearer None".
API_KEY = os.getenv("SERPHOUSE_API_KEY")

# Country name -> search language code.
COUNTRY_LANGUAGES = {
    "United States": "en", "KOREA": "ko", "United Kingdom": "en", "Taiwan": "zh-TW",
    "Canada": "en", "Australia": "en", "Germany": "de", "France": "fr",
    "Japan": "ja", "China": "zh", "India": "hi", "Brazil": "pt",
    "Mexico": "es", "Russia": "ru", "Italy": "it", "Spain": "es",
    "Netherlands": "nl", "Singapore": "en", "Hong Kong": "zh-HK", "Indonesia": "id",
    "Malaysia": "ms", "Philippines": "tl", "Thailand": "th", "Vietnam": "vi",
    "Belgium": "nl", "Denmark": "da", "Finland": "fi", "Ireland": "en",
    "Norway": "no", "Poland": "pl", "Sweden": "sv", "Switzerland": "de",
    "Austria": "de", "Czech Republic": "cs", "Greece": "el", "Hungary": "hu",
    "Portugal": "pt", "Romania": "ro", "Turkey": "tr", "Israel": "he",
    "Saudi Arabia": "ar", "United Arab Emirates": "ar", "South Africa": "en", "Argentina": "es",
    "Chile": "es", "Colombia": "es", "Peru": "es", "Venezuela": "es",
    "New Zealand": "en", "Bangladesh": "bn", "Pakistan": "ur", "Egypt": "ar",
    "Morocco": "ar", "Nigeria": "en", "Kenya": "sw", "Ukraine": "uk",
    "Croatia": "hr", "Slovakia": "sk", "Bulgaria": "bg", "Serbia": "sr",
    "Estonia": "et", "Latvia": "lv", "Lithuania": "lt", "Slovenia": "sl",
    "Luxembourg": "fr", "Malta": "mt", "Cyprus": "el", "Iceland": "is",
}

# Country name -> SerpHouse "loc" value. Every entry maps to itself except
# KOREA, which the original table mapped to "kr".
COUNTRY_LOCATIONS = {country: country for country in COUNTRY_LANGUAGES}
COUNTRY_LOCATIONS["KOREA"] = "kr"

# Region definitions. Each *_LOCATIONS_* table maps every country to itself.
# East Asia
COUNTRY_LANGUAGES_EAST_ASIA = {
    "KOREA": "ko", "Taiwan": "zh-TW", "Japan": "ja", "China": "zh", "Hong Kong": "zh-HK",
}
COUNTRY_LOCATIONS_EAST_ASIA = {c: c for c in COUNTRY_LANGUAGES_EAST_ASIA}

# Southeast Asia / Oceania
COUNTRY_LANGUAGES_SOUTHEAST_ASIA_OCEANIA = {
    "Indonesia": "id", "Malaysia": "ms", "Philippines": "tl", "Thailand": "th",
    "Vietnam": "vi", "Singapore": "en", "Papua New Guinea": "en", "Australia": "en",
    "New Zealand": "en",
}
COUNTRY_LOCATIONS_SOUTHEAST_ASIA_OCEANIA = {c: c for c in COUNTRY_LANGUAGES_SOUTHEAST_ASIA_OCEANIA}

# Eastern Europe
COUNTRY_LANGUAGES_EAST_EUROPE = {
    "Poland": "pl", "Czech Republic": "cs", "Greece": "el", "Hungary": "hu",
    "Romania": "ro", "Ukraine": "uk", "Croatia": "hr", "Slovakia": "sk",
    "Bulgaria": "bg", "Serbia": "sr", "Estonia": "et", "Latvia": "lv",
    "Lithuania": "lt", "Slovenia": "sl", "Malta": "mt", "Cyprus": "el",
    "Iceland": "is", "Russia": "ru",
}
COUNTRY_LOCATIONS_EAST_EUROPE = {c: c for c in COUNTRY_LANGUAGES_EAST_EUROPE}

# Western Europe
COUNTRY_LANGUAGES_WEST_EUROPE = {
    "Germany": "de", "France": "fr", "Italy": "it", "Spain": "es",
    "Netherlands": "nl", "Belgium": "nl", "Ireland": "en", "Sweden": "sv",
    "Switzerland": "de", "Austria": "de", "Portugal": "pt", "Luxembourg": "fr",
    "United Kingdom": "en",
}
COUNTRY_LOCATIONS_WEST_EUROPE = {c: c for c in COUNTRY_LANGUAGES_WEST_EUROPE}

# Middle East / Africa
COUNTRY_LANGUAGES_ARAB_AFRICA = {
    "South Africa": "en", "Nigeria": "en", "Kenya": "sw", "Egypt": "ar",
    "Morocco": "ar", "Saudi Arabia": "ar", "United Arab Emirates": "ar", "Israel": "he",
}
COUNTRY_LOCATIONS_ARAB_AFRICA = {c: c for c in COUNTRY_LANGUAGES_ARAB_AFRICA}

# Americas
COUNTRY_LANGUAGES_AMERICA = {
    "United States": "en", "Canada": "en", "Mexico": "es", "Brazil": "pt",
    "Argentina": "es", "Chile": "es", "Colombia": "es", "Peru": "es",
    "Venezuela": "es",
}
COUNTRY_LOCATIONS_AMERICA = {c: c for c in COUNTRY_LANGUAGES_AMERICA}

# Region choices offered in the UI.
REGIONS = [
    "동아시아",
    "동남아시아/오세아니아",
    "동유럽",
    "서유럽",
    "중동/아프리카",
    "아메리카",
]

_TRANSLATE_URL = "https://translate.googleapis.com/translate_a/single"


def _google_translate(text, target_lang):
    """Translate ``text`` via the public Google endpoint; raises on failure.

    The response's first element is a list of per-sentence segments. All of
    them are joined; reading only the first segment (as before) silently
    dropped every sentence after the first.
    """
    params = {"client": "gtx", "sl": "auto", "tl": target_lang, "dt": "t", "q": text}
    with requests.Session() as session:
        session.mount('https://', HTTPAdapter(max_retries=Retry(total=3, backoff_factor=0.5)))
        response = session.get(_TRANSLATE_URL, params=params, timeout=(5, 10))
    segments = response.json()[0]
    translated = "".join(segment[0] for segment in segments if segment and segment[0])
    return translated or text


@lru_cache(maxsize=100)
def translate_query(query, country):
    """Translate a query into the country's language; ASCII queries pass through.

    Returns the original query for unknown countries or on any error.
    """
    try:
        if is_english(query):
            return query

        if country in COUNTRY_LANGUAGES:
            # NOTE(review): COUNTRY_LANGUAGES has no "South Korea" key (it uses
            # "KOREA"), so this branch never runs; kept until intent is confirmed.
            if country == "South Korea":
                return query
            return _google_translate(query, COUNTRY_LANGUAGES[country])

        return query
    except Exception as e:
        # Best-effort: fall back to the untranslated query.
        print(f"번역 오류: {str(e)}")
        return query


@lru_cache(maxsize=200)
def translate_to_korean(text):
    """Translate ``text`` into Korean; returns the input unchanged on error."""
    try:
        return _google_translate(text, "ko")
    except Exception as e:
        print(f"한글 번역 오류: {str(e)}")
        return text


def is_english(text):
    """Return True when every character (ignoring space, '-', '_') is ASCII."""
    return all(ord(char) < 128 for char in text.replace(' ', '').replace('-', '').replace('_', ''))


def is_korean(text):
    """Return True when the text contains at least one Hangul syllable."""
    return any('\uAC00' <= char <= '\uD7A3' for char in text)


def search_serphouse(query, country, page=1, num_result=10):
    """Query the SerpHouse live news endpoint for the last 24 hours.

    Args:
        query: Search terms (translated to the country's language first).
        country: Key of ``COUNTRY_LOCATIONS`` / ``COUNTRY_LANGUAGES``.
        page: Result page to request.
        num_result: Currently unused. NOTE(review): the request has always
            asked for 100 results; honouring this default of 10 would change
            behaviour for existing callers.

    Returns:
        ``{"results": <json>, "translated_query": str}`` on success, otherwise
        ``{"error": <message>, "translated_query": str}``.
    """
    url = "https://api.serphouse.com/serp/live"

    now = datetime.utcnow()
    yesterday = now - timedelta(days=1)
    date_range = f"{yesterday.strftime('%Y-%m-%d')},{now.strftime('%Y-%m-%d')}"

    translated_query = translate_query(query, country)

    payload = {
        "data": {
            "q": translated_query,
            "domain": "google.com",
            "loc": COUNTRY_LOCATIONS.get(country, "United States"),
            "lang": COUNTRY_LANGUAGES.get(country, "en"),
            "device": "desktop",
            "serp_type": "news",
            "page": str(page),
            "num": "100",
            "date_range": date_range,
            "sort_by": "date",
        }
    }

    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": f"Bearer {API_KEY}",
    }

    try:
        with requests.Session() as session:
            # Retry transient server errors and rate limiting, POST included.
            retries = Retry(
                total=5,
                backoff_factor=1,
                status_forcelist=[500, 502, 503, 504, 429],
                allowed_methods=["POST"],
            )
            adapter = HTTPAdapter(max_retries=retries)
            session.mount('http://', adapter)
            session.mount('https://', adapter)

            response = session.post(
                url,
                json=payload,
                headers=headers,
                timeout=(30, 30),  # (connect, read) seconds
            )
            response.raise_for_status()
            return {"results": response.json(), "translated_query": translated_query}

    except requests.exceptions.Timeout:
        return {
            "error": "검색 시간이 초과되었습니다. 잠시 후 다시 시도해주세요.",
            "translated_query": translated_query,
        }
    except requests.exceptions.RequestException as e:
        return {
            "error": f"검색 중 오류가 발생했습니다: {str(e)}",
            "translated_query": translated_query,
        }
    except Exception as e:
        return {
            "error": f"예기치 않은 오류가 발생했습니다: {str(e)}",
            "translated_query": translated_query,
        }


# Substrings identifying Korean outlets (matched against URL and channel).
_KOREAN_DOMAINS = ('.kr', 'korea', 'korean', 'yonhap', 'hankyung', 'chosun',
                   'donga', 'joins', 'hani', 'koreatimes', 'koreaherald')
# Substrings identifying Korea-related titles.
_KOREAN_KEYWORDS = ('korea', 'korean', 'seoul', 'busan', 'incheon', 'daegu',
                    'gwangju', 'daejeon', 'ulsan', 'sejong')


def format_results_from_raw(response_data):
    """Turn a ``search_serphouse`` response into ``(error_message, articles)``.

    Articles from Korean outlets or with Korea-related titles are dropped.
    ``error_message`` is an empty string on success.
    """
    if "error" in response_data:
        return "Error: " + response_data["error"], []

    try:
        results = response_data["results"]
        translated_query = response_data["translated_query"]

        news_results = results.get('results', {}).get('results', {}).get('news', [])
        if not news_results:
            return "검색 결과가 없습니다.", []

        filtered_articles = []
        for idx, result in enumerate(news_results, 1):
            # Keep the original-case link for output; lowercase copies are used
            # only for matching (lower-casing the link broke case-sensitive URLs).
            link = result.get("url", result.get("link", ""))
            url_lc = link.lower()
            title_lc = result.get("title", "").lower()
            channel_lc = result.get("channel", result.get("source", "")).lower()

            is_korean_content = (
                any(domain in url_lc or domain in channel_lc for domain in _KOREAN_DOMAINS)
                or any(keyword in title_lc for keyword in _KOREAN_KEYWORDS)
            )
            if is_korean_content:
                continue

            filtered_articles.append({
                "index": idx,
                "title": result.get("title", "제목 없음"),
                "link": link,
                "snippet": result.get("snippet", "내용 없음"),
                "channel": result.get("channel", result.get("source", "알 수 없음")),
                "time": result.get("time", result.get("date", "알 수 없는 시간")),
                "image_url": result.get("img", result.get("thumbnail", "")),
                "translated_query": translated_query,
            })

        return "", filtered_articles
    except Exception as e:
        return f"결과 처리 중 오류 발생: {str(e)}", []


def serphouse_search(query, country):
    """Search and format in one step; returns ``(error_message, articles)``."""
    response_data = search_serphouse(query, country)
    return format_results_from_raw(response_data)


def _hidden_article_updates():
    """Updates that hide one article card (group, title, image, snippet, info)."""
    return [gr.update(visible=False), gr.update(), gr.update(), gr.update(), gr.update()]


def _image_update(image_url):
    """Show the thumbnail unless it is missing or an inline data URI."""
    if image_url and not image_url.startswith('data:image'):
        return gr.update(value=image_url, visible=True)
    return gr.update(value=None, visible=False)


def search_and_display(query, country, articles_state, progress=gr.Progress()):
    """Handler for the per-country search button.

    Returns a list of updates ordered as: status, translated-query display,
    error message, five updates per article card, and the new articles state.
    """
    with ThreadPoolExecutor(max_workers=3) as executor:
        progress(0, desc="검색어 번역 중...")
        future_translation = executor.submit(translate_query, query, country)
        translated_query = future_translation.result()
        if translated_query != query:
            translated_display = f"**원본 검색어:** {query}\n**번역된 검색어:** {translated_query}"
        else:
            translated_display = f"**검색어:** {query}"

        progress(0.3, desc="검색 중...")
        response_data = search_serphouse(query, country)

        progress(0.6, desc="결과 처리 중...")
        error_message, articles = format_results_from_raw(response_data)

        outputs = [
            gr.update(value="검색을 진행중입니다...", visible=True),
            gr.update(value=translated_display, visible=True),
        ]

        if error_message:
            outputs.append(gr.update(value=error_message, visible=True))
            for _ in article_components:
                outputs.extend(_hidden_article_updates())
            articles_state = []
        else:
            outputs.append(gr.update(value="", visible=False))

            # Translate all snippets concurrently.
            futures = [
                (article, executor.submit(translate_to_korean, article['snippet']))
                for article in articles
            ]
            progress(0.8, desc="번역 처리 중...")
            for article, future in futures:
                article['korean_summary'] = future.result()

            # Every card gets an update even when no article survived the
            # filter, so the number of outputs always matches the wiring.
            total_articles = len(articles)
            for idx, _ in enumerate(article_components):
                if idx >= total_articles:
                    outputs.extend(_hidden_article_updates())
                    continue

                # Only report progress for real articles so it stays <= 1.0.
                progress((idx + 1) / total_articles, desc=f"결과 표시 중... {idx + 1}/{total_articles}")
                article = articles[idx]
                outputs.extend([
                    gr.update(visible=True),
                    gr.update(value=f"### [{article['title']}]({article['link']})"),
                    _image_update(article['image_url']),
                    gr.update(value=f"**요약:** {article['snippet']}\n\n**한글 요약:** {article['korean_summary']}"),
                    gr.update(value=f"**출처:** {article['channel']} | **시간:** {article['time']}"),
                ])
            articles_state = articles

        progress(1.0, desc="완료!")
        outputs.append(articles_state)
        outputs[0] = gr.update(value="", visible=False)

        return outputs


# Region label -> (locations table, languages table).
_REGION_TABLES = {
    "동아시아": (COUNTRY_LOCATIONS_EAST_ASIA, COUNTRY_LANGUAGES_EAST_ASIA),
    "동남아시아/오세아니아": (COUNTRY_LOCATIONS_SOUTHEAST_ASIA_OCEANIA, COUNTRY_LANGUAGES_SOUTHEAST_ASIA_OCEANIA),
    "동유럽": (COUNTRY_LOCATIONS_EAST_EUROPE, COUNTRY_LANGUAGES_EAST_EUROPE),
    "서유럽": (COUNTRY_LOCATIONS_WEST_EUROPE, COUNTRY_LANGUAGES_WEST_EUROPE),
    "중동/아프리카": (COUNTRY_LOCATIONS_ARAB_AFRICA, COUNTRY_LANGUAGES_ARAB_AFRICA),
    "아메리카": (COUNTRY_LOCATIONS_AMERICA, COUNTRY_LANGUAGES_AMERICA),
}


def get_region_countries(region):
    """Return ``(locations, languages)`` for a region; two empty dicts if unknown."""
    if region in _REGION_TABLES:
        return _REGION_TABLES[region]
    return {}, {}


def search_global(query, region, articles_state_global):
    """Generator handler for the region search: searches each country in turn.

    Yields a full output list after every step so the UI updates
    progressively. Each list holds: status, query display, five updates per
    article card, and the accumulated article list.
    """
    status_msg = f"{region} 지역 검색을 시작합니다..."
    all_results = []
    # Initialised up front: it is read after the loop even when no country
    # produced any article (previously a NameError).
    unique_results = []

    outputs = [
        gr.update(value=status_msg, visible=True),
        gr.update(value=f"**검색어:** {query}", visible=True),
    ]
    for _ in global_article_components:
        outputs.extend(_hidden_article_updates())
    outputs.append([])

    yield outputs

    # Countries belonging to the selected region.
    locations, languages = get_region_countries(region)
    total_countries = len(locations)

    for idx, (country, location) in enumerate(locations.items(), 1):
        try:
            status_msg = f"{region} - {country} 검색 중... ({idx}/{total_countries} 국가)"
            outputs[0] = gr.update(value=status_msg, visible=True)
            yield outputs

            error_message, articles = serphouse_search(query, country)
            if error_message or not articles:
                continue

            for article in articles:
                article['source_country'] = country
                article['region'] = region
            all_results.extend(articles)

            # NOTE(review): 'time' is free text from the API, so this is a
            # plain string sort rather than a true chronological one.
            sorted_results = sorted(all_results, key=lambda x: x.get('time', ''), reverse=True)

            # De-duplicate by link, keeping the first occurrence.
            seen_urls = set()
            unique_results = []
            for article in sorted_results:
                url = article.get('link', '')
                if url not in seen_urls:
                    seen_urls.add(url)
                    unique_results.append(article)
            unique_results = unique_results[:MAX_GLOBAL_RESULTS]

            outputs = [
                gr.update(value=f"{region} - {idx}/{total_countries} 국가 검색 완료\n현재까지 발견된 뉴스: {len(unique_results)}건", visible=True),
                gr.update(value=f"**검색어:** {query} | **지역:** {region}", visible=True),
            ]

            # Separate index name: the country counter must not be shadowed.
            for card_idx, _ in enumerate(global_article_components):
                if card_idx >= len(unique_results):
                    outputs.extend(_hidden_article_updates())
                    continue

                article = unique_results[card_idx]
                korean_summary = translate_to_korean(article['snippet'])
                outputs.extend([
                    gr.update(visible=True),
                    gr.update(value=f"### [{article['title']}]({article['link']})"),
                    _image_update(article.get('image_url', '')),
                    gr.update(value=f"**요약:** {article['snippet']}\n\n**한글 요약:** {korean_summary}"),
                    gr.update(value=f"**출처:** {article['channel']} | **국가:** {article['source_country']} | **지역:** {article['region']} | **시간:** {article['time']}"),
                ])

            outputs.append(unique_results)
            yield outputs

        except Exception as e:
            # One failing country must not abort the whole region search.
            print(f"Error searching {country}: {str(e)}")
            continue

    final_status = f"{region} 검색 완료! 총 {len(unique_results)}개의 뉴스가 발견되었습니다."
    outputs[0] = gr.update(value=final_status, visible=True)
    yield outputs


css = """
/* 전역 스타일 */
footer {visibility: hidden;}

/* 레이아웃 컨테이너 */
#status_area {
    background: rgba(255, 255, 255, 0.9);
    padding: 15px;
    border-bottom: 1px solid #ddd;
    margin-bottom: 20px;
    box-shadow: 0 2px 5px rgba(0,0,0,0.1);
}
#results_area { padding: 10px; margin-top: 10px; }

/* 탭 스타일 */
.tabs { border-bottom: 2px solid #ddd !important; margin-bottom: 20px !important; }
.tab-nav { border-bottom: none !important; margin-bottom: 0 !important; }
.tab-nav button { font-weight: bold !important; padding: 10px 20px !important; }
.tab-nav button.selected { border-bottom: 2px solid #1f77b4 !important; color: #1f77b4 !important; }

/* 상태 메시지 */
#status_area .markdown-text { font-size: 1.1em; color: #2c3e50; padding: 10px 0; }

/* 기본 컨테이너 */
.group { border: 1px solid #eee; padding: 15px; margin-bottom: 15px; border-radius: 5px; background: white; }

/* 버튼 스타일 */
.primary-btn { background: #1f77b4 !important; border: none !important; }

/* 입력 필드 */
.textbox { border: 1px solid #ddd !important; border-radius: 4px !important; }

/* 프로그레스바 컨테이너 */
.progress-container { position: fixed; top: 0; left: 0; width: 100%; height: 6px; background: #e0e0e0; z-index: 1000; }

/* 프로그레스바 */
.progress-bar {
    height: 100%;
    background: linear-gradient(90deg, #2196F3, #00BCD4);
    box-shadow: 0 0 10px rgba(33, 150, 243, 0.5);
    transition: width 0.3s ease;
    animation: progress-glow 1.5s ease-in-out infinite;
}

/* 프로그레스 텍스트 */
.progress-text {
    position: fixed;
    top: 8px;
    left: 50%;
    transform: translateX(-50%);
    background: #333;
    color: white;
    padding: 4px 12px;
    border-radius: 15px;
    font-size: 14px;
    z-index: 1001;
    box-shadow: 0 2px 5px rgba(0,0,0,0.2);
}

/* 프로그레스바 애니메이션 */
@keyframes progress-glow {
    0% { box-shadow: 0 0 5px rgba(33, 150, 243, 0.5); }
    50% { box-shadow: 0 0 20px rgba(33, 150, 243, 0.8); }
    100% { box-shadow: 0 0 5px rgba(33, 150, 243, 0.5); }
}

/* 반응형 디자인 */
@media (max-width: 768px) {
    .group { padding: 10px; margin-bottom: 15px; }
    .progress-text { font-size: 12px; padding: 3px 10px; }
}

/* 로딩 상태 표시 개선 */
.loading { opacity: 0.7; pointer-events: none; transition: opacity 0.3s ease; }

/* 결과 컨테이너 애니메이션 */
.group { transition: all 0.3s ease; opacity: 0; transform: translateY(20px); }
.group.visible { opacity: 1; transform: translateY(0); }

/* Examples 스타일링 */
.examples-table { margin-top: 10px !important; margin-bottom: 20px !important; }
.examples-table button {
    background-color: #f0f0f0 !important;
    border: 1px solid #ddd !important;
    border-radius: 4px !important;
    padding: 5px 10px !important;
    margin: 2px !important;
    transition: all 0.3s ease !important;
}
.examples-table button:hover {
    background-color: #e0e0e0 !important;
    transform: translateY(-1px) !important;
    box-shadow: 0 2px 5px rgba(0,0,0,0.1) !important;
}
.examples-table .label { font-weight: bold !important; color: #444 !important; margin-bottom: 5px !important; }
"""

# CSS selectors tried, in order, to locate the main article body.
_CONTENT_SELECTORS = [
    'article', '.article-body', '.article-content', '#article-body',
    '.story-body', '.post-content', '.entry-content', '.content-body',
    '[itemprop="articleBody"]', '.story-content',
]


def get_article_content(url):
    """Download a news page and extract title, description and body text.

    Returns:
        A string of the form ``Title: ... Description: ... Content: ...`` with
        paragraphs separated by blank lines, or ``"Error crawling content: ..."``
        if anything fails.
    """
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        with requests.Session() as session:
            session.mount('https://', HTTPAdapter(max_retries=Retry(total=3, backoff_factor=0.5)))
            response = session.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Metadata. A <title> tag also has .get(), so the tag name decides
        # whether to read the "content" attribute or the tag's text.
        title_tag = soup.find('meta', property='og:title') or soup.find('title')
        if title_tag is None:
            title = ''
        elif title_tag.name == 'meta':
            title = title_tag.get('content', '')
        else:
            title = title_tag.string or ''

        description_tag = soup.find('meta', property='og:description') or soup.find('meta', {'name': 'description'})
        description = description_tag.get('content', '') if description_tag else ''

        # Body: first matching container with at least one paragraph wins.
        article_content = ''
        for selector in _CONTENT_SELECTORS:
            content = soup.select_one(selector)
            if not content:
                continue
            # Strip page chrome before collecting text.
            for tag in content.find_all(['script', 'style', 'nav', 'header', 'footer', 'aside']):
                tag.decompose()
            paragraphs = content.find_all(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
            if paragraphs:
                article_content = '\n\n'.join(p.get_text().strip() for p in paragraphs if p.get_text().strip())
                break

        # Fallback: every reasonably long <p> on the page.
        if not article_content:
            paragraphs = soup.find_all('p')
            article_content = '\n\n'.join(p.get_text().strip() for p in paragraphs if len(p.get_text().strip()) > 50)

        full_content = f"Title: {title}\n\nDescription: {description}\n\nContent:\n{article_content}"

        # Collapse runs of horizontal whitespace only; collapsing all
        # whitespace first (as before) removed the paragraph breaks and made
        # the blank-line normalisation below a no-op.
        full_content = re.sub(r'[^\S\n]+', ' ', full_content)
        full_content = re.sub(r'\n\s*\n', '\n\n', full_content)

        return full_content.strip()

    except Exception as e:
        print(f"Crawling error details: {str(e)}")
        return f"Error crawling content: {str(e)}"


_MODEL_NAME = "CohereForAI/c4ai-command-r-plus-08-2024"


def _stream_completion(messages, max_tokens, temperature, top_p):
    """Yield the accumulated response text after each streamed token."""
    full_response = ""
    for message in client.chat.completions.create(
        model=_MODEL_NAME,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        messages=messages,
    ):
        if not message.choices:
            continue
        token = getattr(message.choices[0].delta, 'content', None)
        if token:
            full_response += token
            yield full_response


def respond(url, history, system_message, max_tokens, temperature, top_p):
    """Generator handler: crawl ``url`` and stream a translated Korean article.

    Yields the updated chat history (list of ``(user, bot)`` tuples).
    """
    if not url.startswith('http'):
        history.append((url, "올바른 URL을 입력해주세요."))
        # This is a generator, so the message must be yielded to reach the UI;
        # a bare ``return history`` produced no output at all.
        yield history
        return

    try:
        article_content = get_article_content(url)

        translation_prompt = f"""다음 영문 기사를 한국어로 번역하고 기사를 작성해주세요.

1단계: 전문 번역
===번역 시작===
{article_content}
===번역 끝===

2단계: 기사 작성 가이드라인
다음 요구사항에 따라 한국어 기사를 작성하세요:

1. 구조
   - 헤드라인: 핵심 내용을 담은 강력한 제목
   - 부제목: 헤드라인 보완 설명
   - 리드문: 기사의 핵심을 요약한 첫 문단
   - 본문: 상세 내용 전개

2. 작성 규칙
   - 객관적이고 정확한 사실 전달
   - 문장은 '다.'로 종결
   - 단락 간 자연스러운 흐름
   - 인용구는 따옴표 처리
   - 핵심 정보를 앞부분에 배치
   - 전문 용어는 적절한 설명 추가

3. 형식
   - 적절한 단락 구분
   - 읽기 쉬운 문장 길이
   - 논리적인 정보 구성

각 단계는 '===번역===', '===기사==='로 명확히 구분하여 출력하세요.
"""

        messages = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": translation_prompt},
        ]

        history.append((url, "번역 및 기사 작성을 시작합니다..."))

        for full_response in _stream_completion(messages, max_tokens, temperature, top_p):
            history[-1] = (url, full_response)
            yield history

    except Exception as e:
        error_message = f"처리 중 오류가 발생했습니다: {str(e)}"
        history.append((url, error_message))
        yield history


def continue_writing(history, system_message, max_tokens, temperature, top_p):
    """Generator handler: ask the model to continue its last response.

    Yields a copy of the history with the continuation appended.
    """
    if not history:
        # Generator: yield the unchanged (empty) history instead of returning it.
        yield history
        return

    last_response = history[-1][1]
    continue_prompt = f"""이전 내용을 이어서 계속 작성해주세요.
마지막 응답: {last_response}

추가 지침:
1. 이전 내용의 맥락을 유지하며 자연스럽게 이어서 작성
2. 새로운 정보나 상세 설명을 추가
3. 필요한 경우 보충 설명이나 분석 제공
4. 기사 형식과 스타일 유지
5. 필요한 경우 추가적인 이미지 프롬프트 생성
"""

    # The conversation sent to the model starts with the user message.
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": continue_prompt},
    ]

    try:
        for full_response in _stream_completion(messages, max_tokens, temperature, top_p):
            # Keep the earlier turns and show the growing continuation.
            new_history = history.copy()
            new_history.append(("계속 작성", full_response))
            yield new_history

    except Exception as e:
        error_message = f"계속 작성 중 오류가 발생했습니다: {str(e)}"
        new_history = history.copy()
        new_history.append(("오류", error_message))
        yield new_history


_DEFAULT_SYSTEM_MESSAGE = """You are a professional translator and journalist. Follow these steps strictly:
1. TRANSLATION
- Start with ===번역=== marker
- Provide accurate Korean translation
- Maintain original meaning and context
2. ARTICLE WRITING
- Start with ===기사=== marker
- Write a new Korean news article based on the translation
- Follow newspaper article format
- Use formal news writing style
- End sentences with '다.'
- Include headline and subheadline
- Organize paragraphs clearly
- Put key information first
- Use quotes appropriately
3. IMAGE PROMPT GENERATION
- Start with ===이미지 프롬프트=== marker
- Create detailed Korean prompts for image generation
- Prompts should reflect the article's main theme and content
- Include key visual elements mentioned in the article
- Specify style, mood, and composition
- Format: "이미지 설명: [상세 설명]"
- Add style keywords: "스타일: [관련 키워드들]"
- Add mood keywords: "분위기: [관련 키워드들]"
IMPORTANT:
- Must complete all three steps in order
- Clearly separate each section with markers
- Never skip or combine steps
- Ensure image prompts align with article content"""


with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI 서비스") as iface:
    init_db()

    with gr.Tabs():
        # Tab: save/load company searches in the local DB.
        with gr.Tab("DB 검색"):
            gr.Markdown("## 한국 주요 기업 미국 뉴스 DB")
            gr.Markdown("각 기업의 미국 뉴스를 검색하여 DB에 저장하고 불러올 수 있습니다.")

            with gr.Column():
                # Two company cards per row.
                for i in range(0, len(KOREAN_COMPANIES), 2):
                    with gr.Row():
                        for company in KOREAN_COMPANIES[i:i + 2]:
                            with gr.Column():
                                with gr.Group():
                                    gr.Markdown(f"### {company}")
                                    with gr.Row():
                                        search_btn = gr.Button("검색", variant="primary")
                                        load_btn = gr.Button("출력", variant="secondary")
                                    result_display = gr.Markdown()

                                    # Default argument binds the current company
                                    # (avoids the late-binding closure pitfall).
                                    search_btn.click(
                                        fn=lambda c=company: search_company(c),
                                        outputs=result_display
                                    )
                                    load_btn.click(
                                        fn=lambda c=company: load_company(c),
                                        outputs=result_display
                                    )

                # Overall statistics.
                with gr.Row():
                    stats_btn = gr.Button("전체 검색 통계 보기", variant="secondary")
                    stats_display = gr.Markdown()
                    stats_btn.click(
                        fn=show_stats,
                        outputs=stats_display
                    )

        # Tab: per-country search.
        with gr.Tab("국가별"):
            gr.Markdown("검색어를 입력하고 원하는 국가(한국 제외)를 선택하면, 검색어와 일치하는 24시간 이내 뉴스를 최대 100개 출력합니다.")
            gr.Markdown("국가 선택후 검색어에 '한글'을 입력하면 현지 언어로 번역되어 검색합니다. 예: 'Taiwan' 국가 선택후 '삼성' 입력시 '三星'으로 자동 검색")

            with gr.Column():
                with gr.Row():
                    query = gr.Textbox(label="검색어")
                    country = gr.Dropdown(
                        choices=sorted(list(COUNTRY_LOCATIONS.keys())),
                        label="국가",
                        value="United States"
                    )

                gr.Examples(
                    examples=[
                        "artificial intelligence",
                        "NVIDIA",
                        "OPENAI",
                        "META LLAMA",
                        "black forest labs",
                        "GOOGLE gemini",
                        "anthropic Claude",
                        "X.AI",
                        "HUGGINGFACE",
                        "HYNIX",
                        "Large Language model",
                        "CHATGPT",
                        "StabilityAI",
                        "MISTRALAI",
                        "QWEN",
                        "MIDJOURNEY",
                        "GPU"
                    ],
                    inputs=query,
                    label="자주 사용되는 검색어"
                )

                status_message = gr.Markdown("", visible=True)
                translated_query_display = gr.Markdown(visible=False)
                search_button = gr.Button("검색", variant="primary")

                progress = gr.Progress()
                articles_state = gr.State([])

                article_components = create_article_components(MAX_COUNTRY_RESULTS)

        # Tab: region-wide search.
        with gr.Tab("전세계"):
            gr.Markdown("대륙별로 24시간 이내 뉴스를 검색합니다.")

            with gr.Column():
                with gr.Column(elem_id="status_area"):
                    with gr.Row():
                        query_global = gr.Textbox(label="검색어")
                        region_select = gr.Dropdown(
                            choices=REGIONS,
                            label="지역 선택",
                            value="동아시아"
                        )
                        search_button_global = gr.Button("검색", variant="primary")

                    status_message_global = gr.Markdown("")
                    translated_query_display_global = gr.Markdown("")

                with gr.Column(elem_id="results_area"):
                    articles_state_global = gr.State([])
                    global_article_components = create_article_components(MAX_GLOBAL_RESULTS)

        # Tab: AI translation / article generation.
        with gr.Tab("AI 기사 생성"):
            gr.Markdown("뉴스 URL을 입력하면 AI가 한국어로 번역하여 기사 형식으로 작성합니다.")
            gr.Markdown("이미지 생성: https://huggingface.co/spaces/ginipick/FLUXllama ")

            with gr.Column():
                chatbot = gr.Chatbot(height=600)

                with gr.Row():
                    url_input = gr.Textbox(
                        label="뉴스 URL",
                        placeholder="https://..."
                    )

                with gr.Row():
                    translate_button = gr.Button("기사 생성", variant="primary")
                    continue_button = gr.Button("계속 이어서 작성", variant="secondary")

                with gr.Accordion("고급 설정", open=False):
                    system_message = gr.Textbox(
                        value=_DEFAULT_SYSTEM_MESSAGE,
                        label="System message"
                    )

                    max_tokens = gr.Slider(
                        minimum=1,
                        maximum=7800,
                        value=7624,
                        step=1,
                        label="Max new tokens"
                    )
                    temperature = gr.Slider(
                        minimum=0.1,
                        maximum=4.0,
                        value=0.7,
                        step=0.1,
                        label="Temperature"
                    )
                    top_p = gr.Slider(
                        minimum=0.1,
                        maximum=1.0,
                        value=0.95,
                        step=0.05,
                        label="Top-P"
                    )

    # Event wiring.
    # Per-country tab. The output order must match search_and_display.
    search_outputs = [status_message, translated_query_display, gr.Markdown(visible=False)]
    for comp in article_components:
        search_outputs.extend([
            comp['group'], comp['title'], comp['image'],
            comp['snippet'], comp['info']
        ])
    search_outputs.append(articles_state)

    search_button.click(
        fn=search_and_display,
        inputs=[query, country, articles_state],
        outputs=search_outputs,
        show_progress=True
    )

    # Region tab. The output order must match search_global.
    global_search_outputs = [status_message_global, translated_query_display_global]
    for comp in global_article_components:
        global_search_outputs.extend([
            comp['group'], comp['title'], comp['image'],
            comp['snippet'], comp['info']
        ])
    global_search_outputs.append(articles_state_global)

    search_button_global.click(
        fn=search_global,
        inputs=[query_global, region_select, articles_state_global],
        outputs=global_search_outputs,
        show_progress=True
    )

    # AI article tab.
    translate_button.click(
        fn=respond,
        inputs=[
            url_input,
            chatbot,
            system_message,
            max_tokens,
            temperature,
            top_p,
        ],
        outputs=chatbot
    )

    # "Continue writing" button.
    continue_button.click(
        fn=continue_writing,
        inputs=[
            chatbot,
            system_message,
            max_tokens,
            temperature,
            top_p,
        ],
        outputs=chatbot
    )

# SECURITY: credentials were hard-coded in source. They can now be overridden
# through the environment; the defaults keep existing deployments working but
# should be replaced. NOTE(review): ssl_verify=False and share=True are kept
# from the original and deserve a second look for a public deployment.
iface.launch(
    server_name="0.0.0.0",
    server_port=7860,
    share=True,
    auth=(os.getenv("APP_AUTH_USER", "gini"), os.getenv("APP_AUTH_PASSWORD", "pick")),
    ssl_verify=False,
    show_error=True
)