"""Gradio news-search app: translate a query, search SERPHouse news, show results."""

import json
import os
from datetime import datetime, timedelta

import gradio as gr
import requests
from huggingface_hub import InferenceClient

API_KEY = os.getenv("SERPHOUSE_API_KEY")
hf_client = InferenceClient(
    "CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN")
)

SERPHOUSE_URL = "https://api.serphouse.com/serp/live"
# Seconds to wait for SERPHouse before giving up, so the UI handler cannot hang.
REQUEST_TIMEOUT = 30
# Number of pre-built (initially hidden) article slots in the UI.
MAX_ARTICLES = 100

# Country name -> language code used as the translation target.
# Countries missing from this mapping are searched with the untranslated query.
COUNTRY_LANGUAGES = {
    "South Korea": "ko",
    "Japan": "ja",
    "China": "zh",
    "Russia": "ru",
    "France": "fr",
    "Germany": "de",
    "Spain": "es",
    "Italy": "it",
    "Netherlands": "nl",
    "Portugal": "pt",
    "Thailand": "th",
    "Vietnam": "vi",
    "Indonesia": "id",
    "Malaysia": "ms",
    "Saudi Arabia": "ar",
    "United Arab Emirates": "ar",
    "Egypt": "ar",
    "Morocco": "ar",
    "Greece": "el",
    "Poland": "pl",
    "Czech Republic": "cs",
    "Hungary": "hu",
    "Turkey": "tr",
    "Romania": "ro",
    "Bulgaria": "bg",
    "Croatia": "hr",
    "Serbia": "sr",
    "Slovakia": "sk",
    "Slovenia": "sl",
    "Estonia": "et",
    "Latvia": "lv",
    "Lithuania": "lt",
}

# Country name shown in the dropdown -> value sent as the SERPHouse "loc" field.
COUNTRY_LOCATIONS = {
    "United States": "United States",
    "United Kingdom": "United Kingdom",
    "Canada": "Canada",
    "Australia": "Australia",
    "Germany": "Germany",
    "France": "France",
    "Japan": "Japan",
    "South Korea": "South Korea",
    "China": "China",
    "India": "India",
    "Brazil": "Brazil",
    "Mexico": "Mexico",
    "Russia": "Russia",
    "Italy": "Italy",
    "Spain": "Spain",
    "Netherlands": "Netherlands",
    "Singapore": "Singapore",
    "Hong Kong": "Hong Kong",
    "Indonesia": "Indonesia",
    "Malaysia": "Malaysia",
    "Philippines": "Philippines",
    "Thailand": "Thailand",
    "Vietnam": "Vietnam",
    "Belgium": "Belgium",
    "Denmark": "Denmark",
    "Finland": "Finland",
    "Ireland": "Ireland",
    "Norway": "Norway",
    "Poland": "Poland",
    "Sweden": "Sweden",
    "Switzerland": "Switzerland",
    "Austria": "Austria",
    "Czech Republic": "Czech Republic",
    "Greece": "Greece",
    "Hungary": "Hungary",
    "Portugal": "Portugal",
    "Romania": "Romania",
    "Turkey": "Turkey",
    "Israel": "Israel",
    "Saudi Arabia": "Saudi Arabia",
    "United Arab Emirates": "United Arab Emirates",
    "South Africa": "South Africa",
    "Argentina": "Argentina",
    "Chile": "Chile",
    "Colombia": "Colombia",
    "Peru": "Peru",
    "Venezuela": "Venezuela",
    "New Zealand": "New Zealand",
    "Bangladesh": "Bangladesh",
    "Pakistan": "Pakistan",
    "Egypt": "Egypt",
    "Morocco": "Morocco",
    "Nigeria": "Nigeria",
    "Kenya": "Kenya",
    "Ukraine": "Ukraine",
    "Croatia": "Croatia",
    "Slovakia": "Slovakia",
    "Bulgaria": "Bulgaria",
    "Serbia": "Serbia",
    "Estonia": "Estonia",
    "Latvia": "Latvia",
    "Lithuania": "Lithuania",
    "Slovenia": "Slovenia",
    "Luxembourg": "Luxembourg",
    "Malta": "Malta",
    "Cyprus": "Cyprus",
    "Iceland": "Iceland",
}

MAJOR_COUNTRIES = list(COUNTRY_LOCATIONS.keys())


def translate_query(query, country):
    """Translate *query* into the language mapped to *country*.

    Translation is best-effort: the original query is returned unchanged when
    the country has no entry in ``COUNTRY_LANGUAGES``, when the query is blank,
    when the model call fails, or when the model returns an empty string.

    Args:
        query: Search text entered by the user.
        country: Country name; looked up in ``COUNTRY_LANGUAGES``.

    Returns:
        The translated text with surrounding whitespace stripped, or *query*.
    """
    target_lang = COUNTRY_LANGUAGES.get(country)
    if target_lang is None or not query or not query.strip():
        # Nothing to translate; avoid a pointless model call.
        return query

    prompt = (
        f"Translate the following English text to {target_lang} language. "
        f"Only output the translated text without any explanations or quotes: {query}"
    )
    try:
        translated = hf_client.text_generation(
            prompt,
            max_new_tokens=100,
            temperature=0.3,
        ).strip()
    except Exception as e:
        # Deliberately broad: any translation failure falls back to the
        # original query instead of aborting the search.
        print(f"Translation error: {str(e)}")
        return query
    return translated or query


def search_serphouse(query, country, page=1, num_result=10, translated_query=None):
    """Run a SERPHouse live news search for *query* localized to *country*.

    Args:
        query: Original search text.
        country: Country name; mapped to the "loc" field via
            ``COUNTRY_LOCATIONS`` (falls back to "United States").
        page: Result page number sent to the API.
        num_result: Number of results requested from the API.
        translated_query: Optional already-translated query. When omitted the
            query is translated here with ``translate_query``; passing it lets
            a caller that already translated avoid a second model call.

    Returns:
        ``{"results": <decoded JSON>, "translated_query": ...}`` on success, or
        ``{"error": "Error: ...", "translated_query": ...}`` when the request
        fails or the response body is not valid JSON.
    """
    if translated_query is None:
        translated_query = translate_query(query, country)
    print(f"Original query: {query}")
    print(f"Translated query: {translated_query}")

    payload = {
        "data": {
            # Search with the translated text; otherwise translating is pointless.
            "q": translated_query,
            "domain": "google.com",
            "loc": COUNTRY_LOCATIONS.get(country, "United States"),
            # NOTE(review): kept as "en"; whether SERPHouse accepts the codes in
            # COUNTRY_LANGUAGES for this field should be confirmed before changing.
            "lang": "en",
            "device": "desktop",
            "serp_type": "news",
            "page": str(page),
            "num": str(num_result),
        }
    }
    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": f"Bearer {API_KEY}",
    }

    try:
        response = requests.post(
            url=SERPHOUSE_URL,
            json=payload,
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
        print("Request payload:", json.dumps(payload, indent=2, ensure_ascii=False))
        print("Response status:", response.status_code)
        response.raise_for_status()
        return {"results": response.json(), "translated_query": translated_query}
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions where
        # the decode error is not a RequestException subclass.
        return {"error": f"Error: {str(e)}", "translated_query": translated_query}


def format_results_from_raw(response_data):
    """Convert a ``search_serphouse`` result into ``(error_message, articles)``.

    Args:
        response_data: Dict holding either an "error" key, or "results" and
            "translated_query" keys.

    Returns:
        A tuple ``(error_message, articles)``. ``error_message`` is an empty
        string on success; otherwise it is a message and ``articles`` is empty.
        Each article is a dict with the keys "index", "title", "link",
        "snippet", "channel", "time", "image_url" and "translated_query".
    """
    if "error" in response_data:
        message = str(response_data["error"])
        # search_serphouse already prefixes its messages; do not double it.
        if not message.startswith("Error: "):
            message = "Error: " + message
        return message, []

    try:
        results = response_data["results"]
        translated_query = response_data["translated_query"]

        news_results = results.get('results', {}).get('results', {}).get('news', [])
        if not news_results:
            return "검색 결과가 없습니다.", []

        articles = [
            {
                "index": idx,
                "title": result.get("title", "제목 없음"),
                "link": result.get("url", result.get("link", "#")),
                "snippet": result.get("snippet", "내용 없음"),
                "channel": result.get("channel", result.get("source", "알 수 없음")),
                "time": result.get("time", result.get("date", "알 수 없는 시간")),
                "image_url": result.get("img", result.get("thumbnail", "")),
                "translated_query": translated_query,
            }
            for idx, result in enumerate(news_results, 1)
        ]
        return "", articles
    except Exception as e:
        # The response shape is not under our control; report instead of crashing.
        return f"결과 처리 중 오류 발생: {str(e)}", []


def serphouse_search(query, country):
    """Search news for *query* in *country* and return ``(error_message, articles)``."""
    response_data = search_serphouse(query, country)
    return format_results_from_raw(response_data)


def _hidden_article_updates():
    """Return the five updates that hide one article slot (group + 4 children)."""
    return [
        gr.update(visible=False),
        gr.update(),
        gr.update(),
        gr.update(),
        gr.update(),
    ]


def _visible_article_updates(article):
    """Return the five updates that fill and show one article slot."""
    image_url = article['image_url']
    # Inline data: URIs are not passed to the image component; hide it instead.
    if image_url and not image_url.startswith('data:image'):
        image_update = gr.update(value=image_url, visible=True)
    else:
        image_update = gr.update(value=None, visible=False)
    return [
        gr.update(visible=True),
        gr.update(value=f"### [{article['title']}]({article['link']})"),
        image_update,
        gr.update(value=f"**요약:** {article['snippet']}"),
        gr.update(value=f"**출처:** {article['channel']} | **시간:** {article['time']}"),
    ]


css = """
footer {visibility: hidden;}
"""

# Gradio interface.
with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI 서비스") as iface:
    gr.Markdown("검색어를 입력하고 원하는 국가를 선택하면, 검색어와 일치하는 24시간 이내 뉴스를 최대 100개 출력합니다.")

    with gr.Column():
        with gr.Row():
            query = gr.Textbox(label="검색어")
            country = gr.Dropdown(MAJOR_COUNTRIES, label="국가", value="South Korea")

        # Shows the original and translated query after a search.
        translated_query_display = gr.Markdown(visible=False)

        search_button = gr.Button("검색", variant="primary")
        # Shows error / "no results" messages; hidden on success.
        status_message = gr.Markdown(visible=False)
        articles_state = gr.State([])

    # Pre-build a fixed pool of hidden article slots; a search fills and reveals
    # as many as it has results for.
    article_components = []
    for i in range(MAX_ARTICLES):
        with gr.Group(visible=False) as article_group:
            title = gr.Markdown()
            image = gr.Image(width=200, height=150)
            snippet = gr.Markdown()
            info = gr.Markdown()

        article_components.append({
            'group': article_group,
            'title': title,
            'image': image,
            'snippet': snippet,
            'info': info,
            'index': i,
        })

    def search_and_display(query, country, articles_state, progress=gr.Progress()):
        """Handle a search click and build the updates for every output.

        Args:
            query: Text from the search box.
            country: Selected country name.
            articles_state: Previous article list (replaced by the return value).
            progress: Gradio progress tracker; the ``gr.Progress()`` default is
                how Gradio injects it into the handler.

        Returns:
            A list of updates in the same order as ``search_outputs``: the
            translated-query display, the status message, five updates per
            article slot, then the new article list for ``articles_state``.
        """
        progress(0, desc="검색어 번역 중...")

        # Translate once and reuse the result for both display and search.
        translated_query = translate_query(query, country)
        if translated_query != query:
            translated_display = (
                f"**원본 검색어:** {query}\n**번역된 검색어:** {translated_query}"
            )
        else:
            translated_display = f"**검색어:** {query}"

        progress(0.2, desc="검색 시작...")
        response_data = search_serphouse(
            query, country, translated_query=translated_query
        )
        error_message, articles = format_results_from_raw(response_data)

        progress(0.5, desc="결과 처리 중...")
        outputs = [gr.update(value=translated_display, visible=True)]

        if error_message:
            outputs.append(gr.update(value=error_message, visible=True))
            articles = []
        else:
            outputs.append(gr.update(value="", visible=False))
            # Only as many articles as there are slots can be displayed.
            articles = articles[:len(article_components)]

        total_articles = len(articles)
        for idx in range(len(article_components)):
            if idx < total_articles:
                # Report progress only for real articles so the fraction stays
                # within [0.5, 1.0] and the counter never exceeds the total.
                progress(
                    0.5 + 0.5 * (idx + 1) / total_articles,
                    desc=f"결과 표시 중... {idx + 1}/{total_articles}",
                )
                outputs.extend(_visible_article_updates(articles[idx]))
            else:
                outputs.extend(_hidden_article_updates())

        progress(1.0, desc="완료!")
        outputs.append(articles)
        return outputs

    # Output order must match the list built in search_and_display.
    search_outputs = [translated_query_display, status_message]
    for comp in article_components:
        search_outputs.extend([
            comp['group'],
            comp['title'],
            comp['image'],
            comp['snippet'],
            comp['info'],
        ])
    search_outputs.append(articles_state)

    search_button.click(
        search_and_display,
        inputs=[query, country, articles_state],
        outputs=search_outputs,
        show_progress=True,
    )

iface.launch()