import gradio as gr
import requests
import json
import os
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor
from functools import lru_cache
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry  # direct import; the requests.packages path is deprecated
from openai import OpenAI
from bs4 import BeautifulSoup
import re
import pathlib
import sqlite3
import pytz
# List of major Korean companies to track
KOREAN_COMPANIES = [
"SAMSUNG",
"HYNIX",
"HYUNDAI",
"KIA",
"LG",
"HANWHA",
"KB",
"SKT",
"KT",
"HANA",
"SK",
"POSCO",
"DOOSAN",
"WOORI",
"KAKAO",
"Celltrion"
]
def convert_to_seoul_time(timestamp_str):
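    """Convert a UTC timestamp string ('%Y-%m-%d %H:%M:%S') to Asia/Seoul local time.

    Returns the input unchanged if it cannot be parsed.
    """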
try:
utc_time = datetime.strptime(timestamp_str, '%Y-%m-%d %H:%M:%S')
utc_time = pytz.utc.localize(utc_time)
seoul_tz = pytz.timezone('Asia/Seoul')
seoul_time = utc_time.astimezone(seoul_tz)
return seoul_time.strftime('%Y-%m-%d %H:%M:%S KST')
    except Exception:
        # Fall back to the raw string if it does not match the expected format
        return timestamp_str
def analyze_sentiment_batch(articles, client):
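    """Run one batched sentiment analysis over all article titles and snippets via the LLM client."""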
try:
# ๋ชจ๋“  ๊ธฐ์‚ฌ์˜ ์ œ๋ชฉ๊ณผ ๋‚ด์šฉ์„ ํ•˜๋‚˜์˜ ํ…์ŠคํŠธ๋กœ ๊ฒฐํ•ฉ
combined_text = "\n\n".join([
f"์ œ๋ชฉ: {article.get('title', '')}\n๋‚ด์šฉ: {article.get('snippet', '')}"
for article in articles
])
prompt = f"""๋‹ค์Œ ๋‰ด์Šค ๋ชจ์Œ์— ๋Œ€ํ•ด ์ „๋ฐ˜์ ์ธ ๊ฐ์„ฑ ๋ถ„์„์„ ์ˆ˜ํ–‰ํ•˜์„ธ์š”:
๋‰ด์Šค ๋‚ด์šฉ:
{combined_text}
๋‹ค์Œ ํ˜•์‹์œผ๋กœ ๋ถ„์„ํ•ด์ฃผ์„ธ์š”:
1. ์ „๋ฐ˜์  ๊ฐ์„ฑ: [๊ธ์ •/๋ถ€์ •/์ค‘๋ฆฝ]
2. ์ฃผ์š” ๊ธ์ •์  ์š”์†Œ:
- [ํ•ญ๋ชฉ1]
- [ํ•ญ๋ชฉ2]
3. ์ฃผ์š” ๋ถ€์ •์  ์š”์†Œ:
- [ํ•ญ๋ชฉ1]
- [ํ•ญ๋ชฉ2]
4. ์ข…ํ•ฉ ํ‰๊ฐ€: [์ƒ์„ธ ์„ค๋ช…]
"""
response = client.chat.completions.create(
model="CohereForAI/c4ai-command-r-plus-08-2024",
messages=[{"role": "user", "content": prompt}],
temperature=0.3,
max_tokens=1000
)
return response.choices[0].message.content
except Exception as e:
return f"๊ฐ์„ฑ ๋ถ„์„ ์‹คํŒจ: {str(e)}"
# Initialize the SQLite database
def init_db():
db_path = pathlib.Path("search_results.db")
conn = sqlite3.connect(db_path)
c = conn.cursor()
c.execute('''CREATE TABLE IF NOT EXISTS searches
(id INTEGER PRIMARY KEY AUTOINCREMENT,
keyword TEXT,
country TEXT,
results TEXT,
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP)''')
conn.commit()
conn.close()
# Save search results to the database
def save_to_db(keyword, country, results):
conn = sqlite3.connect("search_results.db")
c = conn.cursor()
    # Store the timestamp in Seoul local time
seoul_tz = pytz.timezone('Asia/Seoul')
seoul_time = datetime.now(seoul_tz)
c.execute("""INSERT INTO searches
(keyword, country, results, timestamp)
VALUES (?, ?, ?, ?)""",
(keyword, country, json.dumps(results),
seoul_time.strftime('%Y-%m-%d %H:%M:%S')))
conn.commit()
conn.close()
# DB์—์„œ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ๋ถˆ๋Ÿฌ์˜ค๊ธฐ ํ•จ์ˆ˜
def load_from_db(keyword, country):
conn = sqlite3.connect("search_results.db")
c = conn.cursor()
c.execute("SELECT results, timestamp FROM searches WHERE keyword=? AND country=? ORDER BY timestamp DESC LIMIT 1",
(keyword, country))
result = c.fetchone()
conn.close()
if result:
return json.loads(result[0]), convert_to_seoul_time(result[1])
return None, None
# Format articles as Markdown for display
def display_results(articles):
output = ""
for idx, article in enumerate(articles, 1):
output += f"### {idx}. {article['title']}\n"
output += f"์ถœ์ฒ˜: {article['channel']}\n"
output += f"์‹œ๊ฐ„: {article['time']}\n"
output += f"๋งํฌ: {article['link']}\n"
output += f"์š”์•ฝ: {article['snippet']}\n\n"
return output
# Search US news for a company and cache the results
def search_company(company):
error_message, articles = serphouse_search(company, "United States")
if not error_message and articles:
save_to_db(company, "United States", articles)
return display_results(articles)
return f"{company}์— ๋Œ€ํ•œ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค."
# Load cached results for a company
def load_company(company):
results, timestamp = load_from_db(company, "United States")
if results:
return f"### {company} ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ\n์ €์žฅ ์‹œ๊ฐ„: {timestamp}\n\n" + display_results(results)
return f"{company}์— ๋Œ€ํ•œ ์ €์žฅ๋œ ๊ฒฐ๊ณผ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค."
# Aggregate statistics and sentiment report across all companies
def show_stats():
conn = sqlite3.connect("search_results.db")
c = conn.cursor()
output = "## ํ•œ๊ตญ ๊ธฐ์—… ๋‰ด์Šค ๋ถ„์„ ๋ฆฌํฌํŠธ\n\n"
for company in KOREAN_COMPANIES:
c.execute("""
SELECT results, timestamp
FROM searches
WHERE keyword = ?
ORDER BY timestamp DESC
LIMIT 1
""", (company,))
result = c.fetchone()
if result:
results_json, timestamp = result
articles = json.loads(results_json)
seoul_time = convert_to_seoul_time(timestamp)
output += f"### {company}\n"
output += f"- ๋งˆ์ง€๋ง‰ ์—…๋ฐ์ดํŠธ: {seoul_time}\n"
output += f"- ์ €์žฅ๋œ ๊ธฐ์‚ฌ ์ˆ˜: {len(articles)}๊ฑด\n\n"
if articles:
                # Sentiment analysis across all stored articles
sentiment_analysis = analyze_sentiment_batch(articles, client)
output += "#### ๋‰ด์Šค ๊ฐ์„ฑ ๋ถ„์„\n"
output += f"{sentiment_analysis}\n\n"
output += "---\n\n"
conn.close()
return output
ACCESS_TOKEN = os.getenv("HF_TOKEN")
if not ACCESS_TOKEN:
raise ValueError("HF_TOKEN environment variable is not set")
client = OpenAI(
base_url="https://api-inference.huggingface.co/v1/",
api_key=ACCESS_TOKEN,
)
MAX_COUNTRY_RESULTS = 100   # maximum results per country
MAX_GLOBAL_RESULTS = 1000   # maximum results for the worldwide search
def create_article_components(max_results):
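    """Build `max_results` hidden Gradio article slots (group, title, image, snippet, info).

    Note: the tabs below construct the same components inline, so this helper appears to be unused.
    """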
article_components = []
for i in range(max_results):
with gr.Group(visible=False) as article_group:
title = gr.Markdown()
image = gr.Image(width=200, height=150)
snippet = gr.Markdown()
info = gr.Markdown()
article_components.append({
'group': article_group,
'title': title,
'image': image,
'snippet': snippet,
'info': info,
'index': i,
})
return article_components
API_KEY = os.getenv("SERPHOUSE_API_KEY")
# Country-to-language code mapping
COUNTRY_LANGUAGES = {
"United States": "en",
"KOREA": "ko",
"United Kingdom": "en",
"Taiwan": "zh-TW",
"Canada": "en",
"Australia": "en",
"Germany": "de",
"France": "fr",
"Japan": "ja",
"China": "zh",
"India": "hi",
"Brazil": "pt",
"Mexico": "es",
"Russia": "ru",
"Italy": "it",
"Spain": "es",
"Netherlands": "nl",
"Singapore": "en",
"Hong Kong": "zh-HK",
"Indonesia": "id",
"Malaysia": "ms",
"Philippines": "tl",
"Thailand": "th",
"Vietnam": "vi",
"Belgium": "nl",
"Denmark": "da",
"Finland": "fi",
"Ireland": "en",
"Norway": "no",
"Poland": "pl",
"Sweden": "sv",
"Switzerland": "de",
"Austria": "de",
"Czech Republic": "cs",
"Greece": "el",
"Hungary": "hu",
"Portugal": "pt",
"Romania": "ro",
"Turkey": "tr",
"Israel": "he",
"Saudi Arabia": "ar",
"United Arab Emirates": "ar",
"South Africa": "en",
"Argentina": "es",
"Chile": "es",
"Colombia": "es",
"Peru": "es",
"Venezuela": "es",
"New Zealand": "en",
"Bangladesh": "bn",
"Pakistan": "ur",
"Egypt": "ar",
"Morocco": "ar",
"Nigeria": "en",
"Kenya": "sw",
"Ukraine": "uk",
"Croatia": "hr",
"Slovakia": "sk",
"Bulgaria": "bg",
"Serbia": "sr",
"Estonia": "et",
"Latvia": "lv",
"Lithuania": "lt",
"Slovenia": "sl",
"Luxembourg": "fr",
"Malta": "mt",
"Cyprus": "el",
"Iceland": "is"
}
COUNTRY_LOCATIONS = {
"United States": "United States",
"KOREA": "kr",
"United Kingdom": "United Kingdom",
"Taiwan": "Taiwan",
"Canada": "Canada",
"Australia": "Australia",
"Germany": "Germany",
"France": "France",
"Japan": "Japan",
"China": "China",
"India": "India",
"Brazil": "Brazil",
"Mexico": "Mexico",
"Russia": "Russia",
"Italy": "Italy",
"Spain": "Spain",
"Netherlands": "Netherlands",
"Singapore": "Singapore",
"Hong Kong": "Hong Kong",
"Indonesia": "Indonesia",
"Malaysia": "Malaysia",
"Philippines": "Philippines",
"Thailand": "Thailand",
"Vietnam": "Vietnam",
"Belgium": "Belgium",
"Denmark": "Denmark",
"Finland": "Finland",
"Ireland": "Ireland",
"Norway": "Norway",
"Poland": "Poland",
"Sweden": "Sweden",
"Switzerland": "Switzerland",
"Austria": "Austria",
"Czech Republic": "Czech Republic",
"Greece": "Greece",
"Hungary": "Hungary",
"Portugal": "Portugal",
"Romania": "Romania",
"Turkey": "Turkey",
"Israel": "Israel",
"Saudi Arabia": "Saudi Arabia",
"United Arab Emirates": "United Arab Emirates",
"South Africa": "South Africa",
"Argentina": "Argentina",
"Chile": "Chile",
"Colombia": "Colombia",
"Peru": "Peru",
"Venezuela": "Venezuela",
"New Zealand": "New Zealand",
"Bangladesh": "Bangladesh",
"Pakistan": "Pakistan",
"Egypt": "Egypt",
"Morocco": "Morocco",
"Nigeria": "Nigeria",
"Kenya": "Kenya",
"Ukraine": "Ukraine",
"Croatia": "Croatia",
"Slovakia": "Slovakia",
"Bulgaria": "Bulgaria",
"Serbia": "Serbia",
"Estonia": "Estonia",
"Latvia": "Latvia",
"Lithuania": "Lithuania",
"Slovenia": "Slovenia",
"Luxembourg": "Luxembourg",
"Malta": "Malta",
"Cyprus": "Cyprus",
"Iceland": "Iceland"
}
# ์ง€์—ญ ์ •์˜
# ๋™์•„์‹œ์•„ ์ง€์—ญ
COUNTRY_LANGUAGES_EAST_ASIA = {
"KOREA": "ko",
"Taiwan": "zh-TW",
"Japan": "ja",
"China": "zh",
"Hong Kong": "zh-HK"
}
COUNTRY_LOCATIONS_EAST_ASIA = {
"KOREA": "KOREA",
"Taiwan": "Taiwan",
"Japan": "Japan",
"China": "China",
"Hong Kong": "Hong Kong"
}
# Southeast Asia / Oceania region
COUNTRY_LANGUAGES_SOUTHEAST_ASIA_OCEANIA = {
"Indonesia": "id",
"Malaysia": "ms",
"Philippines": "tl",
"Thailand": "th",
"Vietnam": "vi",
"Singapore": "en",
"Papua New Guinea": "en",
"Australia": "en",
"New Zealand": "en"
}
COUNTRY_LOCATIONS_SOUTHEAST_ASIA_OCEANIA = {
"Indonesia": "Indonesia",
"Malaysia": "Malaysia",
"Philippines": "Philippines",
"Thailand": "Thailand",
"Vietnam": "Vietnam",
"Singapore": "Singapore",
"Papua New Guinea": "Papua New Guinea",
"Australia": "Australia",
"New Zealand": "New Zealand"
}
# Eastern Europe region
COUNTRY_LANGUAGES_EAST_EUROPE = {
"Poland": "pl",
"Czech Republic": "cs",
"Greece": "el",
"Hungary": "hu",
"Romania": "ro",
"Ukraine": "uk",
"Croatia": "hr",
"Slovakia": "sk",
"Bulgaria": "bg",
"Serbia": "sr",
"Estonia": "et",
"Latvia": "lv",
"Lithuania": "lt",
"Slovenia": "sl",
"Malta": "mt",
"Cyprus": "el",
"Iceland": "is",
"Russia": "ru"
}
COUNTRY_LOCATIONS_EAST_EUROPE = {
"Poland": "Poland",
"Czech Republic": "Czech Republic",
"Greece": "Greece",
"Hungary": "Hungary",
"Romania": "Romania",
"Ukraine": "Ukraine",
"Croatia": "Croatia",
"Slovakia": "Slovakia",
"Bulgaria": "Bulgaria",
"Serbia": "Serbia",
"Estonia": "Estonia",
"Latvia": "Latvia",
"Lithuania": "Lithuania",
"Slovenia": "Slovenia",
"Malta": "Malta",
"Cyprus": "Cyprus",
"Iceland": "Iceland",
"Russia": "Russia"
}
# Western Europe region
COUNTRY_LANGUAGES_WEST_EUROPE = {
"Germany": "de",
"France": "fr",
"Italy": "it",
"Spain": "es",
"Netherlands": "nl",
"Belgium": "nl",
"Ireland": "en",
"Sweden": "sv",
"Switzerland": "de",
"Austria": "de",
"Portugal": "pt",
"Luxembourg": "fr",
"United Kingdom": "en"
}
COUNTRY_LOCATIONS_WEST_EUROPE = {
"Germany": "Germany",
"France": "France",
"Italy": "Italy",
"Spain": "Spain",
"Netherlands": "Netherlands",
"Belgium": "Belgium",
"Ireland": "Ireland",
"Sweden": "Sweden",
"Switzerland": "Switzerland",
"Austria": "Austria",
"Portugal": "Portugal",
"Luxembourg": "Luxembourg",
"United Kingdom": "United Kingdom"
}
# ์ค‘๋™/์•„ํ”„๋ฆฌ์นด ์ง€์—ญ
COUNTRY_LANGUAGES_ARAB_AFRICA = {
"South Africa": "en",
"Nigeria": "en",
"Kenya": "sw",
"Egypt": "ar",
"Morocco": "ar",
"Saudi Arabia": "ar",
"United Arab Emirates": "ar",
"Israel": "he"
}
COUNTRY_LOCATIONS_ARAB_AFRICA = {
"South Africa": "South Africa",
"Nigeria": "Nigeria",
"Kenya": "Kenya",
"Egypt": "Egypt",
"Morocco": "Morocco",
"Saudi Arabia": "Saudi Arabia",
"United Arab Emirates": "United Arab Emirates",
"Israel": "Israel"
}
# Americas region
COUNTRY_LANGUAGES_AMERICA = {
"United States": "en",
"Canada": "en",
"Mexico": "es",
"Brazil": "pt",
"Argentina": "es",
"Chile": "es",
"Colombia": "es",
"Peru": "es",
"Venezuela": "es"
}
COUNTRY_LOCATIONS_AMERICA = {
"United States": "United States",
"Canada": "Canada",
"Mexico": "Mexico",
"Brazil": "Brazil",
"Argentina": "Argentina",
"Chile": "Chile",
"Colombia": "Colombia",
"Peru": "Peru",
"Venezuela": "Venezuela"
}
# Region selection list (labels are shown in Korean in the UI and matched in get_region_countries)
REGIONS = [
"๋™์•„์‹œ์•„",
"๋™๋‚จ์•„์‹œ์•„/์˜ค์„ธ์•„๋‹ˆ์•„",
"๋™์œ ๋Ÿฝ",
"์„œ์œ ๋Ÿฝ",
"์ค‘๋™/์•„ํ”„๋ฆฌ์นด",
"์•„๋ฉ”๋ฆฌ์นด"
]
@lru_cache(maxsize=100)
def translate_query(query, country):
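    """Translate `query` into the target language for `country` via the unofficial Google
    translate endpoint. Returns the query unchanged for English input, for Korea, or on failure.
    Cached with lru_cache to avoid repeated network calls.
    """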
try:
if is_english(query):
return query
if country in COUNTRY_LANGUAGES:
if country == "South Korea":
return query
target_lang = COUNTRY_LANGUAGES[country]
url = "https://translate.googleapis.com/translate_a/single"
params = {
"client": "gtx",
"sl": "auto",
"tl": target_lang,
"dt": "t",
"q": query
}
session = requests.Session()
retries = Retry(total=3, backoff_factor=0.5)
session.mount('https://', HTTPAdapter(max_retries=retries))
response = session.get(url, params=params, timeout=(5, 10))
            # Join all returned segments; taking only [0][0][0] truncates longer queries
            translated_text = "".join(seg[0] for seg in response.json()[0] if seg and seg[0])
            return translated_text
return query
except Exception as e:
print(f"๋ฒˆ์—ญ ์˜ค๋ฅ˜: {str(e)}")
return query
@lru_cache(maxsize=200)
def translate_to_korean(text):
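    """Translate arbitrary text to Korean via the unofficial Google translate endpoint (cached)."""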
try:
url = "https://translate.googleapis.com/translate_a/single"
params = {
"client": "gtx",
"sl": "auto",
"tl": "ko",
"dt": "t",
"q": text
}
session = requests.Session()
retries = Retry(total=3, backoff_factor=0.5)
session.mount('https://', HTTPAdapter(max_retries=retries))
response = session.get(url, params=params, timeout=(5, 10))
        # Join all returned segments; taking only [0][0][0] truncates longer text
        translated_text = "".join(seg[0] for seg in response.json()[0] if seg and seg[0])
        return translated_text
except Exception as e:
print(f"ํ•œ๊ธ€ ๋ฒˆ์—ญ ์˜ค๋ฅ˜: {str(e)}")
return text
def is_english(text):
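    """Heuristic: treat the text as English if every character (ignoring spaces, '-', '_') is ASCII."""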
return all(ord(char) < 128 for char in text.replace(' ', '').replace('-', '').replace('_', ''))
def is_korean(text):
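    """True if the text contains at least one Hangul syllable (U+AC00..U+D7A3)."""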
return any('\uAC00' <= char <= '\uD7A3' for char in text)
def search_serphouse(query, country, page=1, num_result=100):
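    """Query the SERPHouse live news API for `query` in `country`, limited to the last 24 hours.

    Returns {"results": <raw API response>, "translated_query": str} on success,
    or {"error": <message>, "translated_query": str} on failure.
    """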
url = "https://api.serphouse.com/serp/live"
    now = datetime.now(pytz.utc)  # timezone-aware; datetime.utcnow() is deprecated
yesterday = now - timedelta(days=1)
date_range = f"{yesterday.strftime('%Y-%m-%d')},{now.strftime('%Y-%m-%d')}"
translated_query = translate_query(query, country)
payload = {
"data": {
"q": translated_query,
"domain": "google.com",
"loc": COUNTRY_LOCATIONS.get(country, "United States"),
"lang": COUNTRY_LANGUAGES.get(country, "en"),
"device": "desktop",
"serp_type": "news",
"page": "1",
"num": "100",
"date_range": date_range,
"sort_by": "date"
}
}
headers = {
"accept": "application/json",
"content-type": "application/json",
"authorization": f"Bearer {API_KEY}"
}
try:
        # Build a requests session with retries
session = requests.Session()
# ์žฌ์‹œ๋„ ์„ค์ • ๊ฐ•ํ™”
retries = Retry(
total=5, # ์ตœ๋Œ€ ์žฌ์‹œ๋„ ํšŸ์ˆ˜ ์ฆ๊ฐ€
backoff_factor=1, # ์žฌ์‹œ๋„ ๊ฐ„๊ฒฉ ์ฆ๊ฐ€
status_forcelist=[500, 502, 503, 504, 429], # ์žฌ์‹œ๋„ํ•  HTTP ์ƒํƒœ ์ฝ”๋“œ
allowed_methods=["POST"] # POST ์š”์ฒญ์— ๋Œ€ํ•œ ์žฌ์‹œ๋„ ํ—ˆ์šฉ
)
        # Mount the retry adapter on both schemes
adapter = HTTPAdapter(max_retries=retries)
session.mount('http://', adapter)
session.mount('https://', adapter)
# ํƒ€์ž„์•„์›ƒ ๊ฐ’ ์ฆ๊ฐ€ (connect timeout, read timeout)
response = session.post(
url,
json=payload,
headers=headers,
            timeout=(30, 30)  # (connect, read) timeouts in seconds
)
response.raise_for_status()
return {"results": response.json(), "translated_query": translated_query}
except requests.exceptions.Timeout:
return {
"error": "๊ฒ€์ƒ‰ ์‹œ๊ฐ„์ด ์ดˆ๊ณผ๋˜์—ˆ์Šต๋‹ˆ๋‹ค. ์ž ์‹œ ํ›„ ๋‹ค์‹œ ์‹œ๋„ํ•ด์ฃผ์„ธ์š”.",
"translated_query": query
}
except requests.exceptions.RequestException as e:
return {
"error": f"๊ฒ€์ƒ‰ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}",
"translated_query": query
}
except Exception as e:
return {
"error": f"์˜ˆ๊ธฐ์น˜ ์•Š์€ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}",
"translated_query": query
}
def format_results_from_raw(response_data):
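    """Parse a raw SERPHouse response into (error_message, articles).

    Articles from Korean domains or with Korea-related keywords are filtered out,
    leaving only foreign coverage.
    """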
if "error" in response_data:
return "Error: " + response_data["error"], []
try:
results = response_data["results"]
translated_query = response_data["translated_query"]
news_results = results.get('results', {}).get('results', {}).get('news', [])
if not news_results:
return "๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.", []
# ํ•œ๊ตญ ๋„๋ฉ”์ธ ๋ฐ ํ•œ๊ตญ ๊ด€๋ จ ํ‚ค์›Œ๋“œ ํ•„ํ„ฐ๋ง
korean_domains = ['.kr', 'korea', 'korean', 'yonhap', 'hankyung', 'chosun',
'donga', 'joins', 'hani', 'koreatimes', 'koreaherald']
korean_keywords = ['korea', 'korean', 'seoul', 'busan', 'incheon', 'daegu',
'gwangju', 'daejeon', 'ulsan', 'sejong']
filtered_articles = []
for idx, result in enumerate(news_results, 1):
url = result.get("url", result.get("link", "")).lower()
title = result.get("title", "").lower()
channel = result.get("channel", result.get("source", "")).lower()
# ํ•œ๊ตญ ๊ด€๋ จ ์ปจํ…์ธ  ํ•„ํ„ฐ๋ง
is_korean_content = any(domain in url or domain in channel for domain in korean_domains) or \
any(keyword in title.lower() for keyword in korean_keywords)
if not is_korean_content:
filtered_articles.append({
"index": idx,
"title": result.get("title", "์ œ๋ชฉ ์—†์Œ"),
"link": url,
"snippet": result.get("snippet", "๋‚ด์šฉ ์—†์Œ"),
"channel": result.get("channel", result.get("source", "์•Œ ์ˆ˜ ์—†์Œ")),
"time": result.get("time", result.get("date", "์•Œ ์ˆ˜ ์—†๋Š” ์‹œ๊ฐ„")),
"image_url": result.get("img", result.get("thumbnail", "")),
"translated_query": translated_query
})
return "", filtered_articles
except Exception as e:
return f"๊ฒฐ๊ณผ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}", []
def serphouse_search(query, country):
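    """Convenience wrapper: run the search, then format the response. Returns (error_message, articles)."""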
response_data = search_serphouse(query, country)
return format_results_from_raw(response_data)
def search_and_display(query, country, articles_state, progress=gr.Progress()):
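    """Gradio callback for the per-country tab: translates the query, runs the search,
    translates snippets to Korean in parallel, and returns the full list of UI updates.
    """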
with ThreadPoolExecutor(max_workers=3) as executor:
progress(0, desc="๊ฒ€์ƒ‰์–ด ๋ฒˆ์—ญ ์ค‘...")
future_translation = executor.submit(translate_query, query, country)
translated_query = future_translation.result()
translated_display = f"**์›๋ณธ ๊ฒ€์ƒ‰์–ด:** {query}\n**๋ฒˆ์—ญ๋œ ๊ฒ€์ƒ‰์–ด:** {translated_query}" if translated_query != query else f"**๊ฒ€์ƒ‰์–ด:** {query}"
progress(0.3, desc="๊ฒ€์ƒ‰ ์ค‘...")
response_data = search_serphouse(query, country)
progress(0.6, desc="๊ฒฐ๊ณผ ์ฒ˜๋ฆฌ ์ค‘...")
error_message, articles = format_results_from_raw(response_data)
outputs = []
outputs.append(gr.update(value="๊ฒ€์ƒ‰์„ ์ง„ํ–‰์ค‘์ž…๋‹ˆ๋‹ค...", visible=True))
outputs.append(gr.update(value=translated_display, visible=True))
if error_message:
outputs.append(gr.update(value=error_message, visible=True))
for comp in article_components:
outputs.extend([
gr.update(visible=False), gr.update(), gr.update(),
gr.update(), gr.update()
])
articles_state = []
else:
outputs.append(gr.update(value="", visible=False))
if not error_message and articles:
futures = []
for article in articles:
future = executor.submit(translate_to_korean, article['snippet'])
futures.append((article, future))
progress(0.8, desc="๋ฒˆ์—ญ ์ฒ˜๋ฆฌ ์ค‘...")
for article, future in futures:
article['korean_summary'] = future.result()
total_articles = len(articles)
for idx, comp in enumerate(article_components):
                progress(min(1.0, (idx + 1) / total_articles), desc=f"๊ฒฐ๊ณผ ํ‘œ์‹œ ์ค‘... {min(idx + 1, total_articles)}/{total_articles}")
if idx < len(articles):
article = articles[idx]
image_url = article['image_url']
image_update = gr.update(value=image_url, visible=True) if image_url and not image_url.startswith('data:image') else gr.update(value=None, visible=False)
outputs.extend([
gr.update(visible=True),
gr.update(value=f"### [{article['title']}]({article['link']})"),
image_update,
gr.update(value=f"**์š”์•ฝ:** {article['snippet']}\n\n**ํ•œ๊ธ€ ์š”์•ฝ:** {article['korean_summary']}"),
gr.update(value=f"**์ถœ์ฒ˜:** {article['channel']} | **์‹œ๊ฐ„:** {article['time']}")
])
else:
outputs.extend([
gr.update(visible=False), gr.update(), gr.update(),
gr.update(), gr.update()
])
articles_state = articles
progress(1.0, desc="์™„๋ฃŒ!")
outputs.append(articles_state)
outputs[0] = gr.update(value="", visible=False)
return outputs
def get_region_countries(region):
"""์„ ํƒ๋œ ์ง€์—ญ์˜ ๊ตญ๊ฐ€ ๋ฐ ์–ธ์–ด ์ •๋ณด ๋ฐ˜ํ™˜"""
if region == "๋™์•„์‹œ์•„":
return COUNTRY_LOCATIONS_EAST_ASIA, COUNTRY_LANGUAGES_EAST_ASIA
elif region == "๋™๋‚จ์•„์‹œ์•„/์˜ค์„ธ์•„๋‹ˆ์•„":
return COUNTRY_LOCATIONS_SOUTHEAST_ASIA_OCEANIA, COUNTRY_LANGUAGES_SOUTHEAST_ASIA_OCEANIA
elif region == "๋™์œ ๋Ÿฝ":
return COUNTRY_LOCATIONS_EAST_EUROPE, COUNTRY_LANGUAGES_EAST_EUROPE
elif region == "์„œ์œ ๋Ÿฝ":
return COUNTRY_LOCATIONS_WEST_EUROPE, COUNTRY_LANGUAGES_WEST_EUROPE
elif region == "์ค‘๋™/์•„ํ”„๋ฆฌ์นด":
return COUNTRY_LOCATIONS_ARAB_AFRICA, COUNTRY_LANGUAGES_ARAB_AFRICA
elif region == "์•„๋ฉ”๋ฆฌ์นด":
return COUNTRY_LOCATIONS_AMERICA, COUNTRY_LANGUAGES_AMERICA
return {}, {}
def search_global(query, region, articles_state_global):
"""์ง€์—ญ๋ณ„ ๊ฒ€์ƒ‰ ํ•จ์ˆ˜"""
status_msg = f"{region} ์ง€์—ญ ๊ฒ€์ƒ‰์„ ์‹œ์ž‘ํ•ฉ๋‹ˆ๋‹ค..."
    all_results = []
    unique_results = []  # initialized up front so the final status line cannot hit a NameError
outputs = [
gr.update(value=status_msg, visible=True),
gr.update(value=f"**๊ฒ€์ƒ‰์–ด:** {query}", visible=True),
]
for _ in global_article_components:
outputs.extend([
gr.update(visible=False), gr.update(), gr.update(),
gr.update(), gr.update()
])
outputs.append([])
yield outputs
# ์„ ํƒ๋œ ์ง€์—ญ์˜ ๊ตญ๊ฐ€ ์ •๋ณด ๊ฐ€์ ธ์˜ค๊ธฐ
locations, languages = get_region_countries(region)
total_countries = len(locations)
for idx, (country, location) in enumerate(locations.items(), 1):
try:
status_msg = f"{region} - {country} ๊ฒ€์ƒ‰ ์ค‘... ({idx}/{total_countries} ๊ตญ๊ฐ€)"
outputs[0] = gr.update(value=status_msg, visible=True)
yield outputs
error_message, articles = serphouse_search(query, country)
if not error_message and articles:
for article in articles:
article['source_country'] = country
article['region'] = region
all_results.extend(articles)
sorted_results = sorted(all_results, key=lambda x: x.get('time', ''), reverse=True)
seen_urls = set()
unique_results = []
for article in sorted_results:
url = article.get('link', '')
if url not in seen_urls:
seen_urls.add(url)
unique_results.append(article)
unique_results = unique_results[:MAX_GLOBAL_RESULTS]
outputs = [
gr.update(value=f"{region} - {idx}/{total_countries} ๊ตญ๊ฐ€ ๊ฒ€์ƒ‰ ์™„๋ฃŒ\nํ˜„์žฌ๊นŒ์ง€ ๋ฐœ๊ฒฌ๋œ ๋‰ด์Šค: {len(unique_results)}๊ฑด", visible=True),
gr.update(value=f"**๊ฒ€์ƒ‰์–ด:** {query} | **์ง€์—ญ:** {region}", visible=True),
]
            # comp_idx avoids shadowing the outer country-loop index
            for comp_idx, comp in enumerate(global_article_components):
                if comp_idx < len(unique_results):
                    article = unique_results[comp_idx]
image_url = article.get('image_url', '')
image_update = gr.update(value=image_url, visible=True) if image_url and not image_url.startswith('data:image') else gr.update(value=None, visible=False)
korean_summary = translate_to_korean(article['snippet'])
outputs.extend([
gr.update(visible=True),
gr.update(value=f"### [{article['title']}]({article['link']})"),
image_update,
gr.update(value=f"**์š”์•ฝ:** {article['snippet']}\n\n**ํ•œ๊ธ€ ์š”์•ฝ:** {korean_summary}"),
gr.update(value=f"**์ถœ์ฒ˜:** {article['channel']} | **๊ตญ๊ฐ€:** {article['source_country']} | **์ง€์—ญ:** {article['region']} | **์‹œ๊ฐ„:** {article['time']}")
])
else:
outputs.extend([
gr.update(visible=False),
gr.update(),
gr.update(),
gr.update(),
gr.update()
])
outputs.append(unique_results)
yield outputs
except Exception as e:
print(f"Error searching {country}: {str(e)}")
continue
final_status = f"{region} ๊ฒ€์ƒ‰ ์™„๋ฃŒ! ์ด {len(unique_results)}๊ฐœ์˜ ๋‰ด์Šค๊ฐ€ ๋ฐœ๊ฒฌ๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
outputs[0] = gr.update(value=final_status, visible=True)
yield outputs
css = """
/* Global styles */
footer {visibility: hidden;}
/* ๋ ˆ์ด์•„์›ƒ ์ปจํ…Œ์ด๋„ˆ */
#status_area {
background: rgba(255, 255, 255, 0.9);
padding: 15px;
border-bottom: 1px solid #ddd;
margin-bottom: 20px;
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
}
#results_area {
padding: 10px;
margin-top: 10px;
}
/* Tab styles */
.tabs {
border-bottom: 2px solid #ddd !important;
margin-bottom: 20px !important;
}
.tab-nav {
border-bottom: none !important;
margin-bottom: 0 !important;
}
.tab-nav button {
font-weight: bold !important;
padding: 10px 20px !important;
}
.tab-nav button.selected {
border-bottom: 2px solid #1f77b4 !important;
color: #1f77b4 !important;
}
/* ์ƒํƒœ ๋ฉ”์‹œ์ง€ */
#status_area .markdown-text {
font-size: 1.1em;
color: #2c3e50;
padding: 10px 0;
}
/* Base container */
.group {
border: 1px solid #eee;
padding: 15px;
margin-bottom: 15px;
border-radius: 5px;
background: white;
}
/* Button styles */
.primary-btn {
background: #1f77b4 !important;
border: none !important;
}
/* Input fields */
.textbox {
border: 1px solid #ddd !important;
border-radius: 4px !important;
}
/* ํ”„๋กœ๊ทธ๋ ˆ์Šค๋ฐ” ์ปจํ…Œ์ด๋„ˆ */
.progress-container {
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 6px;
background: #e0e0e0;
z-index: 1000;
}
/* Progress bar */
.progress-bar {
height: 100%;
background: linear-gradient(90deg, #2196F3, #00BCD4);
box-shadow: 0 0 10px rgba(33, 150, 243, 0.5);
transition: width 0.3s ease;
animation: progress-glow 1.5s ease-in-out infinite;
}
/* ํ”„๋กœ๊ทธ๋ ˆ์Šค ํ…์ŠคํŠธ */
.progress-text {
position: fixed;
top: 8px;
left: 50%;
transform: translateX(-50%);
background: #333;
color: white;
padding: 4px 12px;
border-radius: 15px;
font-size: 14px;
z-index: 1001;
box-shadow: 0 2px 5px rgba(0,0,0,0.2);
}
/* ํ”„๋กœ๊ทธ๋ ˆ์Šค๋ฐ” ์• ๋‹ˆ๋ฉ”์ด์…˜ */
@keyframes progress-glow {
0% {
box-shadow: 0 0 5px rgba(33, 150, 243, 0.5);
}
50% {
box-shadow: 0 0 20px rgba(33, 150, 243, 0.8);
}
100% {
box-shadow: 0 0 5px rgba(33, 150, 243, 0.5);
}
}
/* ๋ฐ˜์‘ํ˜• ๋””์ž์ธ */
@media (max-width: 768px) {
.group {
padding: 10px;
margin-bottom: 15px;
}
.progress-text {
font-size: 12px;
padding: 3px 10px;
}
}
/* Loading state */
.loading {
opacity: 0.7;
pointer-events: none;
transition: opacity 0.3s ease;
}
/* Result container animation */
.group {
transition: all 0.3s ease;
opacity: 0;
transform: translateY(20px);
}
.group.visible {
opacity: 1;
transform: translateY(0);
}
/* Examples styling */
.examples-table {
margin-top: 10px !important;
margin-bottom: 20px !important;
}
.examples-table button {
background-color: #f0f0f0 !important;
border: 1px solid #ddd !important;
border-radius: 4px !important;
padding: 5px 10px !important;
margin: 2px !important;
transition: all 0.3s ease !important;
}
.examples-table button:hover {
background-color: #e0e0e0 !important;
transform: translateY(-1px) !important;
box-shadow: 0 2px 5px rgba(0,0,0,0.1) !important;
}
.examples-table .label {
font-weight: bold !important;
color: #444 !important;
margin-bottom: 5px !important;
}
"""
def get_article_content(url):
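    """Fetch a news URL and extract its title, description, and body text using
    common article-container selectors, with a generic <p>-tag fallback.
    """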
try:
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
session = requests.Session()
retries = Retry(total=3, backoff_factor=0.5)
session.mount('https://', HTTPAdapter(max_retries=retries))
response = session.get(url, headers=headers, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')
        # Extract metadata
        title_tag = soup.find('meta', property='og:title') or soup.find('title')
        # bs4 tags always expose .get, so the old hasattr() check never reached the <title> text
        title = title_tag.get('content', '') if title_tag is not None and title_tag.name == 'meta' else (title_tag.get_text(strip=True) if title_tag else '')
description = soup.find('meta', property='og:description') or soup.find('meta', {'name': 'description'})
description = description.get('content', '') if description else ''
        # Extract the article body
article_content = ''
# ์ผ๋ฐ˜์ ์ธ ๊ธฐ์‚ฌ ๋ณธ๋ฌธ ์ปจํ…Œ์ด๋„ˆ ๊ฒ€์ƒ‰
content_selectors = [
'article', '.article-body', '.article-content', '#article-body',
'.story-body', '.post-content', '.entry-content', '.content-body',
'[itemprop="articleBody"]', '.story-content'
]
for selector in content_selectors:
content = soup.select_one(selector)
if content:
                # Remove non-content elements
for tag in content.find_all(['script', 'style', 'nav', 'header', 'footer', 'aside']):
tag.decompose()
# ๋‹จ๋ฝ ์ถ”์ถœ
paragraphs = content.find_all(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
if paragraphs:
article_content = '\n\n'.join([p.get_text().strip() for p in paragraphs if p.get_text().strip()])
break
        # Fallback: collect all sufficiently long paragraphs
if not article_content:
paragraphs = soup.find_all('p')
article_content = '\n\n'.join([p.get_text().strip() for p in paragraphs if len(p.get_text().strip()) > 50])
        # Assemble the final content
full_content = f"Title: {title}\n\nDescription: {description}\n\nContent:\n{article_content}"
        # Clean up whitespace: collapse runs of spaces/tabs first, then blank lines,
        # so paragraph breaks survive (the old r'\s+' pass destroyed all newlines)
        full_content = re.sub(r'[ \t]+', ' ', full_content)
        full_content = re.sub(r'\n\s*\n', '\n\n', full_content)
return full_content.strip()
except Exception as e:
print(f"Crawling error details: {str(e)}") # ๋””๋ฒ„๊น…์„ ์œ„ํ•œ ์ƒ์„ธ ์—๋Ÿฌ ์ถœ๋ ฅ
return f"Error crawling content: {str(e)}"
def respond(url, history, system_message, max_tokens, temperature, top_p):
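    """Streaming chat handler: crawl the article at `url`, then ask the LLM to translate
    it and rewrite it as a Korean news article, yielding partial responses as they arrive.
    """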
if not url.startswith('http'):
history.append((url, "์˜ฌ๋ฐ”๋ฅธ URL์„ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”."))
return history
try:
article_content = get_article_content(url)
translation_prompt = f"""๋‹ค์Œ ์˜๋ฌธ ๊ธฐ์‚ฌ๋ฅผ ํ•œ๊ตญ์–ด๋กœ ๋ฒˆ์—ญํ•˜๊ณ  ๊ธฐ์‚ฌ๋ฅผ ์ž‘์„ฑํ•ด์ฃผ์„ธ์š”.
1๋‹จ๊ณ„: ์ „๋ฌธ ๋ฒˆ์—ญ
===๋ฒˆ์—ญ ์‹œ์ž‘===
{article_content}
===๋ฒˆ์—ญ ๋===
2๋‹จ๊ณ„: ๊ธฐ์‚ฌ ์ž‘์„ฑ ๊ฐ€์ด๋“œ๋ผ์ธ
๋‹ค์Œ ์š”๊ตฌ์‚ฌํ•ญ์— ๋”ฐ๋ผ ํ•œ๊ตญ์–ด ๊ธฐ์‚ฌ๋ฅผ ์ž‘์„ฑํ•˜์„ธ์š”:
1. ๊ตฌ์กฐ
- ํ—ค๋“œ๋ผ์ธ: ํ•ต์‹ฌ ๋‚ด์šฉ์„ ๋‹ด์€ ๊ฐ•๋ ฅํ•œ ์ œ๋ชฉ
- ๋ถ€์ œ๋ชฉ: ํ—ค๋“œ๋ผ์ธ ๋ณด์™„ ์„ค๋ช…
- ๋ฆฌ๋“œ๋ฌธ: ๊ธฐ์‚ฌ์˜ ํ•ต์‹ฌ์„ ์š”์•ฝํ•œ ์ฒซ ๋ฌธ๋‹จ
- ๋ณธ๋ฌธ: ์ƒ์„ธ ๋‚ด์šฉ ์ „๊ฐœ
2. ์ž‘์„ฑ ๊ทœ์น™
- ๊ฐ๊ด€์ ์ด๊ณ  ์ •ํ™•ํ•œ ์‚ฌ์‹ค ์ „๋‹ฌ
- ๋ฌธ์žฅ์€ '๋‹ค.'๋กœ ์ข…๊ฒฐ
- ๋‹จ๋ฝ ๊ฐ„ ์ž์—ฐ์Šค๋Ÿฌ์šด ํ๋ฆ„
- ์ธ์šฉ๊ตฌ๋Š” ๋”ฐ์˜ดํ‘œ ์ฒ˜๋ฆฌ
- ํ•ต์‹ฌ ์ •๋ณด๋ฅผ ์•ž๋ถ€๋ถ„์— ๋ฐฐ์น˜
- ์ „๋ฌธ ์šฉ์–ด๋Š” ์ ์ ˆํ•œ ์„ค๋ช… ์ถ”๊ฐ€
3. ํ˜•์‹
- ์ ์ ˆํ•œ ๋‹จ๋ฝ ๊ตฌ๋ถ„
- ์ฝ๊ธฐ ์‰ฌ์šด ๋ฌธ์žฅ ๊ธธ์ด
- ๋…ผ๋ฆฌ์ ์ธ ์ •๋ณด ๊ตฌ์„ฑ
๊ฐ ๋‹จ๊ณ„๋Š” '===๋ฒˆ์—ญ===', '===๊ธฐ์‚ฌ==='๋กœ ๋ช…ํ™•ํžˆ ๊ตฌ๋ถ„ํ•˜์—ฌ ์ถœ๋ ฅํ•˜์„ธ์š”.
"""
messages = [
{
"role": "system",
"content": system_message
},
{"role": "user", "content": translation_prompt}
]
history.append((url, "๋ฒˆ์—ญ ๋ฐ ๊ธฐ์‚ฌ ์ž‘์„ฑ์„ ์‹œ์ž‘ํ•ฉ๋‹ˆ๋‹ค..."))
full_response = ""
for message in client.chat.completions.create(
model="CohereForAI/c4ai-command-r-plus-08-2024",
max_tokens=max_tokens,
stream=True,
temperature=temperature,
top_p=top_p,
messages=messages,
):
if hasattr(message.choices[0].delta, 'content'):
token = message.choices[0].delta.content
if token:
full_response += token
history[-1] = (url, full_response)
yield history
except Exception as e:
error_message = f"์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
history.append((url, error_message))
yield history
return history
def continue_writing(history, system_message, max_tokens, temperature, top_p):
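    """Streaming handler for the continue-writing button: asks the LLM to extend
    the last response while keeping the article's context and style.
    """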
if not history:
return history
last_response = history[-1][1] if history else ""
continue_prompt = f"""์ด์ „ ๋‚ด์šฉ์„ ์ด์–ด์„œ ๊ณ„์† ์ž‘์„ฑํ•ด์ฃผ์„ธ์š”.
๋งˆ์ง€๋ง‰ ์‘๋‹ต: {last_response}
์ถ”๊ฐ€ ์ง€์นจ:
1. ์ด์ „ ๋‚ด์šฉ์˜ ๋งฅ๋ฝ์„ ์œ ์ง€ํ•˜๋ฉฐ ์ž์—ฐ์Šค๋Ÿฝ๊ฒŒ ์ด์–ด์„œ ์ž‘์„ฑ
2. ์ƒˆ๋กœ์šด ์ •๋ณด๋‚˜ ์ƒ์„ธ ์„ค๋ช…์„ ์ถ”๊ฐ€
3. ํ•„์š”ํ•œ ๊ฒฝ์šฐ ๋ณด์ถฉ ์„ค๋ช…์ด๋‚˜ ๋ถ„์„ ์ œ๊ณต
4. ๊ธฐ์‚ฌ ํ˜•์‹๊ณผ ์Šคํƒ€์ผ ์œ ์ง€
5. ํ•„์š”ํ•œ ๊ฒฝ์šฐ ์ถ”๊ฐ€์ ์ธ ์ด๋ฏธ์ง€ ํ”„๋กฌํ”„ํŠธ ์ƒ์„ฑ
"""
    # Build the message list
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": continue_prompt}  # start from a user message
    ]
try:
full_response = ""
for message in client.chat.completions.create(
model="CohereForAI/c4ai-command-r-plus-08-2024",
max_tokens=max_tokens,
stream=True,
temperature=temperature,
top_p=top_p,
messages=messages,
):
if hasattr(message.choices[0].delta, 'content'):
token = message.choices[0].delta.content
if token:
full_response += token
                    # Keep the existing history and append the new streaming response
new_history = history.copy()
new_history.append(("๊ณ„์† ์ž‘์„ฑ", full_response))
yield new_history
except Exception as e:
error_message = f"๊ณ„์† ์ž‘์„ฑ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
new_history = history.copy()
new_history.append(("์˜ค๋ฅ˜", error_message))
yield new_history
return history
with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI ์„œ๋น„์Šค") as iface:
init_db()
with gr.Tabs():
        # DB save / load tab
with gr.Tab("DB ๊ฒ€์ƒ‰"):
gr.Markdown("## ํ•œ๊ตญ ์ฃผ์š” ๊ธฐ์—… ๋ฏธ๊ตญ ๋‰ด์Šค DB")
gr.Markdown("๊ฐ ๊ธฐ์—…์˜ ๋ฏธ๊ตญ ๋‰ด์Šค๋ฅผ ๊ฒ€์ƒ‰ํ•˜์—ฌ DB์— ์ €์žฅํ•˜๊ณ  ๋ถˆ๋Ÿฌ์˜ฌ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.")
with gr.Column():
for i in range(0, len(KOREAN_COMPANIES), 2):
with gr.Row():
                        # First column
with gr.Column():
company = KOREAN_COMPANIES[i]
with gr.Group():
gr.Markdown(f"### {company}")
with gr.Row():
search_btn = gr.Button(f"๊ฒ€์ƒ‰", variant="primary")
load_btn = gr.Button(f"์ถœ๋ ฅ", variant="secondary")
result_display = gr.Markdown()
search_btn.click(
fn=lambda c=company: search_company(c),
outputs=result_display
)
load_btn.click(
fn=lambda c=company: load_company(c),
outputs=result_display
)
                        # Second column
if i + 1 < len(KOREAN_COMPANIES):
with gr.Column():
company = KOREAN_COMPANIES[i + 1]
with gr.Group():
gr.Markdown(f"### {company}")
with gr.Row():
search_btn = gr.Button(f"๊ฒ€์ƒ‰", variant="primary")
load_btn = gr.Button(f"์ถœ๋ ฅ", variant="secondary")
result_display = gr.Markdown()
search_btn.click(
fn=lambda c=company: search_company(c),
outputs=result_display
)
load_btn.click(
fn=lambda c=company: load_company(c),
outputs=result_display
)
            # Overall search statistics
with gr.Row():
stats_btn = gr.Button("์ „์ฒด ๊ฒ€์ƒ‰ ํ†ต๊ณ„ ๋ณด๊ธฐ", variant="secondary")
stats_display = gr.Markdown()
stats_btn.click(
fn=show_stats,
outputs=stats_display
)
with gr.Tab("๊ตญ๊ฐ€๋ณ„"):
gr.Markdown("๊ฒ€์ƒ‰์–ด๋ฅผ ์ž…๋ ฅํ•˜๊ณ  ์›ํ•˜๋Š” ๊ตญ๊ฐ€(ํ•œ๊ตญ ์ œ์™ธ)๋ฅผ๋ฅผ ์„ ํƒํ•˜๋ฉด, ๊ฒ€์ƒ‰์–ด์™€ ์ผ์น˜ํ•˜๋Š” 24์‹œ๊ฐ„ ์ด๋‚ด ๋‰ด์Šค๋ฅผ ์ตœ๋Œ€ 100๊ฐœ ์ถœ๋ ฅํ•ฉ๋‹ˆ๋‹ค.")
gr.Markdown("๊ตญ๊ฐ€ ์„ ํƒํ›„ ๊ฒ€์ƒ‰์–ด์— 'ํ•œ๊ธ€'์„ ์ž…๋ ฅํ•˜๋ฉด ํ˜„์ง€ ์–ธ์–ด๋กœ ๋ฒˆ์—ญ๋˜์–ด ๊ฒ€์ƒ‰ํ•ฉ๋‹ˆ๋‹ค. ์˜ˆ: 'Taiwan' ๊ตญ๊ฐ€ ์„ ํƒํ›„ '์‚ผ์„ฑ' ์ž…๋ ฅ์‹œ 'ไธ‰ๆ˜Ÿ'์œผ๋กœ ์ž๋™ ๊ฒ€์ƒ‰")
with gr.Column():
with gr.Row():
query = gr.Textbox(label="๊ฒ€์ƒ‰์–ด")
country = gr.Dropdown(
choices=sorted(list(COUNTRY_LOCATIONS.keys())),
label="๊ตญ๊ฐ€",
value="United States"
)
                # Example queries
gr.Examples(
examples=[
"artificial intelligence",
"NVIDIA",
"OPENAI",
"META LLAMA",
"black forest labs",
"GOOGLE gemini",
"anthropic Claude",
"X.AI",
"HUGGINGFACE",
"HYNIX",
"Large Language model",
"CHATGPT",
"StabilityAI",
"MISTRALAI",
"QWEN",
"MIDJOURNEY",
"GPU"
],
inputs=query,
label="์ž์ฃผ ์‚ฌ์šฉ๋˜๋Š” ๊ฒ€์ƒ‰์–ด"
)
status_message = gr.Markdown("", visible=True)
translated_query_display = gr.Markdown(visible=False)
search_button = gr.Button("๊ฒ€์ƒ‰", variant="primary")
progress = gr.Progress()
articles_state = gr.State([])
article_components = []
            for i in range(MAX_COUNTRY_RESULTS):  # one hidden slot per possible result
with gr.Group(visible=False) as article_group:
title = gr.Markdown()
image = gr.Image(width=200, height=150)
snippet = gr.Markdown()
info = gr.Markdown()
article_components.append({
'group': article_group,
'title': title,
'image': image,
'snippet': snippet,
'info': info,
'index': i,
})
        # Worldwide tab
with gr.Tab("์ „์„ธ๊ณ„"):
gr.Markdown("๋Œ€๋ฅ™๋ณ„๋กœ 24์‹œ๊ฐ„ ์ด๋‚ด ๋‰ด์Šค๋ฅผ ๊ฒ€์ƒ‰ํ•ฉ๋‹ˆ๋‹ค.")
with gr.Column():
with gr.Column(elem_id="status_area"):
with gr.Row():
query_global = gr.Textbox(label="๊ฒ€์ƒ‰์–ด")
region_select = gr.Dropdown(
choices=REGIONS,
label="์ง€์—ญ ์„ ํƒ",
value="๋™์•„์‹œ์•„"
)
search_button_global = gr.Button("๊ฒ€์ƒ‰", variant="primary")
status_message_global = gr.Markdown("")
translated_query_display_global = gr.Markdown("")
with gr.Column(elem_id="results_area"):
articles_state_global = gr.State([])
global_article_components = []
for i in range(MAX_GLOBAL_RESULTS):
with gr.Group(visible=False) as article_group:
title = gr.Markdown()
image = gr.Image(width=200, height=150)
snippet = gr.Markdown()
info = gr.Markdown()
global_article_components.append({
'group': article_group,
'title': title,
'image': image,
'snippet': snippet,
'info': info,
'index': i,
})
        # AI article generation tab
with gr.Tab("AI ๊ธฐ์‚ฌ ์ƒ์„ฑ"):
gr.Markdown("๋‰ด์Šค URL์„ ์ž…๋ ฅํ•˜๋ฉด AI๊ฐ€ ํ•œ๊ตญ์–ด๋กœ ๋ฒˆ์—ญํ•˜์—ฌ ๊ธฐ์‚ฌ ํ˜•์‹์œผ๋กœ ์ž‘์„ฑํ•ฉ๋‹ˆ๋‹ค.")
gr.Markdown("์ด๋ฏธ์ง€ ์ƒ์„ฑ: https://huggingface.co/spaces/ginipick/FLUXllama ")
with gr.Column():
chatbot = gr.Chatbot(height=600)
with gr.Row():
url_input = gr.Textbox(
label="๋‰ด์Šค URL",
placeholder="https://..."
)
with gr.Row():
translate_button = gr.Button("๊ธฐ์‚ฌ ์ƒ์„ฑ", variant="primary")
continue_button = gr.Button("๊ณ„์† ์ด์–ด์„œ ์ž‘์„ฑ", variant="secondary")
with gr.Accordion("๊ณ ๊ธ‰ ์„ค์ •", open=False):
system_message = gr.Textbox(
value="""You are a professional translator and journalist. Follow these steps strictly:
1. TRANSLATION
- Start with ===๋ฒˆ์—ญ=== marker
- Provide accurate Korean translation
- Maintain original meaning and context
2. ARTICLE WRITING
- Start with ===๊ธฐ์‚ฌ=== marker
- Write a new Korean news article based on the translation
- Follow newspaper article format
- Use formal news writing style
- End sentences with '๋‹ค.'
- Include headline and subheadline
- Organize paragraphs clearly
- Put key information first
- Use quotes appropriately
3. IMAGE PROMPT GENERATION
- Start with ===์ด๋ฏธ์ง€ ํ”„๋กฌํ”„ํŠธ=== marker
- Create detailed Korean prompts for image generation
- Prompts should reflect the article's main theme and content
- Include key visual elements mentioned in the article
- Specify style, mood, and composition
- Format: "์ด๋ฏธ์ง€ ์„ค๋ช…: [์ƒ์„ธ ์„ค๋ช…]"
- Add style keywords: "์Šคํƒ€์ผ: [๊ด€๋ จ ํ‚ค์›Œ๋“œ๋“ค]"
- Add mood keywords: "๋ถ„์œ„๊ธฐ: [๊ด€๋ จ ํ‚ค์›Œ๋“œ๋“ค]"
IMPORTANT:
- Must complete all three steps in order
- Clearly separate each section with markers
- Never skip or combine steps
- Ensure image prompts align with article content""",
label="System message"
)
max_tokens = gr.Slider(
minimum=1,
maximum=7800,
value=7624,
step=1,
label="Max new tokens"
)
temperature = gr.Slider(
minimum=0.1,
maximum=4.0,
value=0.7,
step=0.1,
label="Temperature"
)
top_p = gr.Slider(
minimum=0.1,
maximum=1.0,
value=0.95,
step=0.05,
label="Top-P"
)
    # Event wiring
    # Per-country tab events (the hidden Markdown created below fills the error-message output slot)
search_outputs = [status_message, translated_query_display, gr.Markdown(visible=False)]
for comp in article_components:
search_outputs.extend([
comp['group'], comp['title'], comp['image'],
comp['snippet'], comp['info']
])
search_outputs.append(articles_state)
search_button.click(
fn=search_and_display,
inputs=[query, country, articles_state],
outputs=search_outputs,
show_progress=True
)
# ์ „์„ธ๊ณ„ ํƒญ ์ด๋ฒคํŠธ
global_search_outputs = [status_message_global, translated_query_display_global]
for comp in global_article_components:
global_search_outputs.extend([
comp['group'], comp['title'], comp['image'],
comp['snippet'], comp['info']
])
global_search_outputs.append(articles_state_global)
search_button_global.click(
fn=search_global,
inputs=[query_global, region_select, articles_state_global],
outputs=global_search_outputs,
show_progress=True
)
# AI ๋ฒˆ์—ญ ํƒญ ์ด๋ฒคํŠธ
translate_button.click(
fn=respond,
inputs=[
url_input,
chatbot,
system_message,
max_tokens,
temperature,
top_p,
],
outputs=chatbot
)
# ๊ณ„์† ์ž‘์„ฑ ๋ฒ„ํŠผ ์ด๋ฒคํŠธ
continue_button.click(
fn=continue_writing,
inputs=[
chatbot,
system_message,
max_tokens,
temperature,
top_p,
],
outputs=chatbot
)
iface.launch(
server_name="0.0.0.0",
server_port=7860,
share=True,
auth=("gini","pick"),
ssl_verify=False,
show_error=True
)