# MoneyRadar — app.py
# (Hugging Face Space by seawolf2357; commit 8aede25, ~8.36 kB)
import json
import os
from datetime import datetime, timedelta, timezone

import gradio as gr
import requests
from bs4 import BeautifulSoup  # μ›Ή νŽ˜μ΄μ§€μ—μ„œ ν…μŠ€νŠΈλ₯Ό μΆ”μΆœν•˜κΈ° μœ„ν•΄ μ‚¬μš©
from huggingface_hub import InferenceClient  # LLM μ‚¬μš©μ„ μœ„ν•΄ ν•„μš”
# ν•„μš”ν•œ νŒ¨ν‚€μ§€ μ„€μΉ˜ (ν•„μš”ν•œ 경우 주석을 μ œκ±°ν•˜κ³  μ‹€ν–‰)
# !pip install bs4 huggingface_hub
# ν™˜κ²½ λ³€μˆ˜μ—μ„œ API ν‚€ κ°€μ Έμ˜€κΈ° (API ν‚€λŠ” μ•ˆμ „ν•˜κ²Œ κ΄€λ¦¬λ˜μ–΄μ•Ό ν•©λ‹ˆλ‹€)
API_KEY = os.getenv("SERPHOUSE_API_KEY") # 본인의 SerpHouse API ν‚€λ₯Ό ν™˜κ²½ λ³€μˆ˜λ‘œ μ„€μ •ν•˜μ„Έμš”.
HF_TOKEN = os.getenv("HF_TOKEN") # Hugging Face API 토큰을 ν™˜κ²½ λ³€μˆ˜λ‘œ μ„€μ •ν•˜μ„Έμš”.
MAJOR_COUNTRIES = [
"United States", "United Kingdom", "Canada", "Australia", "Germany",
"France", "Japan", "South Korea", "China", "India",
"Brazil", "Mexico", "Russia", "Italy", "Spain",
"Netherlands", "Sweden", "Switzerland", "Norway", "Denmark",
"Finland", "Belgium", "Austria", "New Zealand", "Ireland",
"Singapore", "Hong Kong", "Israel", "United Arab Emirates", "Saudi Arabia",
"South Africa", "Turkey", "Egypt", "Poland", "Czech Republic",
"Hungary", "Greece", "Portugal", "Argentina", "Chile",
"Colombia", "Peru", "Venezuela", "Thailand", "Malaysia",
"Indonesia", "Philippines", "Vietnam", "Pakistan", "Bangladesh"
]
def search_serphouse(query, country, page=1, num_result=100):
url = "https://api.serphouse.com/serp/live"
now = datetime.utcnow()
yesterday = now - timedelta(days=1)
date_range = f"{yesterday.strftime('%Y-%m-%d')},{now.strftime('%Y-%m-%d')}"
payload = {
"data": {
"q": query,
"domain": "google.com",
"loc": country,
"lang": "en",
"device": "desktop",
"serp_type": "news",
"page": str(page),
"verbatim": "1",
"num": str(num_result),
"date_range": date_range
}
}
headers = {
"accept": "application/json",
"content-type": "application/json",
"authorization": f"Bearer {API_KEY}"
}
try:
response = requests.post(url, json=payload, headers=headers)
response.raise_for_status()
return response.json()
except requests.RequestException as e:
error_msg = f"Error: {str(e)}"
if response.text:
error_msg += f"\nResponse content: {response.text}"
return {"error": error_msg}
def format_results_from_raw(results):
try:
if isinstance(results, dict) and "error" in results:
return "Error: " + results["error"], []
if not isinstance(results, dict):
raise ValueError("κ²°κ³Όκ°€ 사전 ν˜•μ‹μ΄ μ•„λ‹™λ‹ˆλ‹€.")
# 'results' ν‚€ λ‚΄λΆ€μ˜ ꡬ쑰 확인 (μ€‘μ²©λœ 'results' 처리)
if 'results' in results:
results_content = results['results']
if 'results' in results_content:
results_content = results_content['results']
# 'news' ν‚€ 확인
if 'news' in results_content:
news_results = results_content['news']
else:
news_results = []
else:
news_results = []
else:
news_results = []
if not news_results:
return "검색 κ²°κ³Όκ°€ μ—†μŠ΅λ‹ˆλ‹€.", []
articles = []
for idx, result in enumerate(news_results, 1):
title = result.get("title", "제λͺ© μ—†μŒ")
link = result.get("url", result.get("link", "#"))
snippet = result.get("snippet", "λ‚΄μš© μ—†μŒ")
channel = result.get("channel", result.get("source", "μ•Œ 수 μ—†μŒ"))
time = result.get("time", result.get("date", "μ•Œ 수 μ—†λŠ” μ‹œκ°„"))
image_url = result.get("img", result.get("thumbnail", ""))
articles.append({
"index": idx,
"title": title,
"link": link,
"snippet": snippet,
"channel": channel,
"time": time,
"image_url": image_url
})
return "", articles
except Exception as e:
error_message = f"κ²°κ³Ό 처리 쀑 였λ₯˜ λ°œμƒ: {str(e)}"
return "Error: " + error_message, []
def serphouse_search(query, country):
# νŽ˜μ΄μ§€μ™€ κ²°κ³Ό 수의 기본값을 μ„€μ •ν•©λ‹ˆλ‹€.
page = 1
num_result = 100
results = search_serphouse(query, country, page, num_result)
error_message, articles = format_results_from_raw(results)
return error_message, articles
# LLM μ„€μ •
hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=HF_TOKEN)
def summarize_article(url):
try:
# μ›Ή νŽ˜μ΄μ§€μ—μ„œ ν…μŠ€νŠΈ μΆ”μΆœ
response = requests.get(url)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')
# λͺ¨λ“  ν…μŠ€νŠΈλ₯Ό μΆ”μΆœ (κ°„λ‹¨ν•œ μ˜ˆμ‹œ)
text = ' '.join([p.get_text() for p in soup.find_all('p')])
if not text.strip():
return "기사 λ‚΄μš©μ„ κ°€μ Έμ˜¬ 수 μ—†μŠ΅λ‹ˆλ‹€."
# μš”μ•½ 생성
prompt = f"λ‹€μŒ μ˜μ–΄ 기사λ₯Ό ν•œκ΅­μ–΄λ‘œ 3λ¬Έμž₯으둜 μš”μ•½ν•˜μ„Έμš”:\n{text}"
summary = hf_client.text_generation(prompt, max_new_tokens=500)
return summary
except Exception as e:
return f"μš”μ•½ 쀑 였λ₯˜ λ°œμƒ: {str(e)}"
css = """
footer {
visibility: hidden;
}
"""
# Gradio μΈν„°νŽ˜μ΄μŠ€ ꡬ성
with gr.Blocks(css=css, title="NewsAI μ„œλΉ„μŠ€") as iface:
gr.Markdown("검색어λ₯Ό μž…λ ₯ν•˜κ³  μ›ν•˜λŠ” κ΅­κ°€λ₯Ό μ„ νƒν•˜λ©΄, 검색어와 μΌμΉ˜ν•˜λŠ” 24μ‹œκ°„ 이내 λ‰΄μŠ€λ₯Ό μ΅œλŒ€ 100개 좜λ ₯ν•©λ‹ˆλ‹€.")
with gr.Column():
with gr.Row():
query = gr.Textbox(label="검색어")
country = gr.Dropdown(MAJOR_COUNTRIES, label="κ΅­κ°€", value="South Korea")
search_button = gr.Button("검색")
output_table = gr.HTML()
summary_output = gr.Markdown(visible=False)
def search_and_display(query, country):
error_message, articles = serphouse_search(query, country)
if error_message:
return f"<p>{error_message}</p>", gr.update(visible=False)
else:
# 기사 λͺ©λ‘μ„ HTML ν…Œμ΄λΈ”λ‘œ 생성
table_html = "<table border='1' style='width:100%; text-align:left;'><tr><th>번호</th><th>제λͺ©</th><th>좜처</th><th>μ‹œκ°„</th><th>뢄석</th></tr>"
for article in articles:
# 각 기사에 λŒ€ν•΄ λ²„νŠΌμ— ν•΄λ‹Ήν•˜λŠ” JavaScript μ½”λ“œλ₯Ό μ‚½μž…
analyze_button = f"""<button onclick="analyzeArticle('{article['link']}')">뢄석</button>"""
row = f"""
<tr>
<td>{article['index']}</td>
<td><a href="{article['link']}" target="_blank">{article['title']}</a></td>
<td>{article['channel']}</td>
<td>{article['time']}</td>
<td>{analyze_button}</td>
</tr>
"""
table_html += row
table_html += "</table>"
# JavaScript ν•¨μˆ˜ μ •μ˜
js_code = """
<script>
function analyzeArticle(url) {
// Gradio의 handle_function을 μ‚¬μš©ν•˜μ—¬ Python ν•¨μˆ˜ 호좜
gradioApp().querySelector('#article_url_input textarea').value = url;
gradioApp().querySelector('#analyze_button').click();
}
</script>
"""
full_html = table_html + js_code
return full_html, gr.update(visible=True, value="") # summary_output μ΄ˆκΈ°ν™”
def analyze_article(url):
summary = summarize_article(url)
return summary
article_url_input = gr.Textbox(visible=False, elem_id="article_url_input")
analyze_button = gr.Button("뢄석", visible=False, elem_id="analyze_button")
search_button.click(
search_and_display,
inputs=[query, country],
outputs=[output_table, summary_output]
)
analyze_button.click(
analyze_article,
inputs=[article_url_input],
outputs=[summary_output]
)
iface.launch(auth=("gini", "pick"))