Spaces: openfree · Running on CPU Upgrade

ginipick committed · Commit a8fb1c5 · verified · 1 Parent(s): 7796733

Create app.py

Files changed (1): app.py (+1011, −0)

app.py ADDED
import gradio as gr
import requests
import json
import os
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor, as_completed
from functools import lru_cache
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry  # requests.packages.urllib3 is a deprecated alias
from openai import OpenAI
from bs4 import BeautifulSoup
import re
import pathlib
import sqlite3
import pytz

# List of target companies/keywords
KOREAN_COMPANIES = [
    "NVIDIA",
    "ALPHABET",
    "APPLE",
    "TESLA",
    "AMAZON",
    "MICROSOFT",
    "META",
    "INTEL",
    "SAMSUNG",
    "HYNIX",
    "BITCOIN",
    "crypto",
    "stock",
    "Economics",
    "Finance",
    "investing"
]

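# Note: despite its name, KOREAN_COMPANIES mixes US tech tickers with generic
# finance keywords; the name appears to be historical rather than descriptive.
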
def convert_to_seoul_time(timestamp_str):
    """
    Label a stored timestamp string as Seoul time (KST).
    Timestamps are written by save_to_db in KST already, so this attaches the
    KST suffix rather than applying a UTC offset.
    """
    try:
        dt = datetime.strptime(timestamp_str, '%Y-%m-%d %H:%M:%S')
        seoul_tz = pytz.timezone('Asia/Seoul')
        seoul_time = seoul_tz.localize(dt)
        return seoul_time.strftime('%Y-%m-%d %H:%M:%S KST')
    except Exception as e:
        print(f"Time conversion error: {str(e)}")
        return timestamp_str

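# Illustrative example (not executed here):
#   convert_to_seoul_time('2024-01-01 12:00:00')  ->  '2024-01-01 12:00:00 KST'
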
def analyze_sentiment_batch(articles, client):
    """
    Perform a comprehensive sentiment analysis of the news articles using the OpenAI API.
    """
    try:
        # Combine all articles into a single text
        combined_text = "\n\n".join([
            f"Title: {article.get('title', '')}\nContent: {article.get('snippet', '')}"
            for article in articles
        ])

        prompt = f"""Please perform an overall sentiment analysis of the following collection of news articles:

News content:
{combined_text}

Please follow this format:
1. Overall Sentiment: [Positive/Negative/Neutral]
2. Key Positive Factors:
- [Item1]
- [Item2]
3. Key Negative Factors:
- [Item1]
- [Item2]
4. Summary: [Detailed explanation]
"""

        response = client.chat.completions.create(
            model="CohereForAI/c4ai-command-r-plus-08-2024",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.3,
            max_tokens=1000
        )

        return response.choices[0].message.content
    except Exception as e:
        return f"Sentiment analysis failed: {str(e)}"


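# Illustrative input shape (matches what format_results_from_raw produces below):
#   analyze_sentiment_batch(
#       [{"title": "NVIDIA beats estimates", "snippet": "Quarterly revenue ..."}],
#       client,
#   )
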
# Initialize the database
def init_db():
    """
    Initialize the SQLite database (search_results.db) if it doesn't already exist.
    """
    db_path = pathlib.Path("search_results.db")
    conn = sqlite3.connect(db_path)
    c = conn.cursor()
    c.execute('''CREATE TABLE IF NOT EXISTS searches
                 (id INTEGER PRIMARY KEY AUTOINCREMENT,
                  keyword TEXT,
                  country TEXT,
                  results TEXT,
                  timestamp DATETIME DEFAULT CURRENT_TIMESTAMP)''')
    conn.commit()
    conn.close()

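# The searches table is append-only: each search inserts a new row, and readers
# pick the most recent row per (keyword, country) by ordering on timestamp.
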
def save_to_db(keyword, country, results):
    """
    Save the search results for a specific (keyword, country) combination into the database.
    """
    conn = sqlite3.connect("search_results.db")
    c = conn.cursor()
    seoul_tz = pytz.timezone('Asia/Seoul')
    now = datetime.now(seoul_tz)
    timestamp = now.strftime('%Y-%m-%d %H:%M:%S')

    c.execute("""INSERT INTO searches
                 (keyword, country, results, timestamp)
                 VALUES (?, ?, ?, ?)""",
              (keyword, country, json.dumps(results), timestamp))
    conn.commit()
    conn.close()

def load_from_db(keyword, country):
    """
    Load the most recent search results for a specific (keyword, country) combination from the database.
    Returns the data and the timestamp.
    """
    conn = sqlite3.connect("search_results.db")
    c = conn.cursor()
    c.execute(
        "SELECT results, timestamp FROM searches WHERE keyword=? AND country=? ORDER BY timestamp DESC LIMIT 1",
        (keyword, country)
    )
    result = c.fetchone()
    conn.close()
    if result:
        return json.loads(result[0]), convert_to_seoul_time(result[1])
    return None, None

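# Illustrative example (not executed here):
#   data, ts = load_from_db("NVIDIA", "United States")
#   # -> ({"articles": [...], "analysis": "..."}, "2024-01-01 12:00:00 KST")
#   # or (None, None) if nothing has been saved yet.
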
def display_results(articles):
    """
    Convert a list of news articles into a Markdown string for display.
    """
    output = ""
    for idx, article in enumerate(articles, 1):
        output += f"### {idx}. {article['title']}\n"
        output += f"Source: {article['channel']}\n"
        output += f"Time: {article['time']}\n"
        output += f"Link: {article['link']}\n"
        output += f"Summary: {article['snippet']}\n\n"
    return output


########################################
# 1) Search => Articles + Analysis, then save to DB
########################################
def search_company(company):
    """
    For a single company (or keyword), search US news.
    1) Retrieve a list of articles
    2) Perform sentiment analysis
    3) Save results to DB
    4) Return (articles + analysis) in a single output.
    """
    error_message, articles = serphouse_search(company, "United States")
    if not error_message and articles:
        # Perform sentiment analysis
        analysis = analyze_sentiment_batch(articles, client)

        # Prepare data to save in DB
        store_dict = {
            "articles": articles,
            "analysis": analysis
        }
        save_to_db(company, "United States", store_dict)

        # Prepare output for display
        output = display_results(articles)
        output += f"\n\n### Analysis Report\n{analysis}\n"
        return output
    return f"No search results found for {company}."

########################################
# 2) Load => Return articles + analysis from DB
########################################
def load_company(company):
    """
    Load the most recent US news search results for the given company (or keyword) from the database,
    and return the articles + analysis in a single output.
    """
    data, timestamp = load_from_db(company, "United States")
    if data:
        articles = data.get("articles", [])
        analysis = data.get("analysis", "")

        output = f"### {company} Search Results\nLast Updated: {timestamp}\n\n"
        output += display_results(articles)
        output += f"\n\n### Analysis Report\n{analysis}\n"
        return output
    return f"No saved results for {company}."


########################################
# 3) Overall stats report
########################################
def show_stats():
    """
    For each company in KOREAN_COMPANIES:
    - Retrieve the most recent timestamp in DB
    - Number of articles
    - Sentiment analysis result
    Return these in a report format, titled "EarnBOT Analysis Report".
    """
    conn = sqlite3.connect("search_results.db")
    c = conn.cursor()

    output = "## EarnBOT Analysis Report\n\n"

    data_list = []
    for company in KOREAN_COMPANIES:
        c.execute("""
            SELECT results, timestamp
            FROM searches
            WHERE keyword = ?
            ORDER BY timestamp DESC
            LIMIT 1
        """, (company,))

        row = c.fetchone()
        if row:
            results_json, timestamp = row
            data_list.append((company, timestamp, results_json))

    conn.close()

    def analyze_data(item):
        comp, tstamp, results_json = item
        data = json.loads(results_json)
        articles = data.get("articles", [])
        analysis = data.get("analysis", "")

        count_articles = len(articles)
        return (comp, tstamp, count_articles, analysis)

    results_list = []
    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = [executor.submit(analyze_data, dl) for dl in data_list]
        for future in as_completed(futures):
            results_list.append(future.result())

    for comp, tstamp, count, analysis in results_list:
        seoul_time = convert_to_seoul_time(tstamp)
        output += f"### {comp}\n"
        output += f"- Last updated: {seoul_time}\n"
        output += f"- Number of articles stored: {count}\n\n"
        if analysis:
            output += "#### News Sentiment Analysis\n"
            output += f"{analysis}\n\n"
        output += "---\n\n"

    return output


def search_all_companies():
    """
    Search all companies in KOREAN_COMPANIES (in parallel),
    perform sentiment analysis + save to DB => return Markdown of all results.
    """
    overall_result = "# [Search Results for All Companies]\n\n"

    def do_search(comp):
        return comp, search_company(comp)

    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = [executor.submit(do_search, c) for c in KOREAN_COMPANIES]
        for future in as_completed(futures):
            comp, res_text = future.result()
            overall_result += f"## {comp}\n"
            overall_result += res_text + "\n\n"

    return overall_result

def load_all_companies():
    """
    Load articles + analysis for all companies in KOREAN_COMPANIES from the DB => return Markdown.
    """
    overall_result = "# [All Companies Data Output]\n\n"

    for comp in KOREAN_COMPANIES:
        overall_result += f"## {comp}\n"
        overall_result += load_company(comp)
        overall_result += "\n"
    return overall_result

def full_summary_report():
    """
    1) Search all companies (in parallel) -> 2) Load results -> 3) Show sentiment analysis stats
    Return a combined report with all three steps.
    """
    # 1) Search all companies => store to DB
    search_result_text = search_all_companies()

    # 2) Load all results => from DB
    load_result_text = load_all_companies()

    # 3) Show stats => EarnBOT Analysis Report
    stats_text = show_stats()

    combined_report = (
        "# Full Analysis Summary Report\n\n"
        "Executed in the following order:\n"
        "1. Search all companies (parallel) + sentiment analysis => 2. Load results from DB => 3. Show overall sentiment analysis stats\n\n"
        f"{search_result_text}\n\n"
        f"{load_result_text}\n\n"
        "## [Overall Sentiment Analysis Stats]\n\n"
        f"{stats_text}"
    )
    return combined_report


########################################
# Additional feature: User custom search
########################################
def search_custom(query, country):
    """
    For a user-provided (query, country):
    1) Search + sentiment analysis => save to DB
    2) Load from DB => display articles + analysis
    """
    error_message, articles = serphouse_search(query, country)
    if error_message:
        return f"An error occurred: {error_message}"
    if not articles:
        return "No results were found for your query."

    # 1) Perform analysis
    analysis = analyze_sentiment_batch(articles, client)

    # 2) Save to DB
    save_data = {
        "articles": articles,
        "analysis": analysis
    }
    save_to_db(query, country, save_data)

    # 3) Reload from DB
    loaded_data, timestamp = load_from_db(query, country)
    if not loaded_data:
        return "Failed to load data from DB."

    # 4) Prepare final output
    out = "## [Custom Search Results]\n\n"
    out += f"**Keyword**: {query}\n\n"
    out += f"**Country**: {country}\n\n"
    out += f"**Timestamp**: {timestamp}\n\n"

    arts = loaded_data.get("articles", [])
    analy = loaded_data.get("analysis", "")

    out += display_results(arts)
    out += f"### News Sentiment Analysis\n{analy}\n"

    return out

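# Illustrative example (not executed here): search_custom("삼성전자", "Japan")
# would translate the non-English query to Japanese, search Japanese news,
# store the result, and return a Markdown report.
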
########################################
# API Authentication
########################################
ACCESS_TOKEN = os.getenv("HF_TOKEN")
if not ACCESS_TOKEN:
    raise ValueError("HF_TOKEN environment variable is not set")

client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)

API_KEY = os.getenv("SERPHOUSE_API_KEY")

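# Note: unlike HF_TOKEN above, a missing SERPHOUSE_API_KEY is not validated at
# startup; search requests would simply fail with an authorization error.
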

########################################
# Country-specific settings
########################################
COUNTRY_LANGUAGES = {
    "United States": "en",
    "KOREA": "ko",
    "United Kingdom": "en",
    "Taiwan": "zh-TW",
    "Canada": "en",
    "Australia": "en",
    "Germany": "de",
    "France": "fr",
    "Japan": "ja",
    "China": "zh",
    "India": "hi",
    "Brazil": "pt",
    "Mexico": "es",
    "Russia": "ru",
    "Italy": "it",
    "Spain": "es",
    "Netherlands": "nl",
    "Singapore": "en",
    "Hong Kong": "zh-HK",
    "Indonesia": "id",
    "Malaysia": "ms",
    "Philippines": "tl",
    "Thailand": "th",
    "Vietnam": "vi",
    "Belgium": "nl",
    "Denmark": "da",
    "Finland": "fi",
    "Ireland": "en",
    "Norway": "no",
    "Poland": "pl",
    "Sweden": "sv",
    "Switzerland": "de",
    "Austria": "de",
    "Czech Republic": "cs",
    "Greece": "el",
    "Hungary": "hu",
    "Portugal": "pt",
    "Romania": "ro",
    "Turkey": "tr",
    "Israel": "he",
    "Saudi Arabia": "ar",
    "United Arab Emirates": "ar",
    "South Africa": "en",
    "Argentina": "es",
    "Chile": "es",
    "Colombia": "es",
    "Peru": "es",
    "Venezuela": "es",
    "New Zealand": "en",
    "Bangladesh": "bn",
    "Pakistan": "ur",
    "Egypt": "ar",
    "Morocco": "ar",
    "Nigeria": "en",
    "Kenya": "sw",
    "Ukraine": "uk",
    "Croatia": "hr",
    "Slovakia": "sk",
    "Bulgaria": "bg",
    "Serbia": "sr",
    "Estonia": "et",
    "Latvia": "lv",
    "Lithuania": "lt",
    "Slovenia": "sl",
    "Luxembourg": "fr",
    "Malta": "mt",
    "Cyprus": "el",
    "Iceland": "is"
}

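# The values above are Google-style interface language codes; they are passed
# through to the SerpHouse 'lang' field in search_serphouse below.
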
COUNTRY_LOCATIONS = {
    "United States": "United States",
    "KOREA": "South Korea",
    "United Kingdom": "United Kingdom",
    "Taiwan": "Taiwan",
    "Canada": "Canada",
    "Australia": "Australia",
    "Germany": "Germany",
    "France": "France",
    "Japan": "Japan",
    "China": "China",
    "India": "India",
    "Brazil": "Brazil",
    "Mexico": "Mexico",
    "Russia": "Russia",
    "Italy": "Italy",
    "Spain": "Spain",
    "Netherlands": "Netherlands",
    "Singapore": "Singapore",
    "Hong Kong": "Hong Kong",
    "Indonesia": "Indonesia",
    "Malaysia": "Malaysia",
    "Philippines": "Philippines",
    "Thailand": "Thailand",
    "Vietnam": "Vietnam",
    "Belgium": "Belgium",
    "Denmark": "Denmark",
    "Finland": "Finland",
    "Ireland": "Ireland",
    "Norway": "Norway",
    "Poland": "Poland",
    "Sweden": "Sweden",
    "Switzerland": "Switzerland",
    "Austria": "Austria",
    "Czech Republic": "Czech Republic",
    "Greece": "Greece",
    "Hungary": "Hungary",
    "Portugal": "Portugal",
    "Romania": "Romania",
    "Turkey": "Turkey",
    "Israel": "Israel",
    "Saudi Arabia": "Saudi Arabia",
    "United Arab Emirates": "United Arab Emirates",
    "South Africa": "South Africa",
    "Argentina": "Argentina",
    "Chile": "Chile",
    "Colombia": "Colombia",
    "Peru": "Peru",
    "Venezuela": "Venezuela",
    "New Zealand": "New Zealand",
    "Bangladesh": "Bangladesh",
    "Pakistan": "Pakistan",
    "Egypt": "Egypt",
    "Morocco": "Morocco",
    "Nigeria": "Nigeria",
    "Kenya": "Kenya",
    "Ukraine": "Ukraine",
    "Croatia": "Croatia",
    "Slovakia": "Slovakia",
    "Bulgaria": "Bulgaria",
    "Serbia": "Serbia",
    "Estonia": "Estonia",
    "Latvia": "Latvia",
    "Lithuania": "Lithuania",
    "Slovenia": "Slovenia",
    "Luxembourg": "Luxembourg",
    "Malta": "Malta",
    "Cyprus": "Cyprus",
    "Iceland": "Iceland"
}


@lru_cache(maxsize=100)
def translate_query(query, country):
    """
    Use the unofficial Google Translation API to translate the query into the target country's language.
    If the query is already in English, or if translation fails, return the original query.
    """
    try:
        if is_english(query):
            return query

        if country in COUNTRY_LANGUAGES:
            if country == "KOREA":  # the dict key is "KOREA"; Korean queries pass through untranslated
                return query
            target_lang = COUNTRY_LANGUAGES[country]

            url = "https://translate.googleapis.com/translate_a/single"
            params = {
                "client": "gtx",
                "sl": "auto",
                "tl": target_lang,
                "dt": "t",
                "q": query
            }

            session = requests.Session()
            retries = Retry(total=3, backoff_factor=0.5)
            session.mount('https://', HTTPAdapter(max_retries=retries))

            response = session.get(url, params=params, timeout=(5, 10))
            response.raise_for_status()
            translated_text = response.json()[0][0][0]
            return translated_text
        return query

    except Exception as e:
        print(f"Translation error: {str(e)}")
        return query

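# The unofficial translate endpoint returns a nested JSON array rather than an
# object; response.json()[0][0][0] is the translated text. Being unofficial,
# this shape may change without notice, hence the broad exception fallback.
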
def is_english(text):
    """
    Check if a string is (mostly) English by verifying character code ranges.
    """
    return all(ord(char) < 128 for char in text.replace(' ', '').replace('-', '').replace('_', ''))

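# Illustrative examples (not executed here):
#   is_english("NVIDIA stock")  -> True   (all ASCII)
#   is_english("엔비디아 주가")   -> False  (Hangul code points are >= 128)
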
def search_serphouse(query, country, page=1, num_result=10):
    """
    Send a real-time search request to the SerpHouse API,
    specifying the 'news' tab (sort_by=date) for the given query.
    Returns a dict with 'results' or 'error'.
    """
    url = "https://api.serphouse.com/serp/live"

    # Restrict results to roughly the last 24 hours
    now = datetime.utcnow()
    yesterday = now - timedelta(days=1)
    date_range = f"{yesterday.strftime('%Y-%m-%d')},{now.strftime('%Y-%m-%d')}"

    translated_query = translate_query(query, country)

    payload = {
        "data": {
            "q": translated_query,
            "domain": "google.com",
            "loc": COUNTRY_LOCATIONS.get(country, "United States"),
            "lang": COUNTRY_LANGUAGES.get(country, "en"),
            "device": "desktop",
            "serp_type": "news",
            "page": str(page),
            "num": "100",  # fixed at 100; the num_result parameter is currently unused
            "date_range": date_range,
            "sort_by": "date"
        }
    }

    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": f"Bearer {API_KEY}"
    }

    try:
        session = requests.Session()

        # Retry transient server errors and rate limits with exponential backoff
        retries = Retry(
            total=5,
            backoff_factor=1,
            status_forcelist=[500, 502, 503, 504, 429],
            allowed_methods=["POST"]
        )

        adapter = HTTPAdapter(max_retries=retries)
        session.mount('http://', adapter)
        session.mount('https://', adapter)

        response = session.post(
            url,
            json=payload,
            headers=headers,
            timeout=(30, 30)
        )

        response.raise_for_status()
        return {"results": response.json(), "translated_query": translated_query}

    except requests.exceptions.Timeout:
        return {
            "error": "Search timed out. Please try again later.",
            "translated_query": query
        }
    except requests.exceptions.RequestException as e:
        return {
            "error": f"Error during search: {str(e)}",
            "translated_query": query
        }
    except Exception as e:
        return {
            "error": f"Unexpected error occurred: {str(e)}",
            "translated_query": query
        }

def format_results_from_raw(response_data):
    """
    Process the SerpHouse API response data and return (error_message, article_list).
    """
    if "error" in response_data:
        return "Error: " + response_data["error"], []

    try:
        results = response_data["results"]
        translated_query = response_data["translated_query"]

        news_results = results.get('results', {}).get('results', {}).get('news', [])
        if not news_results:
            return "No search results found.", []

        # Filter out Korean domains and Korean keywords (example filtering)
        korean_domains = [
            '.kr', 'korea', 'korean', 'yonhap', 'hankyung', 'chosun',
            'donga', 'joins', 'hani', 'koreatimes', 'koreaherald'
        ]
        korean_keywords = [
            'korea', 'korean', 'seoul', 'busan', 'incheon', 'daegu',
            'gwangju', 'daejeon', 'ulsan', 'sejong'
        ]

        filtered_articles = []
        for idx, result in enumerate(news_results, 1):
            url = result.get("url", result.get("link", "")).lower()
            title = result.get("title", "").lower()
            channel = result.get("channel", result.get("source", "")).lower()

            is_korean_content = (
                any(domain in url or domain in channel for domain in korean_domains) or
                any(keyword in title for keyword in korean_keywords)
            )

            # Exclude Korean content
            if not is_korean_content:
                filtered_articles.append({
                    "index": idx,
                    "title": result.get("title", "No Title"),
                    "link": url,
                    "snippet": result.get("snippet", "No Content"),
                    "channel": result.get("channel", result.get("source", "Unknown")),
                    "time": result.get("time", result.get("date", "Unknown Time")),
                    "image_url": result.get("img", result.get("thumbnail", "")),
                    "translated_query": translated_query
                })

        return "", filtered_articles
    except Exception as e:
        return f"Error processing results: {str(e)}", []

def serphouse_search(query, country):
    """
    Helper function to search and then format results.
    Returns (error_message, article_list).
    """
    response_data = search_serphouse(query, country)
    return format_results_from_raw(response_data)

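# Minimal usage sketch (illustrative; assumes SERPHOUSE_API_KEY and HF_TOKEN
# are set in the environment):
#   err, articles = serphouse_search("NVIDIA", "United States")
#   if not err:
#       print(display_results(articles))
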

# Refined, modern, and sleek custom CSS
css = """
body {
    background: linear-gradient(to bottom right, #f9fafb, #ffffff);
    font-family: 'Arial', sans-serif;
}

/* Hide default Gradio footer */
footer {
    visibility: hidden;
}

/* Header/Status area */
#status_area {
    background: rgba(255, 255, 255, 0.9);
    padding: 15px;
    border-bottom: 1px solid #ddd;
    margin-bottom: 20px;
    box-shadow: 0 2px 5px rgba(0,0,0,0.1);
}

/* Results area */
#results_area {
    padding: 10px;
    margin-top: 10px;
}

/* Tabs style */
.tabs {
    border-bottom: 2px solid #ddd !important;
    margin-bottom: 20px !important;
}

.tab-nav {
    border-bottom: none !important;
    margin-bottom: 0 !important;
}

.tab-nav button {
    font-weight: bold !important;
    padding: 10px 20px !important;
    background-color: #f0f0f0 !important;
    border: 1px solid #ccc !important;
    border-radius: 5px !important;
    margin-right: 5px !important;
}

.tab-nav button.selected {
    border-bottom: 2px solid #1f77b4 !important;
    background-color: #e6f2fa !important;
    color: #1f77b4 !important;
}

/* Status message styling */
#status_area .markdown-text {
    font-size: 1.1em;
    color: #2c3e50;
    padding: 10px 0;
}

/* Main container grouping */
.group {
    border: 1px solid #eee;
    padding: 15px;
    margin-bottom: 15px;
    border-radius: 5px;
    background: white;
    transition: all 0.3s ease;
    opacity: 0;
    transform: translateY(20px);
}
.group.visible {
    opacity: 1;
    transform: translateY(0);
}

/* Buttons */
.primary-btn {
    background: #1f77b4 !important;
    border: none !important;
    color: #fff !important;
    border-radius: 5px !important;
    padding: 10px 20px !important;
    cursor: pointer !important;
}
.primary-btn:hover {
    background: #155a8c !important;
}

.secondary-btn {
    background: #f0f0f0 !important;
    border: 1px solid #ccc !important;
    color: #333 !important;
    border-radius: 5px !important;
    padding: 10px 20px !important;
    cursor: pointer !important;
}
.secondary-btn:hover {
    background: #e0e0e0 !important;
}

/* Input fields */
.textbox {
    border: 1px solid #ddd !important;
    border-radius: 4px !important;
}

/* Progress bar container */
.progress-container {
    position: fixed;
    top: 0;
    left: 0;
    width: 100%;
    height: 6px;
    background: #e0e0e0;
    z-index: 1000;
}

/* Progress bar */
.progress-bar {
    height: 100%;
    background: linear-gradient(90deg, #2196F3, #00BCD4);
    box-shadow: 0 0 10px rgba(33, 150, 243, 0.5);
    transition: width 0.3s ease;
    animation: progress-glow 1.5s ease-in-out infinite;
}

/* Progress text */
.progress-text {
    position: fixed;
    top: 8px;
    left: 50%;
    transform: translateX(-50%);
    background: #333;
    color: white;
    padding: 4px 12px;
    border-radius: 15px;
    font-size: 14px;
    z-index: 1001;
    box-shadow: 0 2px 5px rgba(0,0,0,0.2);
}

/* Progress bar animation */
@keyframes progress-glow {
    0% {
        box-shadow: 0 0 5px rgba(33, 150, 243, 0.5);
    }
    50% {
        box-shadow: 0 0 20px rgba(33, 150, 243, 0.8);
    }
    100% {
        box-shadow: 0 0 5px rgba(33, 150, 243, 0.5);
    }
}

/* Loading state */
.loading {
    opacity: 0.7;
    pointer-events: none;
    transition: opacity 0.3s ease;
}

/* Responsive design for smaller screens */
@media (max-width: 768px) {
    .group {
        padding: 10px;
        margin-bottom: 15px;
    }

    .progress-text {
        font-size: 12px;
        padding: 3px 10px;
    }
}

/* Example section styling */
.examples-table {
    margin-top: 10px !important;
    margin-bottom: 20px !important;
}

.examples-table button {
    background-color: #f0f0f0 !important;
    border: 1px solid #ddd !important;
    border-radius: 4px !important;
    padding: 5px 10px !important;
    margin: 2px !important;
    transition: all 0.3s ease !important;
}

.examples-table button:hover {
    background-color: #e0e0e0 !important;
    transform: translateY(-1px) !important;
    box-shadow: 0 2px 5px rgba(0,0,0,0.1) !important;
}

.examples-table .label {
    font-weight: bold !important;
    color: #444 !important;
    margin-bottom: 5px !important;
}
"""

# --- Gradio Interface ---
with gr.Blocks(css=css, title="NewsAI Service") as iface:
    init_db()

    with gr.Tabs():
        with gr.Tab("EarnBot"):
            gr.Markdown("## EarnBot: AI-powered Analysis of Global Big Tech Companies and Investment Outlook")
            gr.Markdown(
                " * Click 'Generate Full Analysis Summary Report' to create a comprehensive automated report.\n"
                " * You can also 'Search' (saving automatically to the DB) and 'Load from DB' (retrieving automatically) for each listed company.\n"
                " * Additionally, feel free to search/analyze any custom keyword in your chosen country."
            )

            # User custom search section
            with gr.Group():
                gr.Markdown("### Custom Search")
                with gr.Row():
                    with gr.Column():
                        user_input = gr.Textbox(
                            label="Enter your keyword",
                            placeholder="e.g., Apple, Samsung, etc.",
                            elem_classes="textbox"
                        )
                    with gr.Column():
                        country_selection = gr.Dropdown(
                            choices=list(COUNTRY_LOCATIONS.keys()),
                            value="United States",
                            label="Select Country"
                        )
                    with gr.Column():
                        custom_search_btn = gr.Button("Search", variant="primary", elem_classes="primary-btn")

                custom_search_output = gr.Markdown()

                custom_search_btn.click(
                    fn=search_custom,
                    inputs=[user_input, country_selection],
                    outputs=custom_search_output
                )

            # Button to generate a full report
            with gr.Row():
                full_report_btn = gr.Button("Generate Full Analysis Summary Report", variant="primary", elem_classes="primary-btn")
            full_report_display = gr.Markdown()

            full_report_btn.click(
                fn=full_summary_report,
                outputs=full_report_display
            )

            # Individual search/load for companies in KOREAN_COMPANIES, two per row
            with gr.Column():
                for i in range(0, len(KOREAN_COMPANIES), 2):
                    with gr.Row():
                        # Left column
                        with gr.Column():
                            company = KOREAN_COMPANIES[i]
                            with gr.Group():
                                gr.Markdown(f"### {company}")
                                with gr.Row():
                                    search_btn = gr.Button("Search", variant="primary", elem_classes="primary-btn")
                                    load_btn = gr.Button("Load from DB", variant="secondary", elem_classes="secondary-btn")
                                result_display = gr.Markdown()

                                # Bind the company via a default argument so each
                                # button captures its own value, not the loop variable.
                                search_btn.click(
                                    fn=lambda c=company: search_company(c),
                                    outputs=result_display
                                )
                                load_btn.click(
                                    fn=lambda c=company: load_company(c),
                                    outputs=result_display
                                )

                        # Right column (if it exists)
                        if i + 1 < len(KOREAN_COMPANIES):
                            with gr.Column():
                                company = KOREAN_COMPANIES[i + 1]
                                with gr.Group():
                                    gr.Markdown(f"### {company}")
                                    with gr.Row():
                                        search_btn = gr.Button("Search", variant="primary", elem_classes="primary-btn")
                                        load_btn = gr.Button("Load from DB", variant="secondary", elem_classes="secondary-btn")
                                    result_display = gr.Markdown()

                                    search_btn.click(
                                        fn=lambda c=company: search_company(c),
                                        outputs=result_display
                                    )
                                    load_btn.click(
                                        fn=lambda c=company: load_company(c),
                                        outputs=result_display
                                    )

iface.launch(
    server_name="0.0.0.0",
    server_port=7860,
    share=True,
    ssl_verify=False,
    show_error=True
)