seawolf2357 committed on
Commit
b58dbad
·
verified ·
1 Parent(s): b1a1529

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -83
app.py CHANGED
@@ -8,11 +8,89 @@ from huggingface_hub import InferenceClient
8
  API_KEY = os.getenv("SERPHOUSE_API_KEY")
9
  hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN"))
10
 
11
- # 국가별 언어 코드 매핑
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  COUNTRY_LANGUAGES = {
13
  "South Korea": "ko",
14
  "Japan": "ja",
15
  "China": "zh",
 
16
  "Russia": "ru",
17
  "France": "fr",
18
  "Germany": "de",
@@ -27,93 +105,46 @@ COUNTRY_LANGUAGES = {
27
  "Saudi Arabia": "ar",
28
  "United Arab Emirates": "ar",
29
  "Egypt": "ar",
30
- "Morocco": "ar"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  }
32
 
33
- COUNTRY_LOCATIONS = {
34
- "United States": "United States",
35
- "United Kingdom": "United Kingdom",
36
- "Canada": "Canada",
37
- "Australia": "Australia",
38
- "Germany": "Germany",
39
- "France": "France",
40
- "Japan": "Japan",
41
- "South Korea": "South Korea",
42
- "China": "China",
43
- "India": "India",
44
- "Brazil": "Brazil",
45
- "Mexico": "Mexico",
46
- "Russia": "Russia",
47
- "Italy": "Italy",
48
- "Spain": "Spain",
49
- "Netherlands": "Netherlands",
50
- "Singapore": "Singapore",
51
- "Hong Kong": "Hong Kong",
52
- "Indonesia": "Indonesia",
53
- "Malaysia": "Malaysia",
54
- "Philippines": "Philippines",
55
- "Thailand": "Thailand",
56
- "Vietnam": "Vietnam",
57
- "Belgium": "Belgium",
58
- "Denmark": "Denmark",
59
- "Finland": "Finland",
60
- "Ireland": "Ireland",
61
- "Norway": "Norway",
62
- "Poland": "Poland",
63
- "Sweden": "Sweden",
64
- "Switzerland": "Switzerland",
65
- "Austria": "Austria",
66
- "Czech Republic": "Czech Republic",
67
- "Greece": "Greece",
68
- "Hungary": "Hungary",
69
- "Portugal": "Portugal",
70
- "Romania": "Romania",
71
- "Turkey": "Turkey",
72
- "Israel": "Israel",
73
- "Saudi Arabia": "Saudi Arabia",
74
- "United Arab Emirates": "United Arab Emirates",
75
- "South Africa": "South Africa",
76
- "Argentina": "Argentina",
77
- "Chile": "Chile",
78
- "Colombia": "Colombia",
79
- "Peru": "Peru",
80
- "Venezuela": "Venezuela",
81
- "New Zealand": "New Zealand",
82
- "Bangladesh": "Bangladesh",
83
- "Pakistan": "Pakistan",
84
- "Egypt": "Egypt",
85
- "Morocco": "Morocco",
86
- "Nigeria": "Nigeria",
87
- "Kenya": "Kenya",
88
- "Ukraine": "Ukraine",
89
- "Croatia": "Croatia",
90
- "Slovakia": "Slovakia",
91
- "Bulgaria": "Bulgaria",
92
- "Serbia": "Serbia",
93
- "Estonia": "Estonia",
94
- "Latvia": "Latvia",
95
- "Lithuania": "Lithuania",
96
- "Slovenia": "Slovenia",
97
- "Luxembourg": "Luxembourg",
98
- "Malta": "Malta",
99
- "Cyprus": "Cyprus",
100
- "Iceland": "Iceland"
101
- }
102
-
103
- MAJOR_COUNTRIES = list(COUNTRY_LOCATIONS.keys())
104
-
105
- def is_english(text):
106
- # 영어로만 구성되어 있는지 확인
107
- return all(ord(char) < 128 for char in text.replace(' ', ''))
108
-
109
  def translate_query(query, country):
110
  try:
111
- # 영어 입력인 경우 번역하지 않고 그대로 사용
112
  if is_english(query):
113
  print(f"English query detected, using original: {query}")
114
  return query[:255]
115
 
116
- # ํ•œ๊ธ€ ์ž…๋ ฅ์ด๊ณ  South Korea๊ฐ€ ์„ ํƒ๋œ ๊ฒฝ์šฐ
117
  if country == "South Korea":
118
  return query[:255]
119
 
@@ -124,7 +155,8 @@ def translate_query(query, country):
124
  # 번역 프롬프트 개선
125
  prompt = f"""Translate this text to {target_lang} language.
126
  For Japanese, use Kanji and Kana.
127
- For Chinese, use Simplified Chinese.
 
128
  For Korean, use Hangul.
129
  Only output the translated text without any explanation.
130
  Text to translate: {query}"""
@@ -145,6 +177,10 @@ def translate_query(query, country):
145
 
146
 
147
 
 
 
 
 
148
  def search_serphouse(query, country, page=1, num_result=10):
149
  url = "https://api.serphouse.com/serp/live"
150
 
@@ -157,12 +193,12 @@ def search_serphouse(query, country, page=1, num_result=10):
157
  "data": {
158
  "q": translated_query,
159
  "domain": "google.com",
160
- "loc": COUNTRY_LOCATIONS.get(country, "United States"),
161
  "lang": COUNTRY_LANGUAGES.get(country, "en"),
162
  "device": "desktop",
163
  "serp_type": "news",
164
  "page": "1",
165
- "num": "10" # num์œผ๋กœ ๋ณ€๊ฒฝ
166
  }
167
  }
168
 
 
8
  API_KEY = os.getenv("SERPHOUSE_API_KEY")
9
  hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN"))
10
 
11
+
12
+
13
+ COUNTRY_CODES = {
14
+ "United States": "US",
15
+ "United Kingdom": "GB",
16
+ "Canada": "CA",
17
+ "Australia": "AU",
18
+ "Germany": "DE",
19
+ "France": "FR",
20
+ "Japan": "JP",
21
+ "South Korea": "KR",
22
+ "China": "CN",
23
+ "Taiwan": "TW", # 대만 추가
24
+ "India": "IN",
25
+ "Brazil": "BR",
26
+ "Mexico": "MX",
27
+ "Russia": "RU",
28
+ "Italy": "IT",
29
+ "Spain": "ES",
30
+ "Netherlands": "NL",
31
+ "Singapore": "SG",
32
+ "Hong Kong": "HK",
33
+ "Indonesia": "ID",
34
+ "Malaysia": "MY",
35
+ "Philippines": "PH",
36
+ "Thailand": "TH",
37
+ "Vietnam": "VN",
38
+ "Belgium": "BE",
39
+ "Denmark": "DK",
40
+ "Finland": "FI",
41
+ "Ireland": "IE",
42
+ "Norway": "NO",
43
+ "Poland": "PL",
44
+ "Sweden": "SE",
45
+ "Switzerland": "CH",
46
+ "Austria": "AT",
47
+ "Czech Republic": "CZ",
48
+ "Greece": "GR",
49
+ "Hungary": "HU",
50
+ "Portugal": "PT",
51
+ "Romania": "RO",
52
+ "Turkey": "TR",
53
+ "Israel": "IL",
54
+ "Saudi Arabia": "SA",
55
+ "United Arab Emirates": "AE",
56
+ "South Africa": "ZA",
57
+ "Argentina": "AR",
58
+ "Chile": "CL",
59
+ "Colombia": "CO",
60
+ "Peru": "PE",
61
+ "Venezuela": "VE",
62
+ "New Zealand": "NZ",
63
+ "Bangladesh": "BD",
64
+ "Pakistan": "PK",
65
+ "Egypt": "EG",
66
+ "Morocco": "MA",
67
+ "Nigeria": "NG",
68
+ "Kenya": "KE",
69
+ "Ukraine": "UA",
70
+ "Croatia": "HR",
71
+ "Slovakia": "SK",
72
+ "Bulgaria": "BG",
73
+ "Serbia": "RS",
74
+ "Estonia": "EE",
75
+ "Latvia": "LV",
76
+ "Lithuania": "LT",
77
+ "Slovenia": "SI",
78
+ "Luxembourg": "LU",
79
+ "Malta": "MT",
80
+ "Cyprus": "CY",
81
+ "Iceland": "IS"
82
+ }
83
+
84
+
85
+ def is_english(text):
86
+ # 영어로만 구성되어 있는지 확인
87
+ return all(ord(char) < 128 for char in text.replace(' ', ''))
88
+
89
  COUNTRY_LANGUAGES = {
90
  "South Korea": "ko",
91
  "Japan": "ja",
92
  "China": "zh",
93
+ "Taiwan": "zh-tw", # 대만어(번체 중국어) 추가
94
  "Russia": "ru",
95
  "France": "fr",
96
  "Germany": "de",
 
105
  "Saudi Arabia": "ar",
106
  "United Arab Emirates": "ar",
107
  "Egypt": "ar",
108
+ "Morocco": "ar",
109
+ "Greece": "el",
110
+ "Poland": "pl",
111
+ "Czech Republic": "cs",
112
+ "Hungary": "hu",
113
+ "Turkey": "tr",
114
+ "Romania": "ro",
115
+ "Bulgaria": "bg",
116
+ "Croatia": "hr",
117
+ "Serbia": "sr",
118
+ "Slovakia": "sk",
119
+ "Slovenia": "sl",
120
+ "Estonia": "et",
121
+ "Latvia": "lv",
122
+ "Lithuania": "lt",
123
+ "Ukraine": "uk",
124
+ "Israel": "he",
125
+ "Bangladesh": "bn",
126
+ "Pakistan": "ur",
127
+ "Finland": "fi",
128
+ "Denmark": "da",
129
+ "Norway": "no",
130
+ "Sweden": "sv",
131
+ "Iceland": "is",
132
+ "Philippines": "fil",
133
+ "Brazil": "pt-br",
134
+ "Argentina": "es-ar",
135
+ "Chile": "es-cl",
136
+ "Colombia": "es-co",
137
+ "Peru": "es-pe",
138
+ "Venezuela": "es-ve"
139
  }
140
 
141
+ # 번역 프롬프트 수정
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  def translate_query(query, country):
143
  try:
 
144
  if is_english(query):
145
  print(f"English query detected, using original: {query}")
146
  return query[:255]
147
 
 
148
  if country == "South Korea":
149
  return query[:255]
150
 
 
155
  # 번역 프롬프트 개선
156
  prompt = f"""Translate this text to {target_lang} language.
157
  For Japanese, use Kanji and Kana.
158
+ For Chinese (China), use Simplified Chinese.
159
+ For Chinese (Taiwan), use Traditional Chinese.
160
  For Korean, use Hangul.
161
  Only output the translated text without any explanation.
162
  Text to translate: {query}"""
 
177
 
178
 
179
 
180
+ # MAJOR_COUNTRIES 정의 수정
181
+ MAJOR_COUNTRIES = list(COUNTRY_CODES.keys()) # COUNTRY_LOCATIONS 대신 COUNTRY_CODES 사용
182
+
183
+ # search_serphouse 함수 수정
184
  def search_serphouse(query, country, page=1, num_result=10):
185
  url = "https://api.serphouse.com/serp/live"
186
 
 
193
  "data": {
194
  "q": translated_query,
195
  "domain": "google.com",
196
+ "country_code": COUNTRY_CODES.get(country, "US"), # country_code 사용
197
  "lang": COUNTRY_LANGUAGES.get(country, "en"),
198
  "device": "desktop",
199
  "serp_type": "news",
200
  "page": "1",
201
+ "num": "10"
202
  }
203
  }
204