ginipick commited on
Commit
92d447e
·
verified ·
1 Parent(s): 3b2a23f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +151 -0
app.py CHANGED
@@ -7,8 +7,17 @@ from concurrent.futures import ThreadPoolExecutor
7
  from functools import lru_cache
8
  from requests.adapters import HTTPAdapter
9
  from requests.packages.urllib3.util.retry import Retry
 
 
10
 
 
 
 
11
 
 
 
 
 
12
 
13
  MAX_COUNTRY_RESULTS = 100 # 국가별 최대 결과 수
14
  MAX_GLOBAL_RESULTS = 1000 # 전세계 최대 결과 수
@@ -825,6 +834,88 @@ footer {visibility: hidden;}
825
  }
826
  """
827
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
828
  with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI ์„œ๋น„์Šค") as iface:
829
  with gr.Tabs():
830
  # 국가별 탭
@@ -902,6 +993,66 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI ์„œ๋น„์Šค") as
902
  'index': i,
903
  })
904
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
905
  # 이벤트 연결 부분
906
  # 국가별 탭 이벤트
907
  search_outputs = [status_message, translated_query_display, gr.Markdown(visible=False)]
 
7
  from functools import lru_cache
8
  from requests.adapters import HTTPAdapter
9
  from requests.packages.urllib3.util.retry import Retry
10
+ from openai import OpenAI
11
+ from bs4 import BeautifulSoup
12
 
13
# Hugging Face access token, read once at import time. The app refuses to
# start without it because every translation request needs it.
ACCESS_TOKEN = os.environ.get("HF_TOKEN")
if ACCESS_TOKEN is None or ACCESS_TOKEN == "":
    raise ValueError("HF_TOKEN environment variable is not set")

# OpenAI-compatible client pointed at the Hugging Face inference endpoint.
client = OpenAI(
    api_key=ACCESS_TOKEN,
    base_url="https://api-inference.huggingface.co/v1/",
)
21
 
22
  MAX_COUNTRY_RESULTS = 100 # 국가별 최대 결과 수
23
  MAX_GLOBAL_RESULTS = 1000 # 전세계 최대 결과 수
 
834
  }
835
  """
836
 
837
+
838
def get_article_content(url, timeout=10):
    """Download a news page and return its paragraph text as a single string.

    The page is fetched with a browser-like User-Agent and parsed with
    BeautifulSoup. The first matching container among ``<article>``,
    ``div.article-body``, ``div.content`` and ``div#article-body`` is used;
    if none matches, every ``<p>`` in the document is used instead.

    Args:
        url: Address of the article to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled host cannot block the caller indefinitely.

    Returns:
        The non-empty paragraph texts joined by single spaces, or a string
        starting with ``"Error crawling content: "`` when the URL is empty,
        the request fails, the server answers with an HTTP error status, or
        parsing raises. This function never raises.
    """
    if not url or not str(url).strip():
        return "Error crawling content: empty URL"

    # (tag name, attribute filter) pairs, tried in order of preference.
    container_candidates = (
        ('article', {}),
        ('div', {'class': 'article-body'}),
        ('div', {'class': 'content'}),
        ('div', {'id': 'article-body'}),
    )

    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        response = requests.get(url, headers=headers, timeout=timeout)
        # Treat 4xx/5xx as failures instead of parsing the error page as an article.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Look for a common article-body container; stop at the first hit so
        # later lookups are not performed needlessly.
        article_body = None
        for tag_name, attrs in container_candidates:
            article_body = soup.find(tag_name, attrs=attrs)
            if article_body is not None:
                break

        if article_body is not None:
            # Drop non-content elements before collecting paragraphs.
            for tag in article_body.find_all(['script', 'style', 'nav', 'header', 'footer']):
                tag.decompose()
            paragraphs = article_body.find_all('p')
        else:
            paragraphs = soup.find_all('p')

        stripped = (p.get_text().strip() for p in paragraphs)
        return ' '.join(text for text in stripped if text)
    except Exception as e:
        # Best-effort boundary: callers expect an error string, not an exception.
        return f"Error crawling content: {str(e)}"
872
+
873
def respond(
    url,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a Korean translation of the article found at ``url``.

    This is a generator: every yielded value is the full text accumulated so
    far, so the UI can show the translation as it grows.

    Args:
        url: Article address; must start with ``http://`` or ``https://``.
        history: Previous chat turns. Accepted for interface compatibility
            but not used when building the request.
        system_message: System prompt; a default translator prompt is used
            when it is empty.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature passed to the model.
        top_p: Nucleus-sampling threshold passed to the model.

    Yields:
        ``"Please enter a valid URL"`` for a missing or non-HTTP URL, the
        crawl error message when the article could not be fetched,
        otherwise the progressively longer translation, or
        ``"Translation error: ..."`` if the model call fails.
    """
    # NOTE(review): the click handler routes this output to a gr.Chatbot,
    # which normally expects a list of message pairs rather than a plain
    # string. Confirm against the Gradio version in use.
    url = (url or "").strip()
    if not url.lower().startswith(('http://', 'https://')):
        # A plain `return value` inside a generator never reaches the
        # caller, so the message has to be yielded.
        yield "Please enter a valid URL"
        return

    # Extract the article text.
    article_content = get_article_content(url)
    if not article_content or article_content.startswith("Error crawling content:"):
        # Show the crawl failure instead of asking the model to translate it.
        yield article_content or "Error crawling content: no article text found"
        return

    # Build the prompt for the translation request.
    translation_prompt = (
        "Please translate the following article to Korean.\n"
        "Maintain the original meaning and context while making it natural in Korean.\n"
        "\n"
        f"Article: {article_content}\n"
        "\n"
        "Korean translation:"
    )

    messages = [
        {"role": "system", "content": system_message or "You are a helpful translator that translates articles to Korean."},
        {"role": "user", "content": translation_prompt},
    ]

    response = ""

    try:
        stream = client.chat.completions.create(
            model="CohereForAI/c4ai-command-r-plus-08-2024",
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            messages=messages,
        )
        for chunk in stream:
            # Some chunks may carry no choices; skip them rather than
            # failing on the index.
            if not chunk.choices:
                continue
            token = chunk.choices[0].delta.content
            if token:
                response += token
                yield response
    except Exception as e:
        yield f"Translation error: {str(e)}"
917
+
918
+
919
  with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI ์„œ๋น„์Šค") as iface:
920
  with gr.Tabs():
921
  # 국가별 탭
 
993
  'index': i,
994
  })
995
 
996
+
997
+ # AI 번역 탭 추가
998
+ with gr.Tab("AI ๋ฒˆ์—ญ"):
999
+ gr.Markdown("๋‰ด์Šค URL์„ ์ž…๋ ฅํ•˜๋ฉด AI๊ฐ€ ํ•œ๊ตญ์–ด๋กœ ๋ฒˆ์—ญํ•˜์—ฌ ๊ธฐ์‚ฌ ํ˜•์‹์œผ๋กœ ์ž‘์„ฑํ•ฉ๋‹ˆ๋‹ค.")
1000
+
1001
+ with gr.Column():
1002
+ chatbot = gr.Chatbot(height=600)
1003
+
1004
+ with gr.Row():
1005
+ url_input = gr.Textbox(
1006
+ label="๋‰ด์Šค URL",
1007
+ placeholder="https://..."
1008
+ )
1009
+
1010
+ with gr.Accordion("๊ณ ๊ธ‰ ์„ค์ •", open=False):
1011
+ system_message = gr.Textbox(
1012
+ value="You are a helpful translator that translates articles to Korean accurately and naturally. 1) '๋ฒˆ์—ญ'์„ ๋จผ์ € ์ถœ๋ ฅํ•˜๋ผ. 2) ์ด์–ด์„œ '๋ฒˆ์—ญ๋œ ๋‚ด์šฉ'์— ๋Œ€ํ•ด '๋‰ด์Šค ๊ธฐ์‚ฌ' ํ˜•์‹์œผ๋กœ ์ž‘์„ฑํ•˜์—ฌ ์‹ ๋ฌธ ๊ธฐ์‚ฌ ์–‘์‹์œผ๋กœ ์ถœ๋ ฅํ•˜๋ผ. ๋ฐ˜๋“œ์‹œ ์ค„ ๋ฐ”๊พธ๊ธฐ๋“ฑ ๊ฐ€๋…์„ฑ ์žˆ๊ฒŒ ์ถœ๋ ฅํ•˜์—ฌ์•ผ ํ•˜๋ฉฐ, '์Šต๋‹ˆ๋‹ค.'.'์ž…๋‹ˆ๋‹ค.'๋“ฑ์„ ์‚ฌ์šฉํ•˜์ง€ ๋ง๊ณ  '๋‹ค.'๋กœ ๋๋‚ด์•ผ ํ•œ๋‹ค. ์ฒ ์ €ํ•˜๊ฒŒ ์‹ ๋ฌธ ๊ธฐ์‚ฌ ํ˜•์‹์„ ์ „๋ฌธ์ ์ธ ๊ธฐ์ž๊ฐ€ ์ž‘์„ฑํ•˜๋Š” ํ˜•์‹์ด์–ด์•ผ ํ•œ๋‹ค.",
1013
+ label="System message"
1014
+ )
1015
+ max_tokens = gr.Slider(
1016
+ minimum=1,
1017
+ maximum=4000,
1018
+ value=3824,
1019
+ step=1,
1020
+ label="Max new tokens"
1021
+ )
1022
+ temperature = gr.Slider(
1023
+ minimum=0.1,
1024
+ maximum=4.0,
1025
+ value=0.7,
1026
+ step=0.1,
1027
+ label="Temperature"
1028
+ )
1029
+ top_p = gr.Slider(
1030
+ minimum=0.1,
1031
+ maximum=1.0,
1032
+ value=0.95,
1033
+ step=0.05,
1034
+ label="Top-P"
1035
+ )
1036
+
1037
+ translate_button = gr.Button("๋ฒˆ์—ญ", variant="primary")
1038
+
1039
+ # 이벤트 연결
1040
+ translate_button.click(
1041
+ fn=respond,
1042
+ inputs=[
1043
+ url_input,
1044
+ chatbot,
1045
+ system_message,
1046
+ max_tokens,
1047
+ temperature,
1048
+ top_p,
1049
+ ],
1050
+ outputs=chatbot
1051
+ )
1052
+
1053
+
1054
+
1055
+
1056
  # 이벤트 연결 부분
1057
  # 국가별 탭 이벤트
1058
  search_outputs = [status_message, translated_query_display, gr.Markdown(visible=False)]