openfree committed
Commit 01f15df • Parent: 2ff143a

Update app.py

Files changed (1): app.py (+59, -23)
app.py CHANGED
@@ -1,9 +1,18 @@
+import os
+from dotenv import load_dotenv
 import gradio as gr
 from huggingface_hub import InferenceClient
-import os
 import pandas as pd
 from typing import List, Tuple
 
+# .env 파일 로드
+load_dotenv()
+
+# HuggingFace 토큰 설정
+HF_TOKEN = os.getenv("HF_TOKEN")
+if not HF_TOKEN:
+    raise ValueError("HF_TOKEN이 설정되지 않았습니다. .env 파일에 HF_TOKEN을 설정해주세요.")
+
 # LLM Models Definition
 LLM_MODELS = {
     "Cohere c4ai-crp-08-2024": "CohereForAI/c4ai-command-r-plus-08-2024", # Default
@@ -12,10 +21,10 @@ LLM_MODELS = {
 
 def get_client(model_name="Cohere c4ai-crp-08-2024"):
     try:
-        return InferenceClient(LLM_MODELS[model_name], token=os.getenv("HF_TOKEN"))
+        return InferenceClient(LLM_MODELS[model_name], token=HF_TOKEN)
     except Exception:
         # If primary model fails, try backup model
-        return InferenceClient(LLM_MODELS["Meta Llama3.3-70B"], token=os.getenv("HF_TOKEN"))
+        return InferenceClient(LLM_MODELS["Meta Llama3.3-70B"], token=HF_TOKEN)
 
 def analyze_file_content(content, file_type):
     """Analyze file content and return structural summary"""
@@ -25,9 +34,9 @@ def analyze_file_content(content, file_type):
             header = lines[0]
             columns = header.count('|') - 1
             rows = len(lines) - 3
-            return f"📊 Dataset Structure: {columns} columns, {rows} data samples"
+            return f"📊 데이터셋 구조: {columns}개 컬럼, {rows}개 데이터"
         except:
-            return "❌ Dataset structure analysis failed"
+            return "❌ 데이터셋 구조 분석 실패"
 
     lines = content.split('\n')
     total_lines = len(lines)
@@ -37,11 +46,11 @@ def analyze_file_content(content, file_type):
         functions = len([line for line in lines if 'def ' in line])
         classes = len([line for line in lines if 'class ' in line])
         imports = len([line for line in lines if 'import ' in line or 'from ' in line])
-        return f"💻 Code Structure: {total_lines} lines (Functions: {functions}, Classes: {classes}, Imports: {imports})"
+        return f"💻 코드 구조: {total_lines}줄 (함수: {functions}, 클래스: {classes}, 임포트: {imports})"
 
     paragraphs = content.count('\n\n') + 1
     words = len(content.split())
-    return f"📝 Document Structure: {total_lines} lines, {paragraphs} paragraphs, ~{words} words"
+    return f"📝 문서 구조: {total_lines}줄, {paragraphs}단락, 약 {words}단어"
 
 def read_uploaded_file(file):
     if file is None:
@@ -58,23 +67,23 @@ def read_uploaded_file(file):
             for encoding in encodings:
                 try:
                     df = pd.read_csv(file.name, encoding=encoding)
-                    content = f"📊 Data Preview:\n{df.head(10).to_markdown(index=False)}\n\n"
-                    content += f"\n📈 Data Information:\n"
-                    content += f"- Total Rows: {len(df)}\n"
-                    content += f"- Total Columns: {len(df.columns)}\n"
-                    content += f"- Column List: {', '.join(df.columns)}\n"
-                    content += f"\n📋 Column Data Types:\n"
+                    content = f"📊 데이터 미리보기:\n{df.head(10).to_markdown(index=False)}\n\n"
+                    content += f"\n📈 데이터 정보:\n"
+                    content += f"- 전체 행 수: {len(df)}\n"
+                    content += f"- 전체 열 수: {len(df.columns)}\n"
+                    content += f"- 컬럼 목록: {', '.join(df.columns)}\n"
+                    content += f"\n📋 컬럼 데이터 타입:\n"
                     for col, dtype in df.dtypes.items():
                         content += f"- {col}: {dtype}\n"
                     null_counts = df.isnull().sum()
                     if null_counts.any():
-                        content += f"\n⚠️ Missing Values:\n"
+                        content += f"\n⚠️ 결측치:\n"
                         for col, null_count in null_counts[null_counts > 0].items():
-                            content += f"- {col}: {null_count} missing\n"
+                            content += f"- {col}: {null_count}개 누락\n"
                     return content, "csv"
                 except UnicodeDecodeError:
                     continue
-            raise UnicodeDecodeError(f"❌ Unable to read file with supported encodings ({', '.join(encodings)})")
+            raise UnicodeDecodeError(f"❌ 지원되는 인코딩으로 파일을 읽을 수 없습니다 ({', '.join(encodings)})")
         else:
             encodings = ['utf-8', 'cp949', 'euc-kr', 'latin1']
             for encoding in encodings:
@@ -84,9 +93,9 @@ def read_uploaded_file(file):
                     return content, "text"
                 except UnicodeDecodeError:
                     continue
-            raise UnicodeDecodeError(f"❌ Unable to read file with supported encodings ({', '.join(encodings)})")
+            raise UnicodeDecodeError(f"❌ 지원되는 인코딩으로 파일을 읽을 수 없습니다 ({', '.join(encodings)})")
     except Exception as e:
-        return f"❌ Error reading file: {str(e)}", "error"
+        return f"❌ 파일 읽기 오류: {str(e)}", "error"
 
 def format_history(history):
     formatted_history = []
@@ -96,7 +105,6 @@ def read_uploaded_file(file):
         formatted_history.append({"role": "assistant", "content": assistant_msg})
     return formatted_history
 
-# 시스템 프롬프트 수정
 def chat(message, history, uploaded_file, system_message="", max_tokens=4000, temperature=0.7, top_p=0.9):
     system_prefix = """저는 여러분의 친근하고 지적인 AI 어시스턴트입니다. 다음과 같은 원칙으로 소통하겠습니다:
 
@@ -121,7 +129,7 @@ def chat(message, history, uploaded_file, system_message="", max_tokens=4000, temperature=0.7, top_p=0.9):
         else:
             system_message += f"\n\n파일 내용:\n```\n{content}\n```"
 
-        if message == "Starting file analysis...":
+        if message == "파일 분석을 시작합니다...":
             message = f"""[파일 구조 분석] {file_summary}
 
 다음 관점에서 도움을 드리겠습니다:
@@ -173,8 +181,12 @@ def chat(message, history, uploaded_file, system_message="", max_tokens=4000, temperature=0.7, top_p=0.9):
         ]
         yield "", error_history
 
-# UI 텍스트 한글화
-with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", title="GiniGEN 🤖") as demo:
+css = """
+footer {visibility: hidden}
+"""
+
+# UI 구성
+with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="GiniGEN 🤖") as demo:
     gr.HTML(
         """
         <div style="text-align: center; max-width: 800px; margin: 0 auto;">
@@ -189,6 +201,7 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", title="GiniGEN 🤖") as demo:
     chatbot = gr.Chatbot(
         height=600,
         label="대화창 💬",
+        show_label=True,
         type="messages"
     )
     msg = gr.Textbox(
@@ -215,7 +228,7 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", title="GiniGEN 🤖") as demo:
         temperature = gr.Slider(minimum=0, maximum=1, value=0.7, label="창의성 수준 🌡️")
         top_p = gr.Slider(minimum=0, maximum=1, value=0.9, label="응답 다양성 📈")
 
-    # 예시 질문 수정
+    # 예시 질문
     gr.Examples(
         examples=[
             ["안녕하세요! 어떤 도움이 필요하신가요? 🤝"],
@@ -228,5 +241,28 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", title="GiniGEN 🤖") as demo:
         inputs=msg,
     )
 
+    # 이벤트 바인딩
+    msg.submit(
+        chat,
+        inputs=[msg, chatbot, file_upload, system_message, max_tokens, temperature, top_p],
+        outputs=[msg, chatbot]
+    )
+
+    send.click(
+        chat,
+        inputs=[msg, chatbot, file_upload, system_message, max_tokens, temperature, top_p],
+        outputs=[msg, chatbot]
+    )
+
+    # 파일 업로드시 자동 분석
+    file_upload.change(
+        lambda: "파일 분석을 시작합니다...",
+        outputs=msg
+    ).then(
+        chat,
+        inputs=[msg, chatbot, file_upload, system_message, max_tokens, temperature, top_p],
+        outputs=[msg, chatbot]
+    )
+
 if __name__ == "__main__":
     demo.launch()
 
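With this change the app fails fast when no token is configured. A minimal local-setup sketch, assuming `python-dotenv` is installed and a `.env` file sits next to `app.py` (the token value is a placeholder, not a real token):

# .env (placeholder value):
#   HF_TOKEN=hf_xxxxxxxxxxxxxxxx
import os
from dotenv import load_dotenv  # pip install python-dotenv

load_dotenv()  # reads .env from the working directory
if not os.getenv("HF_TOKEN"):
    # mirrors the new guard in app.py, which raises ValueError instead
    raise SystemExit("HF_TOKEN not set; add it to .env before launching app.py")
print("HF_TOKEN found; app.py will start")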
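One caveat carried over from the old code: `UnicodeDecodeError` takes five constructor arguments (encoding, object, start, end, reason), so raising it with a single message string fails with a `TypeError` at the raise site. A standalone sketch of the same try-each-encoding fallback that raises a constructible exception instead (the helper name is illustrative, not part of app.py):

import pandas as pd

def read_csv_any_encoding(path: str) -> pd.DataFrame:
    # try each candidate encoding in order, as read_uploaded_file does
    encodings = ['utf-8', 'cp949', 'euc-kr', 'latin1']
    for encoding in encodings:
        try:
            return pd.read_csv(path, encoding=encoding)
        except UnicodeDecodeError:
            continue  # wrong encoding; try the next one
    # ValueError accepts a plain message, unlike UnicodeDecodeError
    raise ValueError(f"unable to read {path} with supported encodings ({', '.join(encodings)})")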
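The new upload wiring relies on a sentinel message: `file_upload.change` writes a fixed string into `msg`, `.then()` invokes `chat`, and the `if message == "파일 분석을 시작합니다...":` branch inside `chat` replaces the sentinel with the structured analysis request, so the two strings must stay byte-identical. A Gradio-free sketch of that routing (function and constant names are illustrative):

FILE_ANALYSIS_SENTINEL = "파일 분석을 시작합니다..."

def route_message(message: str, file_summary: str) -> str:
    # the sentinel written by file_upload.change switches chat into analysis mode
    if message == FILE_ANALYSIS_SENTINEL:
        return f"[파일 구조 분석] {file_summary}"
    return message  # ordinary chat input passes through unchanged

print(route_message(FILE_ANALYSIS_SENTINEL, "📊 데이터셋 구조: 5개 컬럼, 100개 데이터"))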