Update app.py
Browse files
app.py
CHANGED
@@ -16,21 +16,32 @@ def get_client(model_name):
|
|
16 |
return InferenceClient(LLM_MODELS[model_name], token=os.getenv("HF_TOKEN"))
|
17 |
|
18 |
def analyze_file_content(content, file_type):
|
19 |
-
"""νμΌ λ΄μ©μ λΆμνμ¬
|
20 |
if file_type == 'parquet':
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
-
#
|
24 |
lines = content.split('\n')
|
25 |
total_lines = len(lines)
|
26 |
non_empty_lines = len([line for line in lines if line.strip()])
|
27 |
|
28 |
-
|
|
|
29 |
functions = len([line for line in lines if 'def ' in line])
|
30 |
classes = len([line for line in lines if 'class ' in line])
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
|
|
|
|
|
|
34 |
|
35 |
def read_uploaded_file(file):
|
36 |
if file is None:
|
@@ -57,38 +68,49 @@ def format_history(history):
|
|
57 |
return formatted_history
|
58 |
|
59 |
def chat(message, history, uploaded_file, model_name, system_message="", max_tokens=4000, temperature=0.7, top_p=0.9):
|
60 |
-
system_prefix = """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
if uploaded_file:
|
63 |
content, file_type = read_uploaded_file(uploaded_file)
|
64 |
if file_type == "error":
|
65 |
-
|
|
|
66 |
|
67 |
-
# νμΌ λ΄μ© λΆμ λ° μμ½
|
68 |
file_summary = analyze_file_content(content, file_type)
|
69 |
|
70 |
if file_type == 'parquet':
|
71 |
system_message += f"\n\nνμΌ λ΄μ©:\n```markdown\n{content}\n```"
|
72 |
else:
|
73 |
-
system_message += f"\n\nνμΌ λ΄μ©:\n
|
74 |
|
75 |
if message == "νμΌ λΆμμ μμν©λλ€.":
|
76 |
-
message = f"""[
|
77 |
|
78 |
-
λ€μ
|
79 |
-
1. νμΌμ
|
80 |
-
2. μ£Όμ
|
81 |
-
3.
|
82 |
-
4.
|
83 |
-
5.
|
|
|
84 |
|
85 |
messages = [{"role": "system", "content": f"{system_prefix} {system_message}"}]
|
86 |
messages.extend(format_history(history))
|
87 |
messages.append({"role": "user", "content": message})
|
88 |
|
89 |
-
response = ""
|
90 |
try:
|
91 |
client = get_client(model_name)
|
|
|
|
|
92 |
for msg in client.chat_completion(
|
93 |
messages,
|
94 |
max_tokens=max_tokens,
|
@@ -98,14 +120,12 @@ def chat(message, history, uploaded_file, model_name, system_message="", max_tok
|
|
98 |
):
|
99 |
token = msg.choices[0].delta.get('content', None)
|
100 |
if token:
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
return "", history
|
105 |
except Exception as e:
|
106 |
error_msg = f"μΆλ‘ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}"
|
107 |
-
|
108 |
-
return "", history
|
109 |
|
110 |
css = """
|
111 |
footer {visibility: hidden}
|
@@ -132,7 +152,7 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
|
|
132 |
)
|
133 |
|
134 |
file_upload = gr.File(
|
135 |
-
label="νμΌ μ
λ‘λ",
|
136 |
file_types=["text", ".parquet"],
|
137 |
type="filepath"
|
138 |
)
|
@@ -147,25 +167,31 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
|
|
147 |
msg.submit(
|
148 |
chat,
|
149 |
inputs=[msg, chatbot, file_upload, model_name, system_message, max_tokens, temperature, top_p],
|
150 |
-
outputs=[msg, chatbot]
|
|
|
|
|
|
|
|
|
|
|
151 |
)
|
152 |
|
153 |
# νμΌ μ
λ‘λ μ μλ λΆμ
|
154 |
file_upload.change(
|
155 |
chat,
|
156 |
inputs=[gr.Textbox(value="νμΌ λΆμμ μμν©λλ€."), chatbot, file_upload, model_name, system_message, max_tokens, temperature, top_p],
|
157 |
-
outputs=[msg, chatbot]
|
|
|
158 |
)
|
159 |
|
160 |
# μμ μΆκ°
|
161 |
gr.Examples(
|
162 |
examples=[
|
163 |
-
["
|
164 |
-
["
|
165 |
-
["
|
166 |
-
["
|
167 |
-
["
|
168 |
-
["
|
169 |
],
|
170 |
inputs=msg,
|
171 |
)
|
|
|
16 |
return InferenceClient(LLM_MODELS[model_name], token=os.getenv("HF_TOKEN"))
|
17 |
|
18 |
def analyze_file_content(content, file_type):
    """Return a one-line structural summary of an uploaded file's content.

    Args:
        content: The file content as text. For ``file_type == 'parquet'``
            this is expected to be a pipe-delimited markdown table (header
            row, separator row, then data rows) -- NOTE(review): that is
            what the column/row arithmetic implies; confirm against
            ``read_uploaded_file``.
        file_type: ``'parquet'`` selects the table summary; any other value
            is summarised as code or as plain text.

    Returns:
        A Korean summary string: dataset shape for parquet, line/function/
        class/import counts for code-like text, or line/paragraph/word
        counts for plain text. For parquet, a failure message is returned
        when ``content`` is not a string.
    """
    if file_type == 'parquet':
        try:
            # Ignore blank lines so a trailing newline (or its absence) does
            # not shift the row count by one.
            table_lines = [line for line in content.splitlines() if line.strip()]
        except AttributeError:
            # content is not text (e.g. None): report failure, don't crash.
            return "데이터셋 구조 분석 실패"

        header = table_lines[0] if table_lines else ''
        # "| a | b |" has one more pipe than it has columns.
        columns = max(header.count('|') - 1, 0)
        # Exclude the header row and the separator row; never go negative.
        rows = max(len(table_lines) - 2, 0)
        return f"데이터셋 구조: {columns}개 컬럼, {rows}개 데이터 샘플"

    # Text / code files. splitlines() does not invent an extra empty line
    # after a trailing newline and yields zero lines for empty content.
    lines = content.splitlines()
    total_lines = len(lines)

    # Heuristic: treat the file as source code if any of these markers appear.
    lowered = content.lower()
    if any(keyword in lowered for keyword in ('def ', 'class ', 'import ', 'function')):
        # Substring matching is kept on purpose so that forms such as
        # "public class Foo" or "export class Foo" are still counted.
        functions = sum(1 for line in lines if 'def ' in line)
        classes = sum(1 for line in lines if 'class ' in line)
        imports = sum(1 for line in lines if 'import ' in line or 'from ' in line)
        return (
            f"코드 구조 분석: 총 {total_lines}줄 "
            f"(함수 {functions}개, 클래스 {classes}개, 임포트 {imports}개)"
        )

    # Plain text: a paragraph starts at every non-blank line that follows a
    # blank line (or the start of the text), so runs of blank lines and
    # trailing blank lines are not counted as extra paragraphs.
    paragraphs = sum(
        1
        for previous, current in zip([''] + lines, lines)
        if current.strip() and not previous.strip()
    )
    words = len(content.split())
    return f"문서 구조 분석: 총 {total_lines}줄, {paragraphs}개 문단, 약 {words}개 단어"
|
45 |
|
46 |
def read_uploaded_file(file):
|
47 |
if file is None:
|
|
|
68 |
return formatted_history
|
69 |
|
70 |
def chat(message, history, uploaded_file, model_name, system_message="", max_tokens=4000, temperature=0.7, top_p=0.9):
|
71 |
+
system_prefix = """λλ νμΌ λΆμ μ λ¬Έκ°μ
λλ€. μ
λ‘λλ νμΌμ λ΄μ©μ κΉμ΄ μκ² λΆμνμ¬ λ€μκ³Ό κ°μ κ΄μ μμ μ€λͺ
ν΄μΌ ν©λλ€:
|
72 |
+
|
73 |
+
1. νμΌμ μ λ°μ μΈ κ΅¬μ‘°μ ꡬμ±
|
74 |
+
2. μ£Όμ λ΄μ©κ³Ό ν¨ν΄ λΆμ
|
75 |
+
3. λ°μ΄ν°μ νΉμ§κ³Ό μλ―Έ
|
76 |
+
4. μ μ¬μ νμ© λ°©μ
|
77 |
+
5. μ£Όμν΄μΌ ν μ μ΄λ κ°μ κ°λ₯ν λΆλΆ
|
78 |
+
|
79 |
+
μ λ¬Έκ°μ κ΄μ μμ μμΈνκ³ κ΅¬μ‘°μ μΈ λΆμμ μ 곡νλ, μ΄ν΄νκΈ° μ½κ² μ€λͺ
νμΈμ. λΆμ κ²°κ³Όλ Markdown νμμΌλ‘ μμ±νκ³ , κ°λ₯ν ν ꡬ체μ μΈ μμλ₯Ό ν¬ν¨νμΈμ."""
|
80 |
|
81 |
if uploaded_file:
|
82 |
content, file_type = read_uploaded_file(uploaded_file)
|
83 |
if file_type == "error":
|
84 |
+
yield "", history + [[message, content]]
|
85 |
+
return
|
86 |
|
87 |
+
# νμΌ λ΄μ© λΆμ λ° κ΅¬μ‘°μ μμ½
|
88 |
file_summary = analyze_file_content(content, file_type)
|
89 |
|
90 |
if file_type == 'parquet':
|
91 |
system_message += f"\n\nνμΌ λ΄μ©:\n```markdown\n{content}\n```"
|
92 |
else:
|
93 |
+
system_message += f"\n\nνμΌ λ΄μ©:\n```\n{content}\n```"
|
94 |
|
95 |
if message == "νμΌ λΆμμ μμν©λλ€.":
|
96 |
+
message = f"""[ꡬ쑰 λΆμ] {file_summary}
|
97 |
|
98 |
+
λ€μ κ΄μ μμ μμΈ λΆμμ μ 곡ν΄μ£ΌμΈμ:
|
99 |
+
1. νμΌμ μ λ°μ μΈ κ΅¬μ‘°μ νμ
|
100 |
+
2. μ£Όμ λ΄μ© λ° κ΅¬μ±μμ λΆμ
|
101 |
+
3. λ°μ΄ν°/λ΄μ©μ νΉμ§κ³Ό ν¨ν΄
|
102 |
+
4. νμ§ λ° μμ±λ νκ°
|
103 |
+
5. κ°μ κ°λ₯ν λΆλΆ μ μ
|
104 |
+
6. μ€μ νμ© λ°©μ λ° μΆμ²μ¬ν"""
|
105 |
|
106 |
messages = [{"role": "system", "content": f"{system_prefix} {system_message}"}]
|
107 |
messages.extend(format_history(history))
|
108 |
messages.append({"role": "user", "content": message})
|
109 |
|
|
|
110 |
try:
|
111 |
client = get_client(model_name)
|
112 |
+
partial_message = ""
|
113 |
+
|
114 |
for msg in client.chat_completion(
|
115 |
messages,
|
116 |
max_tokens=max_tokens,
|
|
|
120 |
):
|
121 |
token = msg.choices[0].delta.get('content', None)
|
122 |
if token:
|
123 |
+
partial_message += token
|
124 |
+
yield "", history + [[message, partial_message]]
|
125 |
+
|
|
|
126 |
except Exception as e:
|
127 |
error_msg = f"μΆλ‘ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}"
|
128 |
+
yield "", history + [[message, error_msg]]
|
|
|
129 |
|
130 |
css = """
|
131 |
footer {visibility: hidden}
|
|
|
152 |
)
|
153 |
|
154 |
file_upload = gr.File(
|
155 |
+
label="νμΌ μ
λ‘λ (ν
μ€νΈ, μ½λ, λ°μ΄ν° νμΌ)",
|
156 |
file_types=["text", ".parquet"],
|
157 |
type="filepath"
|
158 |
)
|
|
|
167 |
msg.submit(
|
168 |
chat,
|
169 |
inputs=[msg, chatbot, file_upload, model_name, system_message, max_tokens, temperature, top_p],
|
170 |
+
outputs=[msg, chatbot],
|
171 |
+
queue=True
|
172 |
+
).then(
|
173 |
+
lambda: gr.update(interactive=True),
|
174 |
+
None,
|
175 |
+
[msg]
|
176 |
)
|
177 |
|
178 |
# νμΌ μ
λ‘λ μ μλ λΆμ
|
179 |
file_upload.change(
|
180 |
chat,
|
181 |
inputs=[gr.Textbox(value="νμΌ λΆμμ μμν©λλ€."), chatbot, file_upload, model_name, system_message, max_tokens, temperature, top_p],
|
182 |
+
outputs=[msg, chatbot],
|
183 |
+
queue=True
|
184 |
)
|
185 |
|
186 |
# μμ μΆκ°
|
187 |
gr.Examples(
|
188 |
examples=[
|
189 |
+
["νμΌμ μ λ°μ μΈ κ΅¬μ‘°μ νΉμ§μ μμΈν μ€λͺ
ν΄μ£ΌμΈμ."],
|
190 |
+
["μ΄ νμΌμ μ£Όμ ν¨ν΄κ³Ό νΉμ§μ λΆμν΄μ£ΌμΈμ."],
|
191 |
+
["νμΌμ νμ§κ³Ό κ°μ κ°λ₯ν λΆλΆμ νκ°ν΄μ£ΌμΈμ."],
|
192 |
+
["μ΄ νμΌμ μ€μ λ‘ μ΄λ»κ² νμ©ν μ μμκΉμ?"],
|
193 |
+
["νμΌμ μ£Όμ λ΄μ©μ μμ½νκ³ ν΅μ¬ μΈμ¬μ΄νΈλ₯Ό λμΆν΄μ£ΌμΈμ."],
|
194 |
+
["μ΄μ λΆμμ μ΄μ΄μ λ μμΈν μ€λͺ
ν΄μ£ΌμΈμ."],
|
195 |
],
|
196 |
inputs=msg,
|
197 |
)
|