Update app.py
Browse files
app.py
CHANGED
@@ -17,11 +17,13 @@ def get_client(model_name):
|
|
17 |
|
18 |
def analyze_file_content(content, file_type):
|
19 |
"""ํ์ผ ๋ด์ฉ์ ๋ถ์ํ์ฌ ๊ตฌ์กฐ์ ์์ฝ์ ๋ฐํ"""
|
20 |
-
if file_type
|
21 |
try:
|
22 |
-
#
|
23 |
-
|
24 |
-
|
|
|
|
|
25 |
return f"๋ฐ์ดํฐ์
๊ตฌ์กฐ: {columns}๊ฐ ์ปฌ๋ผ, {rows}๊ฐ ๋ฐ์ดํฐ ์ํ"
|
26 |
except:
|
27 |
return "๋ฐ์ดํฐ์
๊ตฌ์กฐ ๋ถ์ ์คํจ"
|
@@ -31,14 +33,12 @@ def analyze_file_content(content, file_type):
|
|
31 |
total_lines = len(lines)
|
32 |
non_empty_lines = len([line for line in lines if line.strip()])
|
33 |
|
34 |
-
# ์ฝ๋ ํ์ผ ํน์ง ๋ถ์
|
35 |
if any(keyword in content.lower() for keyword in ['def ', 'class ', 'import ', 'function']):
|
36 |
functions = len([line for line in lines if 'def ' in line])
|
37 |
classes = len([line for line in lines if 'class ' in line])
|
38 |
imports = len([line for line in lines if 'import ' in line or 'from ' in line])
|
39 |
return f"์ฝ๋ ๊ตฌ์กฐ ๋ถ์: ์ด {total_lines}์ค (ํจ์ {functions}๊ฐ, ํด๋์ค {classes}๊ฐ, ์ํฌํธ {imports}๊ฐ)"
|
40 |
|
41 |
-
# ์ผ๋ฐ ํ
์คํธ ๋ฌธ์ ๋ถ์
|
42 |
paragraphs = content.count('\n\n') + 1
|
43 |
words = len(content.split())
|
44 |
return f"๋ฌธ์ ๊ตฌ์กฐ ๋ถ์: ์ด {total_lines}์ค, {paragraphs}๊ฐ ๋ฌธ๋จ, ์ฝ {words}๊ฐ ๋จ์ด"
|
@@ -47,14 +47,23 @@ def read_uploaded_file(file):
|
|
47 |
if file is None:
|
48 |
return "", ""
|
49 |
try:
|
50 |
-
|
|
|
|
|
51 |
df = pd.read_parquet(file.name, engine='pyarrow')
|
52 |
content = df.head(10).to_markdown(index=False)
|
53 |
return content, "parquet"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
else:
|
55 |
-
|
56 |
-
|
57 |
-
content = content.decode('utf-8')
|
58 |
return content, "text"
|
59 |
except Exception as e:
|
60 |
return f"ํ์ผ์ ์ฝ๋ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}", "error"
|
@@ -73,8 +82,10 @@ def chat(message, history, uploaded_file, model_name, system_message="", max_tok
|
|
73 |
1. ํ์ผ์ ์ ๋ฐ์ ์ธ ๊ตฌ์กฐ์ ๊ตฌ์ฑ
|
74 |
2. ์ฃผ์ ๋ด์ฉ๊ณผ ํจํด ๋ถ์
|
75 |
3. ๋ฐ์ดํฐ์ ํน์ง๊ณผ ์๋ฏธ
|
|
|
|
|
76 |
4. ์ ์ฌ์ ํ์ฉ ๋ฐฉ์
|
77 |
-
5.
|
78 |
|
79 |
์ ๋ฌธ๊ฐ์ ๊ด์ ์์ ์์ธํ๊ณ ๊ตฌ์กฐ์ ์ธ ๋ถ์์ ์ ๊ณตํ๋, ์ดํดํ๊ธฐ ์ฝ๊ฒ ์ค๋ช
ํ์ธ์. ๋ถ์ ๊ฒฐ๊ณผ๋ Markdown ํ์์ผ๋ก ์์ฑํ๊ณ , ๊ฐ๋ฅํ ํ ๊ตฌ์ฒด์ ์ธ ์์๋ฅผ ํฌํจํ์ธ์."""
|
80 |
|
@@ -87,7 +98,7 @@ def chat(message, history, uploaded_file, model_name, system_message="", max_tok
|
|
87 |
# ํ์ผ ๋ด์ฉ ๋ถ์ ๋ฐ ๊ตฌ์กฐ์ ์์ฝ
|
88 |
file_summary = analyze_file_content(content, file_type)
|
89 |
|
90 |
-
if file_type
|
91 |
system_message += f"\n\nํ์ผ ๋ด์ฉ:\n```markdown\n{content}\n```"
|
92 |
else:
|
93 |
system_message += f"\n\nํ์ผ ๋ด์ฉ:\n```\n{content}\n```"
|
@@ -152,8 +163,8 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
|
|
152 |
)
|
153 |
|
154 |
file_upload = gr.File(
|
155 |
-
label="ํ์ผ ์
๋ก๋ (ํ
์คํธ, ์ฝ๋,
|
156 |
-
file_types=["text", ".parquet"],
|
157 |
type="filepath"
|
158 |
)
|
159 |
|
|
|
17 |
|
18 |
def analyze_file_content(content, file_type):
|
19 |
"""ํ์ผ ๋ด์ฉ์ ๋ถ์ํ์ฌ ๊ตฌ์กฐ์ ์์ฝ์ ๋ฐํ"""
|
20 |
+
if file_type in ['parquet', 'csv']:
|
21 |
try:
|
22 |
+
# ๋ฐ์ดํฐ์
๊ตฌ์กฐ ๋ถ์
|
23 |
+
lines = content.split('\n')
|
24 |
+
header = lines[0]
|
25 |
+
columns = header.count('|') - 1
|
26 |
+
rows = len(lines) - 3 # ํค๋์ ๊ตฌ๋ถ์ ์ ์ธ
|
27 |
return f"๋ฐ์ดํฐ์
๊ตฌ์กฐ: {columns}๊ฐ ์ปฌ๋ผ, {rows}๊ฐ ๋ฐ์ดํฐ ์ํ"
|
28 |
except:
|
29 |
return "๋ฐ์ดํฐ์
๊ตฌ์กฐ ๋ถ์ ์คํจ"
|
|
|
33 |
total_lines = len(lines)
|
34 |
non_empty_lines = len([line for line in lines if line.strip()])
|
35 |
|
|
|
36 |
if any(keyword in content.lower() for keyword in ['def ', 'class ', 'import ', 'function']):
|
37 |
functions = len([line for line in lines if 'def ' in line])
|
38 |
classes = len([line for line in lines if 'class ' in line])
|
39 |
imports = len([line for line in lines if 'import ' in line or 'from ' in line])
|
40 |
return f"์ฝ๋ ๊ตฌ์กฐ ๋ถ์: ์ด {total_lines}์ค (ํจ์ {functions}๊ฐ, ํด๋์ค {classes}๊ฐ, ์ํฌํธ {imports}๊ฐ)"
|
41 |
|
|
|
42 |
paragraphs = content.count('\n\n') + 1
|
43 |
words = len(content.split())
|
44 |
return f"๋ฌธ์ ๊ตฌ์กฐ ๋ถ์: ์ด {total_lines}์ค, {paragraphs}๊ฐ ๋ฌธ๋จ, ์ฝ {words}๊ฐ ๋จ์ด"
|
|
|
47 |
if file is None:
|
48 |
return "", ""
|
49 |
try:
|
50 |
+
file_ext = os.path.splitext(file.name)[1].lower()
|
51 |
+
|
52 |
+
if file_ext == '.parquet':
|
53 |
df = pd.read_parquet(file.name, engine='pyarrow')
|
54 |
content = df.head(10).to_markdown(index=False)
|
55 |
return content, "parquet"
|
56 |
+
elif file_ext == '.csv':
|
57 |
+
df = pd.read_csv(file.name)
|
58 |
+
content = f"๋ฐ์ดํฐ ๋ฏธ๋ฆฌ๋ณด๊ธฐ:\n{df.head(10).to_markdown(index=False)}\n\n"
|
59 |
+
content += f"\n๋ฐ์ดํฐ ์ ๋ณด:\n"
|
60 |
+
content += f"- ์ด ํ ์: {len(df)}\n"
|
61 |
+
content += f"- ์ด ์ด ์: {len(df.columns)}\n"
|
62 |
+
content += f"- ์ปฌ๋ผ ๋ชฉ๋ก: {', '.join(df.columns)}\n"
|
63 |
+
return content, "csv"
|
64 |
else:
|
65 |
+
with open(file.name, 'r', encoding='utf-8') as f:
|
66 |
+
content = f.read()
|
|
|
67 |
return content, "text"
|
68 |
except Exception as e:
|
69 |
return f"ํ์ผ์ ์ฝ๋ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}", "error"
|
|
|
82 |
1. ํ์ผ์ ์ ๋ฐ์ ์ธ ๊ตฌ์กฐ์ ๊ตฌ์ฑ
|
83 |
2. ์ฃผ์ ๋ด์ฉ๊ณผ ํจํด ๋ถ์
|
84 |
3. ๋ฐ์ดํฐ์ ํน์ง๊ณผ ์๋ฏธ
|
85 |
+
- ๋ฐ์ดํฐ์
์ ๊ฒฝ์ฐ: ์ปฌ๋ผ์ ์๋ฏธ, ๋ฐ์ดํฐ ํ์
, ๊ฐ์ ๋ถํฌ
|
86 |
+
- ํ
์คํธ/์ฝ๋์ ๊ฒฝ์ฐ: ๊ตฌ์กฐ์ ํน์ง, ์ฃผ์ ํจํด
|
87 |
4. ์ ์ฌ์ ํ์ฉ ๋ฐฉ์
|
88 |
+
5. ๋ฐ์ดํฐ ํ์ง ๋ฐ ๊ฐ์ ๊ฐ๋ฅํ ๋ถ๋ถ
|
89 |
|
90 |
์ ๋ฌธ๊ฐ์ ๊ด์ ์์ ์์ธํ๊ณ ๊ตฌ์กฐ์ ์ธ ๋ถ์์ ์ ๊ณตํ๋, ์ดํดํ๊ธฐ ์ฝ๊ฒ ์ค๋ช
ํ์ธ์. ๋ถ์ ๊ฒฐ๊ณผ๋ Markdown ํ์์ผ๋ก ์์ฑํ๊ณ , ๊ฐ๋ฅํ ํ ๊ตฌ์ฒด์ ์ธ ์์๋ฅผ ํฌํจํ์ธ์."""
|
91 |
|
|
|
98 |
# ํ์ผ ๋ด์ฉ ๋ถ์ ๋ฐ ๊ตฌ์กฐ์ ์์ฝ
|
99 |
file_summary = analyze_file_content(content, file_type)
|
100 |
|
101 |
+
if file_type in ['parquet', 'csv']:
|
102 |
system_message += f"\n\nํ์ผ ๋ด์ฉ:\n```markdown\n{content}\n```"
|
103 |
else:
|
104 |
system_message += f"\n\nํ์ผ ๋ด์ฉ:\n```\n{content}\n```"
|
|
|
163 |
)
|
164 |
|
165 |
file_upload = gr.File(
|
166 |
+
label="ํ์ผ ์
๋ก๋ (ํ
์คํธ, ์ฝ๋, CSV, Parquet ํ์ผ)",
|
167 |
+
file_types=["text", ".csv", ".parquet"],
|
168 |
type="filepath"
|
169 |
)
|
170 |
|