openfree committed on
Commit
99d94e0
•
1 Parent(s): da20c1b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -14
app.py CHANGED
@@ -17,11 +17,13 @@ def get_client(model_name):
17
 
18
  def analyze_file_content(content, file_type):
19
  """ํŒŒ์ผ ๋‚ด์šฉ์„ ๋ถ„์„ํ•˜์—ฌ ๊ตฌ์กฐ์  ์š”์•ฝ์„ ๋ฐ˜ํ™˜"""
20
- if file_type == 'parquet':
21
  try:
22
- # Parquet 파일 구조 분석
23
- columns = content.split('\n')[0].count('|') - 1
24
- rows = content.count('\n') - 2 # 헤더와 구분선 제외
 
 
25
  return f"데이터셋 구조: {columns}개 컬럼, {rows}개 데이터 샘플"
26
  except:
27
  return "데이터셋 구조 분석 실패"
@@ -31,14 +33,12 @@ def analyze_file_content(content, file_type):
31
  total_lines = len(lines)
32
  non_empty_lines = len([line for line in lines if line.strip()])
33
 
34
- # 코드 파일 특징 분석
35
  if any(keyword in content.lower() for keyword in ['def ', 'class ', 'import ', 'function']):
36
  functions = len([line for line in lines if 'def ' in line])
37
  classes = len([line for line in lines if 'class ' in line])
38
  imports = len([line for line in lines if 'import ' in line or 'from ' in line])
39
  return f"코드 구조 분석: 총 {total_lines}줄 (함수 {functions}개, 클래스 {classes}개, 임포트 {imports}개)"
40
 
41
- # 일반 텍스트 문서 분석
42
  paragraphs = content.count('\n\n') + 1
43
  words = len(content.split())
44
  return f"문서 구조 분석: 총 {total_lines}줄, {paragraphs}개 문단, 약 {words}개 단어"
@@ -47,14 +47,23 @@ def read_uploaded_file(file):
47
  if file is None:
48
  return "", ""
49
  try:
50
- if file.name.endswith('.parquet'):
 
 
51
  df = pd.read_parquet(file.name, engine='pyarrow')
52
  content = df.head(10).to_markdown(index=False)
53
  return content, "parquet"
 
 
 
 
 
 
 
 
54
  else:
55
- content = file.read()
56
- if isinstance(content, bytes):
57
- content = content.decode('utf-8')
58
  return content, "text"
59
  except Exception as e:
60
  return f"파일을 읽는 중 오류가 발생했습니다: {str(e)}", "error"
@@ -73,8 +82,10 @@ def chat(message, history, uploaded_file, model_name, system_message="", max_tok
73
  1. 파일의 전반적인 구조와 구성
74
  2. 주요 내용과 패턴 분석
75
  3. 데이터의 특징과 의미
 
 
76
  4. 잠재적 활용 방안
77
- 5. 주의해야 할 점이나 개선 가능한 부분
78
 
79
  전문가적 관점에서 상세하고 구조적인 분석을 제공하되, 이해하기 쉽게 설명하세요. 분석 결과는 Markdown 형식으로 작성하고, 가능한 한 구체적인 예시를 포함하세요."""
80
 
@@ -87,7 +98,7 @@ def chat(message, history, uploaded_file, model_name, system_message="", max_tok
87
  # 파일 내용 분석 및 구조적 요약
88
  file_summary = analyze_file_content(content, file_type)
89
 
90
- if file_type == 'parquet':
91
  system_message += f"\n\n파일 내용:\n```markdown\n{content}\n```"
92
  else:
93
  system_message += f"\n\n파일 내용:\n```\n{content}\n```"
@@ -152,8 +163,8 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
152
  )
153
 
154
  file_upload = gr.File(
155
- label="파일 업로드 (텍스트, 코드, 데이터 파일)",
156
- file_types=["text", ".parquet"],
157
  type="filepath"
158
  )
159
 
 
17
 
18
  def analyze_file_content(content, file_type):
19
  """파일 내용을 분석하여 구조적 요약을 반환"""
20
+ if file_type in ['parquet', 'csv']:
21
  try:
22
+ # 데이터셋 구조 분석
23
+ lines = content.split('\n')
24
+ header = lines[0]
25
+ columns = header.count('|') - 1
26
+ rows = len(lines) - 3 # 헤더와 구분선 제외
27
  return f"데이터셋 구조: {columns}개 컬럼, {rows}개 데이터 샘플"
28
  except:
29
  return "데이터셋 구조 분석 실패"
 
33
  total_lines = len(lines)
34
  non_empty_lines = len([line for line in lines if line.strip()])
35
 
 
36
  if any(keyword in content.lower() for keyword in ['def ', 'class ', 'import ', 'function']):
37
  functions = len([line for line in lines if 'def ' in line])
38
  classes = len([line for line in lines if 'class ' in line])
39
  imports = len([line for line in lines if 'import ' in line or 'from ' in line])
40
  return f"코드 구조 분석: 총 {total_lines}줄 (함수 {functions}개, 클래스 {classes}개, 임포트 {imports}개)"
41
 
 
42
  paragraphs = content.count('\n\n') + 1
43
  words = len(content.split())
44
  return f"문서 구조 분석: 총 {total_lines}줄, {paragraphs}개 문단, 약 {words}개 단어"
 
47
  if file is None:
48
  return "", ""
49
  try:
50
+ file_ext = os.path.splitext(file.name)[1].lower()
51
+
52
+ if file_ext == '.parquet':
53
  df = pd.read_parquet(file.name, engine='pyarrow')
54
  content = df.head(10).to_markdown(index=False)
55
  return content, "parquet"
56
+ elif file_ext == '.csv':
57
+ df = pd.read_csv(file.name)
58
+ content = f"데이터 미리보기:\n{df.head(10).to_markdown(index=False)}\n\n"
59
+ content += f"\n데이터 정보:\n"
60
+ content += f"- 총 행 수: {len(df)}\n"
61
+ content += f"- 총 열 수: {len(df.columns)}\n"
62
+ content += f"- 컬럼 목록: {', '.join(df.columns)}\n"
63
+ return content, "csv"
64
  else:
65
+ with open(file.name, 'r', encoding='utf-8') as f:
66
+ content = f.read()
 
67
  return content, "text"
68
  except Exception as e:
69
  return f"파일을 읽는 중 오류가 발생했습니다: {str(e)}", "error"
 
82
  1. 파일의 전반적인 구조와 구성
83
  2. 주요 내용과 패턴 분석
84
  3. 데이터의 특징과 의미
85
+ - 데이터셋의 경우: 컬럼의 의미, 데이터 타입, 값의 분포
86
+ - 텍스트/코드의 경우: 구조적 특징, 주요 패턴
87
  4. 잠재적 활용 방안
88
+ 5. 데이터 품질 및 개선 가능한 부분
89
 
90
  전문가적 관점에서 상세하고 구조적인 분석을 제공하되, 이해하기 쉽게 설명하세요. 분석 결과는 Markdown 형식으로 작성하고, 가능한 한 구체적인 예시를 포함하세요."""
91
 
 
98
  # 파일 내용 분석 및 구조적 요약
99
  file_summary = analyze_file_content(content, file_type)
100
 
101
+ if file_type in ['parquet', 'csv']:
102
  system_message += f"\n\n파일 내용:\n```markdown\n{content}\n```"
103
  else:
104
  system_message += f"\n\n파일 내용:\n```\n{content}\n```"
 
163
  )
164
 
165
  file_upload = gr.File(
166
+ label="파일 업로드 (텍스트, 코드, CSV, Parquet 파일)",
167
+ file_types=["text", ".csv", ".parquet"],
168
  type="filepath"
169
  )
170