Update app.py
Browse files
app.py
CHANGED
@@ -3,50 +3,57 @@ import pandas as pd
|
|
3 |
import re
|
4 |
from collections import Counter
|
5 |
from openpyxl import Workbook
|
|
|
6 |
|
7 |
def process_excel(file):
|
8 |
# ์์
ํ์ผ ์ฝ๊ธฐ
|
9 |
-
df = pd.read_excel(file
|
10 |
|
11 |
-
# D4๋ถํฐ
|
12 |
-
|
13 |
-
|
14 |
-
# ํค์๋ ์ถ์ถ ๋ฐ
|
15 |
-
|
16 |
-
for
|
17 |
-
|
18 |
-
keywords =
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
#
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
|
43 |
-
# Gradio ์ธํฐํ์ด์ค
|
44 |
-
|
45 |
fn=process_excel,
|
46 |
-
inputs=gr.File(label="์์
|
47 |
-
outputs=gr.File(label="๊ฒฐ๊ณผ
|
48 |
-
title="์์
ํค์๋ ๋ถ์๊ธฐ"
|
49 |
)
|
50 |
|
51 |
if __name__ == "__main__":
|
52 |
-
|
|
|
3 |
import re
|
4 |
from collections import Counter
|
5 |
from openpyxl import Workbook
|
6 |
+
from openpyxl.chart import BarChart, Reference
|
7 |
|
8 |
def process_excel(file):
    """Count keyword frequencies in column D of an Excel file and chart them.

    Reads the uploaded workbook, takes the product names in column D from
    sheet row 4 downward, splits each name into word tokens, counts in how
    many cells each token appears, and writes the result (keyword/frequency
    table starting at A4/B4 plus a bar chart anchored at E4) to
    ``keyword_result.xlsx`` in the current working directory.

    Args:
        file: Anything ``pandas.read_excel`` accepts (path or file-like
            object), as handed over by the Gradio ``File`` input.

    Returns:
        The path of the written workbook, ``'keyword_result.xlsx'``.

    Raises:
        IndexError: If the input sheet has fewer than four columns.
    """
    output_path = 'keyword_result.xlsx'
    sheet_name = 'Sheet1'
    header_row = 4  # 1-based sheet row that receives the table header (A4/B4)

    # header=None keeps DataFrame row i aligned with sheet row i + 1. With the
    # default header handling, row 1 is consumed as column names and
    # iloc[3:] would start at D5 instead of the intended D4.
    df = pd.read_excel(file, header=None)

    # Product names: column D (index 3), from sheet row 4 (index 3) downward.
    product_names = df.iloc[3:, 3].dropna().astype(str)

    # Extract word tokens (this drops punctuation/special characters) and
    # count each keyword at most once per cell.
    word_pattern = re.compile(r'\b\w+\b')
    keyword_count = Counter()
    for name in product_names:
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # ordering of equally frequent keywords is reproducible between runs
        # (a set would reorder them depending on string hash randomisation).
        unique_words = list(dict.fromkeys(word_pattern.findall(name)))
        keyword_count.update(unique_words)

    # most_common() yields (keyword, count) pairs sorted by count, descending.
    result_df = pd.DataFrame(keyword_count.most_common(), columns=['키워드', '빈도'])

    with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
        # startrow is 0-based: header lands on sheet row 4, data on rows 5+.
        result_df.to_excel(
            writer,
            index=False,
            startrow=header_row - 1,
            startcol=0,
            sheet_name=sheet_name,
        )

        # An empty table would give the chart an inverted/empty range.
        if not result_df.empty:
            sheet = writer.sheets[sheet_name]
            last_row = header_row + len(result_df)

            chart = BarChart()
            # Values include the header cell because titles_from_data=True
            # consumes the first cell as the series title.
            values = Reference(sheet, min_col=2, min_row=header_row, max_row=last_row)
            # Categories must skip the header so labels line up with values.
            categories = Reference(sheet, min_col=1, min_row=header_row + 1, max_row=last_row)
            chart.add_data(values, titles_from_data=True)
            chart.set_categories(categories)
            chart.title = "키워드 빈도수"
            chart.x_axis.title = "키워드"
            chart.y_axis.title = "빈도"

            # Place the chart to the right of the table.
            sheet.add_chart(chart, "E4")

    return output_path
|
50 |
|
51 |
+
# Create the Gradio interface
|
52 |
+
# Web UI: one Excel file in, the generated analysis workbook out.
# The labels are user-facing Korean text ("Upload Excel file" /
# "Analysis result file"); they are restored here from the mis-decoded,
# line-broken form so that each literal is a single valid string.
interface = gr.Interface(
    fn=process_excel,
    inputs=gr.File(label="엑셀 파일 업로드"),
    outputs=gr.File(label="분석 결과 파일"),
)

# Only start the server when run as a script, not when imported.
if __name__ == "__main__":
    interface.launch()
|