Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,64 +1,46 @@
|
|
1 |
-
|
2 |
import pandas as pd
|
|
|
3 |
from collections import Counter
|
4 |
-
import os
|
5 |
|
6 |
-
|
7 |
-
app.config['UPLOAD_FOLDER'] = 'uploads/'
|
8 |
-
app.config['PROCESSED_FOLDER'] = 'processed/'
|
9 |
-
|
10 |
-
# 디렉토리 생성 (create the upload and processed directories)
|
11 |
-
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
|
12 |
-
os.makedirs(app.config['PROCESSED_FOLDER'], exist_ok=True)
|
13 |
-
|
14 |
-
def extract_keywords(text):
    """Return the distinct whitespace-separated words of *text*.

    Args:
        text: The string to split into keywords.

    Returns:
        A list containing each word once, in order of first appearance.
        An empty or whitespace-only *text* yields an empty list.
    """
    # split() with no argument collapses runs of whitespace and ignores
    # leading/trailing whitespace, so no empty-string "keyword" is produced
    # (split(" ") would return [""] for "" and "" entries for double spaces).
    # dict.fromkeys de-duplicates while keeping a deterministic order.
    return list(dict.fromkeys(text.split()))
|
18 |
-
|
19 |
-
def process_excel(input_file, output_file):
|
20 |
# 엑셀 파일 읽기 (read the Excel file)
|
21 |
-
df = pd.read_excel(
|
|
|
|
|
|
|
|
|
|
|
22 |
all_keywords = []
|
23 |
-
|
24 |
-
for
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
result_df =
|
36 |
-
|
37 |
-
|
|
|
|
|
38 |
result_df.to_excel(output_file, index=False)
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
return send_file(output_file, as_attachment=True)
|
52 |
-
|
53 |
-
return '''
|
54 |
-
<!doctype html>
|
55 |
-
<title>Upload Excel File</title>
|
56 |
-
<h1>Excel ํ์ผ์ ์
๋ก๋ํ์ธ์</h1>
|
57 |
-
<form method=post enctype=multipart/form-data>
|
58 |
-
<input type=file name=file>
|
59 |
-
<input type=submit value=Upload>
|
60 |
-
</form>
|
61 |
-
'''
|
62 |
|
63 |
if __name__ == "__main__":
|
64 |
-
|
|
|
1 |
+
import gradio as gr
|
2 |
import pandas as pd
|
3 |
+
import re
|
4 |
from collections import Counter
|
|
|
5 |
|
6 |
+
def process_excel(file):
    """Count keyword frequencies in column D of an Excel file.

    Every non-empty cell of the fourth column is stripped of punctuation and
    split on whitespace. Each distinct word is counted at most once per cell,
    so the resulting frequency is the number of cells containing the word.

    Args:
        file: A filesystem path, or a file-like object. If the object has a
            ``name`` attribute (e.g. a temp-file wrapper), that path is read.
            NOTE(review): which of the two Gradio passes for ``inputs="file"``
            depends on the Gradio version -- confirm against the deployed one.

    Returns:
        Path of a newly written ``keyword_counts.xlsx`` with the columns
        ``Keyword`` and ``Frequency``, sorted by descending frequency.

    Raises:
        IndexError: If the sheet has fewer than four columns.
    """
    import os
    import tempfile

    # Plain paths are used as-is; for wrappers, read from the path in ``.name``.
    # (The isinstance check matters: pathlib.Path also has a ``.name``, which
    # is only the basename.)
    if isinstance(file, (str, os.PathLike)):
        source = file
    else:
        source = getattr(file, "name", file)

    # Read the Excel file.
    df = pd.read_excel(source)

    # Column D is the fourth column, so its zero-based index is 3.
    product_names = df.iloc[:, 3].dropna()

    # Extract keywords and tally how many cells each one appears in.
    keyword_counts = Counter()
    for name in product_names:
        # Cells may hold numbers or dates; coerce to text before the regex,
        # which would otherwise raise TypeError on non-string values.
        words = re.sub(r'[^\w\s]', '', str(name)).split()
        # De-duplicate within the cell, keeping first-seen order so the
        # output does not depend on set iteration order.
        keyword_counts.update(list(dict.fromkeys(words)))

    # Arrange the result as a DataFrame, most frequent keyword first.
    # A stable sort keeps ties in first-seen order.
    result_df = pd.DataFrame(
        list(keyword_counts.items()), columns=['Keyword', 'Frequency']
    )
    result_df = result_df.sort_values(
        by='Frequency', ascending=False, kind='stable'
    ).reset_index(drop=True)

    # Write into a fresh temporary directory rather than a fixed path: the
    # directory is guaranteed to exist and concurrent requests cannot
    # overwrite each other's results. The directory is not removed here,
    # because the caller still needs to read the returned file.
    output_dir = tempfile.mkdtemp()
    output_file = os.path.join(output_dir, "keyword_counts.xlsx")
    result_df.to_excel(output_file, index=False)

    return output_file
|
35 |
+
|
36 |
+
# Gradio interface definition: one file upload in, one downloadable file out,
# with process_excel doing the conversion. Built at import time so the
# `iface` object exists whether or not this module is run as a script.
iface = gr.Interface(
    fn=process_excel,
    inputs="file",
    outputs="file",
    title="Excel Keyword Extractor",
    # User-facing text (Korean): "Extracts keywords from column D of the
    # Excel file, counts their frequency, and outputs a new Excel file."
    # Kept on a single line: a plain quoted string cannot span lines.
    description="엑셀 파일의 D열에서 키워드를 추출하고 빈도를 계산하여 새로운 엑셀 파일로 출력합니다.",
)

# Start the web UI only when executed directly, not when imported.
if __name__ == "__main__":
    iface.launch()
|