Kims12 committed on
Commit
0b61b2b
•
1 Parent(s): 3fc9d20

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -56
app.py CHANGED
@@ -1,64 +1,46 @@
1
- from flask import Flask, request, redirect, url_for, send_file, render_template
2
  import pandas as pd
 
3
  from collections import Counter
4
- import os
5
 
6
- app = Flask(__name__)
7
- app.config['UPLOAD_FOLDER'] = 'uploads/'
8
- app.config['PROCESSED_FOLDER'] = 'processed/'
9
-
10
- # 디렉토리 생성
11
- os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
12
- os.makedirs(app.config['PROCESSED_FOLDER'], exist_ok=True)
13
-
14
- def extract_keywords(text):
15
- # 텍스트에서 중복을 제거하고 공백으로 분리하여 키워드 리스트 반환
16
- keywords = list(set(text.split(" ")))
17
- return keywords
18
-
19
- def process_excel(input_file, output_file):
20
  # 엑셀 파일 읽기
21
- df = pd.read_excel(input_file)
 
 
 
 
 
22
  all_keywords = []
23
-
24
- for index, row in df.iterrows():
25
- product_name = row['상품명(링크)']
26
- if pd.notna(product_name):
27
- keywords = extract_keywords(product_name)
28
- all_keywords.extend(keywords)
29
-
30
- # 키워드 빈도수 계산
31
- keyword_counter = Counter(all_keywords)
32
-
33
- # 결과를 데이터프레임으로 변환
34
- result_df = pd.DataFrame(keyword_counter.items(), columns=['키워드', '빈도수'])
35
- result_df = result_df.sort_values(by='빈도수', ascending=False)
36
-
37
- # 결과를 새로운 엑셀 파일로 저장
 
 
38
  result_df.to_excel(output_file, index=False)
39
-
40
- @app.route('/', methods=['GET', 'POST'])
41
- def upload_file():
42
- if request.method == 'POST':
43
- file = request.files['file']
44
- if file and file.filename.endswith('.xlsx'):
45
- filepath = os.path.join(app.config['UPLOAD_FOLDER'], file.filename)
46
- file.save(filepath)
47
-
48
- output_file = os.path.join(app.config['PROCESSED_FOLDER'], 'output.xlsx')
49
- process_excel(filepath, output_file)
50
-
51
- return send_file(output_file, as_attachment=True)
52
-
53
- return '''
54
- <!doctype html>
55
- <title>Upload Excel File</title>
56
- <h1>Excel 파일을 업로드하세요</h1>
57
- <form method=post enctype=multipart/form-data>
58
- <input type=file name=file>
59
- <input type=submit value=Upload>
60
- </form>
61
- '''
62
 
63
  if __name__ == "__main__":
64
- app.run(debug=True)
 
1
+ import gradio as gr
2
  import pandas as pd
3
+ import re
4
  from collections import Counter
 
5
 
6
+ def process_excel(file):
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  # 엑셀 파일 읽기
8
+ df = pd.read_excel(file)
9
+
10
+ # D열의 데이터 추출
11
+ product_names = df.iloc[:, 3].dropna() # D์—ด์€ 0๋ถ€ํ„ฐ ์‹œ์ž‘ํ•˜๋ฏ€๋กœ index๋Š” 3
12
+
13
+ # 키워드 추출 및 빈도 계산
14
  all_keywords = []
15
+
16
+ for name in product_names:
17
+ # 특수문자 제거 및 공백 기준으로 분할
18
+ words = re.sub(r'[^\w\s]', '', name).split()
19
+ # 중복 제거
20
+ unique_words = set(words)
21
+ all_keywords.extend(unique_words)
22
+
23
+ # 빈도 계산
24
+ keyword_counts = Counter(all_keywords)
25
+
26
+ # 결과를 데이터프레임으로 정리
27
+ result_df = pd.DataFrame(keyword_counts.items(), columns=['Keyword', 'Frequency'])
28
+ result_df = result_df.sort_values(by='Frequency', ascending=False).reset_index(drop=True)
29
+
30
+ # 엑셀 파일로 저장
31
+ output_file = "/mnt/data/keyword_counts.xlsx"
32
  result_df.to_excel(output_file, index=False)
33
+
34
+ return output_file
35
+
36
+ # Gradio 인터페이스 정의
37
+ iface = gr.Interface(
38
+ fn=process_excel,
39
+ inputs="file",
40
+ outputs="file",
41
+ title="Excel Keyword Extractor",
42
+ description="엑셀 파일의 D열에서 키워드를 추출하고 빈도를 계산하여 새로운 엑셀 파일로 출력합니다."
43
+ )
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
  if __name__ == "__main__":
46
+ iface.launch()