Spaces:

Kims12
/

1_4_keyword

Sleeping

App Files Community

Kims12 committed on Aug 19

Commit

0b61b2b

•

1 Parent(s): 3fc9d20

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -56

app.py CHANGED Viewed

@@ -1,64 +1,46 @@
-from flask import Flask, request, redirect, url_for, send_file, render_template
 import pandas as pd
 from collections import Counter
-import os
# Flask application object and the two working directories it uses.
app = Flask(__name__)
app.config.update(
    UPLOAD_FOLDER='uploads/',
    PROCESSED_FOLDER='processed/',
)
# Create both directories up front; exist_ok keeps restarts harmless.
for _folder_key in ('UPLOAD_FOLDER', 'PROCESSED_FOLDER'):
    os.makedirs(app.config[_folder_key], exist_ok=True)
def extract_keywords(text):
    """Return the distinct whitespace-separated keywords found in *text*.

    Keywords are returned in first-seen order so the result is
    deterministic. Runs of whitespace (spaces, tabs, newlines) act as a
    single separator, so no empty-string keyword is ever produced.

    Args:
        text: The text to split. Non-string values (for example a numeric
            spreadsheet cell) are converted with ``str`` first.

    Returns:
        A list of unique keywords; empty for ``None`` or blank input.
    """
    if text is None:
        return []
    # dict.fromkeys de-duplicates while preserving insertion order.
    return list(dict.fromkeys(str(text).split()))
def process_excel(input_file, output_file):
    """Count keyword frequencies in the product-name column of a workbook.

    Reads *input_file*, splits every non-empty cell of the '상품명(링크)'
    column into keywords with ``extract_keywords`` and writes a two-column
    sheet ('키워드', '빈도수'), sorted by descending frequency, to
    *output_file*.

    Args:
        input_file: Path or file object accepted by ``pandas.read_excel``.
        output_file: Destination accepted by ``DataFrame.to_excel``.

    Raises:
        KeyError: If the workbook has no '상품명(링크)' column.
    """
    # Read the Excel file.
    df = pd.read_excel(input_file)
    product_names = df['상품명(링크)'].dropna()

    # Count keyword frequencies.
    keyword_counter = Counter()
    for product_name in product_names:
        # Cells may hold numbers; coerce to str so splitting never fails.
        keyword_counter.update(extract_keywords(str(product_name)))

    # Convert the result to a DataFrame.
    result_df = pd.DataFrame(list(keyword_counter.items()), columns=['키워드', '빈도수'])
    # mergesort is stable, so equally frequent keywords keep first-seen order.
    result_df = result_df.sort_values(by='빈도수', ascending=False, kind='mergesort')

    # Save the result to a new Excel file.
    result_df.to_excel(output_file, index=False)
@app.route('/', methods=['GET', 'POST'])
def upload_file():
    """Serve the upload form and process an uploaded .xlsx workbook.

    On POST with an ``.xlsx`` file, the upload is saved under
    UPLOAD_FOLDER, run through ``process_excel`` and the resulting
    workbook is returned as a download. Any other request gets the
    upload form.
    """
    if request.method == 'POST':
        file = request.files['file']
        # Case-insensitive check so 'REPORT.XLSX' is accepted as well.
        if file and file.filename.lower().endswith('.xlsx'):
            # The filename is client-controlled: keep only its final path
            # component so it cannot escape the upload directory.
            safe_name = os.path.basename(file.filename)
            filepath = os.path.join(app.config['UPLOAD_FOLDER'], safe_name)
            file.save(filepath)
            output_file = os.path.join(app.config['PROCESSED_FOLDER'], 'output.xlsx')
            process_excel(filepath, output_file)
            return send_file(output_file, as_attachment=True)
    return '''
    <!doctype html>
    <title>Upload Excel File</title>
    <h1>Excel 파일을 업로드하세요</h1>
    <form method=post enctype=multipart/form-data>
      <input type=file name=file>
      <input type=submit value=Upload>
    </form>
    '''
 if __name__ == "__main__":
-    app.run(debug=True)

+import gradio as gr
 import pandas as pd
+import re
 from collections import Counter
def process_excel(file):
    """Extract keywords from column D of an Excel file and save their counts.

    Each non-empty cell of the fourth column has punctuation removed and is
    split on whitespace. A keyword is counted once per cell, however often
    it repeats inside that cell. The counts are written to a new workbook
    with the columns 'Keyword' and 'Frequency', ordered by descending
    frequency and then alphabetically.

    Args:
        file: Path to the workbook, or a file object. A non-path object
            that exposes a ``name`` attribute is read from that path
            (presumably what the UI layer passes for uploads; verify
            against the installed Gradio version).

    Returns:
        Path of the generated ``keyword_counts.xlsx``.

    Raises:
        IndexError: If the sheet has fewer than four columns.
    """
    import os
    import tempfile

    # Read the Excel file.
    if isinstance(file, (str, os.PathLike)):
        source = file
    else:
        source = getattr(file, "name", file)
    df = pd.read_excel(source)

    # Extract the data in column D (zero-based index 3).
    if df.shape[1] < 4:
        raise IndexError(
            "expected at least 4 columns (keywords are read from column D), "
            "found %d" % df.shape[1]
        )
    product_names = df.iloc[:, 3].dropna()

    # Extract keywords and count frequencies.
    keyword_counts = Counter()
    for name in product_names:
        # Cells may be numeric; coerce to str before stripping punctuation.
        words = re.sub(r'[^\w\s]', '', str(name)).split()
        # set(): a word repeated inside one cell counts only once.
        keyword_counts.update(set(words))

    # Arrange the result as a DataFrame; the secondary sort key makes the
    # order of equally frequent keywords deterministic.
    result_df = pd.DataFrame(list(keyword_counts.items()), columns=['Keyword', 'Frequency'])
    result_df = result_df.sort_values(
        by=['Frequency', 'Keyword'], ascending=[False, True]
    ).reset_index(drop=True)

    # Save to a fresh temporary directory so the write does not depend on a
    # pre-existing folder and concurrent calls cannot overwrite each other.
    output_file = os.path.join(tempfile.mkdtemp(), "keyword_counts.xlsx")
    result_df.to_excel(output_file, index=False)
    return output_file
# Gradio interface definition: a file input wired to process_excel, with a
# file output.
iface = gr.Interface(
    process_excel,
    "file",
    "file",
    title="Excel Keyword Extractor",
    description="엑셀 파일의 D열에서 키워드를 추출하고 빈도를 계산하여 새로운 엑셀 파일로 출력합니다.",
)

# Launch the web UI only when run as a script, not on import.
if __name__ == "__main__":
    iface.launch()