# 1_4_keyword / app.py
# Last change: "Update app.py" (uploader: Kims12, commit 0b61b2b)
import os
import re
import tempfile
from collections import Counter

import gradio as gr
import pandas as pd
def process_excel(file) -> str:
    """Count keyword frequencies in column D of an Excel file.

    Every non-empty cell of the fourth column (column D) is stripped of
    punctuation and split on whitespace. Each distinct word is counted at
    most once per cell, so the resulting frequency is the number of cells
    (product names) that contain the keyword.

    Args:
        file: Anything ``pandas.read_excel`` accepts (a path or a file-like
            object), as handed over by the Gradio ``"file"`` input.

    Returns:
        Path of a newly written ``keyword_counts.xlsx`` workbook with the
        columns ``Keyword`` and ``Frequency``, ordered by descending
        frequency and then alphabetically by keyword.

    Raises:
        IndexError: If the sheet has fewer than four columns.
    """
    # Read the uploaded Excel file.
    df = pd.read_excel(file)

    # Column D is the fourth column, i.e. zero-based position 3; skip blanks.
    product_names = df.iloc[:, 3].dropna()

    # Compile once instead of on every row.
    punctuation = re.compile(r'[^\w\s]')

    keyword_counts = Counter()
    for name in product_names:
        # Cells can hold numbers or dates, which re.sub would reject with a
        # TypeError, so coerce to text before removing special characters.
        words = punctuation.sub('', str(name)).split()
        # De-duplicate so a keyword counts only once per product name.
        keyword_counts.update(set(words))

    # Order by frequency (highest first); break ties alphabetically so the
    # output does not depend on set iteration order.
    ranked = sorted(keyword_counts.items(), key=lambda item: (-item[1], item[0]))
    result_df = pd.DataFrame(ranked, columns=['Keyword', 'Frequency'])

    # Write into a fresh temporary directory: a fixed location may not exist
    # on the host, and concurrent requests would overwrite each other's file.
    output_file = os.path.join(tempfile.mkdtemp(), "keyword_counts.xlsx")
    result_df.to_excel(output_file, index=False)
    return output_file
# Define the Gradio interface: one uploaded file in, one generated file out.
iface = gr.Interface(
    fn=process_excel,
    inputs="file",
    outputs="file",
    title="Excel Keyword Extractor",
    # Korean UI text, restored from a mis-decoded (mojibake) literal. It reads:
    # "Extracts keywords from column D of the Excel file, counts their
    # frequency, and outputs the result as a new Excel file."
    description="엑셀 파일의 D열에서 키워드를 추출하고 빈도를 계산하여 새로운 엑셀 파일로 출력합니다.",
)

# Start the web app only when executed as a script, not when imported.
if __name__ == "__main__":
    iface.launch()