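# NOTE: "Spaces: Sleeping" page-status text from the Hugging Face Spaces listing appeared here; it is not part of the program.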
import os
import re
import tempfile
from collections import Counter

import gradio as gr
import pandas as pd
# Matches every character that is neither a word character nor whitespace.
_NON_WORD_RE = re.compile(r"[^\w\s]")

# Zero-based position of spreadsheet column D.
_PRODUCT_NAME_COLUMN = 3


def process_excel(file) -> str:
    """Count keyword frequencies in column D of an Excel sheet.

    Each non-empty cell of the fourth column (D) is stripped of punctuation
    and split on whitespace. A keyword is counted at most once per cell, so
    the resulting frequency is "number of cells containing the keyword".

    Args:
        file: Anything ``pandas.read_excel`` accepts (a path or a readable
            file-like object).

    Returns:
        Path of a newly written ``keyword_counts.xlsx`` holding the columns
        ``Keyword`` and ``Frequency``, sorted by descending frequency and
        then alphabetically by keyword.

    Raises:
        IndexError: If the sheet has fewer than four columns.
    """
    df = pd.read_excel(file)

    if df.shape[1] <= _PRODUCT_NAME_COLUMN:
        raise IndexError(
            f"Expected at least {_PRODUCT_NAME_COLUMN + 1} columns (A-D), "
            f"but the sheet has only {df.shape[1]}."
        )

    # Column D is index 3 because positions are counted from 0.
    product_names = df.iloc[:, _PRODUCT_NAME_COLUMN].dropna()

    keyword_counts = Counter()
    for name in product_names:
        # Cells may hold numbers or dates; coerce to text so the regex
        # substitution does not raise TypeError on non-string values.
        words = _NON_WORD_RE.sub("", str(name)).split()
        # De-duplicate so a keyword counts once per product name.
        keyword_counts.update(set(words))

    # Break frequency ties alphabetically: set iteration order is
    # hash-randomized, so without this the row order varies between runs.
    rows = sorted(keyword_counts.items(), key=lambda item: (-item[1], item[0]))
    result_df = pd.DataFrame(rows, columns=["Keyword", "Frequency"])

    # Write into a fresh temporary directory: it always exists and keeps
    # concurrent requests from overwriting each other's result, while the
    # download keeps its stable file name.
    output_file = os.path.join(tempfile.mkdtemp(), "keyword_counts.xlsx")
    result_df.to_excel(output_file, index=False)
    return output_file
# Gradio interface definition: one uploaded Excel file in, one generated
# Excel file out, both handled by process_excel.
iface = gr.Interface(
    fn=process_excel,
    inputs="file",
    outputs="file",
    title="Excel Keyword Extractor",
    # User-facing text, restored from a mis-encoded (mojibake) literal.
    # English: "Extracts keywords from column D of an Excel file, counts
    # their frequency, and outputs the result as a new Excel file."
    description="엑셀 파일의 D열에서 키워드를 추출하고 빈도를 계산하여 새로운 엑셀 파일로 출력합니다.",
)

# Start the web UI only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()